Correct tests to use the new argument names (base_url -> rooturl) and remove the TestAsyncCrawler test case

This commit is contained in:
2018-09-19 08:37:55 +01:00
parent 679b1b7b53
commit f5f6afd1a4

View File

@@ -2,37 +2,18 @@
import unittest import unittest
from unittest import mock from unittest import mock
from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url from utils.helpers import RobotsTxt, standardise_url
class TestAsyncCrawler(unittest.TestCase):
base_url = 'http://eu.httpbin.org'
concurrency = 10
testcrawler = AsyncCrawler(baseurl=base_url, concurrency=concurrency)
expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']
crawled = set()
crawled.add('https://eu.httpbin.org/a/')
@mock.patch('utils.helpers.AsyncCrawler.validate_url', response=True)
def test_find_all_urls(self, validate_url):
with open('test/files/find_all_urls.html', 'r') as f:
source = f.read()
urls = self.testcrawler.find_all_urls(source=source)
self.assertEqual(urls, self.expected_urls)
class TestRobots(unittest.TestCase): class TestRobots(unittest.TestCase):
base_url = 'http://eu.httpbin.org' rooturl = 'http://eu.httpbin.org'
no_robots = 'https://www.simonweald.com' no_robots = 'https://www.simonweald.com'
test_paths = (('/', True), ('/deny', False)) test_paths = (('/', True), ('/deny', False))
robots = RobotsTxt(base_url=base_url) robots = RobotsTxt(rooturl=rooturl)
norobots = RobotsTxt(base_url=no_robots) norobots = RobotsTxt(rooturl=no_robots)
def test_robots_txt_deny(self): def test_robots_txt_deny(self):
''' '''
@@ -52,9 +33,9 @@ class TestRobots(unittest.TestCase):
class TestUrls(unittest.TestCase): class TestUrls(unittest.TestCase):
base_url = 'http://eu.httpbin.org' rooturl = 'http://eu.httpbin.org'
base_url_list = (('eu.httpbin.org', 'http://eu.httpbin.org'), rooturl_list = (('eu.httpbin.org', 'http://eu.httpbin.org'),
('http://eu.httpbin.org/', 'http://eu.httpbin.org'), ('http://eu.httpbin.org/', 'http://eu.httpbin.org'),
('https://eu.httpbin.org', 'https://eu.httpbin.org')) ('https://eu.httpbin.org', 'https://eu.httpbin.org'))
@@ -64,12 +45,12 @@ class TestUrls(unittest.TestCase):
('http://eu.httpbin.org/index.html?foo=bar', 'http://eu.httpbin.org/index.html'), ('http://eu.httpbin.org/index.html?foo=bar', 'http://eu.httpbin.org/index.html'),
('http://eu.httpbin.org/index.html#anchor', 'http://eu.httpbin.org/index.html')) ('http://eu.httpbin.org/index.html#anchor', 'http://eu.httpbin.org/index.html'))
def test_standardise_base_url(self): def test_standardise_rooturl(self):
''' '''
Tests whether a base URL can be standardised to the format Tests whether a base URL can be standardised to the format
proto://[sub].domain.tld. proto://[sub].domain.tld.
''' '''
for url, target in self.base_url_list: for url, target in self.rooturl_list:
result = standardise_url(url) result = standardise_url(url)
self.assertEqual(result, target) self.assertEqual(result, target)
@@ -78,7 +59,7 @@ class TestUrls(unittest.TestCase):
Ensure that fragments/anchors etc are stripped. Ensure that fragments/anchors etc are stripped.
''' '''
for url, target in self.urls_to_clean: for url, target in self.urls_to_clean:
result = standardise_url(url, base_url=self.base_url) result = standardise_url(url, rooturl=self.rooturl)
self.assertEqual(result, target) self.assertEqual(result, target)