diff --git a/test_helpers.py b/test_helpers.py
index 087483d..bbec5d9 100644
--- a/test_helpers.py
+++ b/test_helpers.py
@@ -2,37 +2,18 @@
 import unittest
 from unittest import mock
 
-from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url
-
-
-class TestAsyncCrawler(unittest.TestCase):
-
-    base_url = 'http://eu.httpbin.org'
-    concurrency = 10
-    testcrawler = AsyncCrawler(baseurl=base_url, concurrency=concurrency)
-    expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']
-    crawled = set()
-    crawled.add('https://eu.httpbin.org/a/')
-
-    @mock.patch('utils.helpers.AsyncCrawler.validate_url', response=True)
-    def test_find_all_urls(self, validate_url):
-
-        with open('test/files/find_all_urls.html', 'r') as f:
-            source = f.read()
-
-        urls = self.testcrawler.find_all_urls(source=source)
-        self.assertEqual(urls, self.expected_urls)
+from utils.helpers import RobotsTxt, standardise_url
 
 
 class TestRobots(unittest.TestCase):
 
-    base_url = 'http://eu.httpbin.org'
+    rooturl = 'http://eu.httpbin.org'
     no_robots = 'https://www.simonweald.com'
     test_paths = (('/', True), ('/deny', False))
 
-    robots = RobotsTxt(base_url=base_url)
-    norobots = RobotsTxt(base_url=no_robots)
+    robots = RobotsTxt(rooturl=rooturl)
+    norobots = RobotsTxt(rooturl=no_robots)
 
     def test_robots_txt_deny(self):
         '''
@@ -52,9 +33,9 @@ class TestRobots(unittest.TestCase):
 
 class TestUrls(unittest.TestCase):
 
-    base_url = 'http://eu.httpbin.org'
+    rooturl = 'http://eu.httpbin.org'
 
-    base_url_list = (('eu.httpbin.org', 'http://eu.httpbin.org'),
+    rooturl_list = (('eu.httpbin.org', 'http://eu.httpbin.org'),
                     ('http://eu.httpbin.org/', 'http://eu.httpbin.org'),
                     ('https://eu.httpbin.org', 'https://eu.httpbin.org'))
 
@@ -64,12 +45,12 @@ class TestUrls(unittest.TestCase):
             ('http://eu.httpbin.org/index.html?foo=bar', 'http://eu.httpbin.org/index.html'),
             ('http://eu.httpbin.org/index.html#anchor', 'http://eu.httpbin.org/index.html'))
 
-    def test_standardise_base_url(self):
+    def test_standardise_rooturl(self):
         '''
        Tests whether a base URL can be standardised to the format proto://[sub].domain.tld.
         '''
-        for url, target in self.base_url_list:
+        for url, target in self.rooturl_list:
            result = standardise_url(url)
            self.assertEqual(result, target)
 
@@ -78,7 +59,7 @@ class TestUrls(unittest.TestCase):
        Ensure that fragments/anchors etc are stripped.
         '''
        for url, target in self.urls_to_clean:
-            result = standardise_url(url, base_url=self.base_url)
+            result = standardise_url(url, rooturl=self.rooturl)
            self.assertEqual(result, target)