diff --git a/test_helpers.py b/test_helpers.py index 62c9412..e78f8ec 100644 --- a/test_helpers.py +++ b/test_helpers.py @@ -1,34 +1,54 @@ #!/usr/bin/env python import unittest -from utils.helpers import (sanitise_url) +from utils.helpers import (RobotsTxt, standardise_url) + +class TestRobots(unittest.TestCase): + + base_url = 'http://eu.httpbin.org' + + test_paths = (('/', True), ('/deny', False)) + + robots = RobotsTxt(base_url=base_url) + + def test_robots_txt_deny(self): + ''' + Asserts result is True or False. + ''' + for path, allowed in self.test_paths: + result = self.robots.check(url=path) + self.assertIs(result, allowed) + class TestUrls(unittest.TestCase): + base_url = 'http://eu.httpbin.org' + base_url_list = (('eu.httpbin.org', 'http://eu.httpbin.org'), - ('www.simonweald.com', 'http://www.simonweald.com'), - ('http://www.github.com/', 'http://www.github.com'), - ('https://www.github.com', 'https://www.github.com')) + ('http://eu.httpbin.org/', 'http://eu.httpbin.org'), + ('https://eu.httpbin.org', 'https://eu.httpbin.org')) - urls_to_clean = (('https://www.github.com/', 'https://www.github.com/'), - ('https://github.com/?foo=bar', 'https://github.com/'), - ('https://github.com/#anchor', 'https://github.com/')) + urls_to_clean = (('http://eu.httpbin.org', 'http://eu.httpbin.org'), + ('http://eu.httpbin.org/some/path/', 'http://eu.httpbin.org/some/path/'), + ('http://eu.httpbin.org/index.html','http://eu.httpbin.org/index.html'), + ('http://eu.httpbin.org/index.html?foo=bar', 'http://eu.httpbin.org/index.html'), + ('http://eu.httpbin.org/index.html#anchor', 'http://eu.httpbin.org/index.html')) - - def test_sanitise_base_url(self): + def test_standardise_base_url(self): ''' - Tests whether a URL's protocol can be discovered if not provided. + Tests whether a base URL can be standardised to the format + proto://[sub].domain.tld. ''' for url, target in self.base_url_list: - result = sanitise_url(url, base_url=True) + result = standardise_url(url) self.assertEqual(result, target) - def test_sanitise_url(self): + def test_standardise_url(self): ''' - Tests whether a URL's protocol can be discovered if not provided. + Ensure that fragments/anchors etc are stripped. ''' for url, target in self.urls_to_clean: - result = sanitise_url(url) + result = standardise_url(url, base_url=self.base_url) self.assertEqual(result, target)