#!/usr/bin/env python
"""Unit tests for utils.helpers: robots.txt permission checks and URL
standardisation.

NOTE(review): these tests hit the live host http://eu.httpbin.org over the
network — confirm it is still reachable, or point them at a local fixture.
"""
import unittest

from utils.helpers import RobotsTxt, standardise_url


class TestRobots(unittest.TestCase):
    """Exercise RobotsTxt.check() against paths with known allow/deny rules."""

    base_url = 'http://eu.httpbin.org'
    # (path, expected allow/deny result) pairs.
    test_paths = (('/', True), ('/deny', False))

    @classmethod
    def setUpClass(cls):
        # Build the RobotsTxt here rather than in the class body so the
        # robots.txt fetch (network I/O) happens only when this test class
        # runs, not at module import / test-collection time.
        cls.robots = RobotsTxt(base_url=cls.base_url)

    def test_robots_txt_deny(self):
        """check() must return exactly True/False matching each path's rule."""
        for path, allowed in self.test_paths:
            # subTest reports every failing path instead of stopping at
            # the first mismatch.
            with self.subTest(path=path):
                result = self.robots.check(url=path)
                # assertIs: the contract is a real bool, not merely truthy.
                self.assertIs(result, allowed)


class TestUrls(unittest.TestCase):
    """Exercise standardise_url() normalisation of base URLs and full URLs."""

    base_url = 'http://eu.httpbin.org'
    # (input, expected) pairs for base-URL normalisation: add a missing
    # scheme, strip a trailing slash, preserve https.
    base_url_list = (
        ('eu.httpbin.org', 'http://eu.httpbin.org'),
        ('http://eu.httpbin.org/', 'http://eu.httpbin.org'),
        ('https://eu.httpbin.org', 'https://eu.httpbin.org'),
    )
    # (input, expected) pairs for full-URL cleaning: query strings and
    # fragments are stripped; paths (with or without trailing slash) kept.
    urls_to_clean = (
        ('http://eu.httpbin.org', 'http://eu.httpbin.org'),
        ('http://eu.httpbin.org/some/path/',
         'http://eu.httpbin.org/some/path/'),
        ('http://eu.httpbin.org/index.html',
         'http://eu.httpbin.org/index.html'),
        ('http://eu.httpbin.org/index.html?foo=bar',
         'http://eu.httpbin.org/index.html'),
        ('http://eu.httpbin.org/index.html#anchor',
         'http://eu.httpbin.org/index.html'),
    )

    def test_standardise_base_url(self):
        """A base URL is standardised to proto://[sub].domain.tld."""
        for url, target in self.base_url_list:
            with self.subTest(url=url):
                self.assertEqual(standardise_url(url), target)

    def test_standardise_url(self):
        """Fragments, anchors and query strings are stripped from URLs."""
        for url, target in self.urls_to_clean:
            with self.subTest(url=url):
                result = standardise_url(url, base_url=self.base_url)
                self.assertEqual(result, target)


if __name__ == '__main__':
    unittest.main()