From 9a4e9ddfc7c8333bb4fc528113b669cb79aad22f Mon Sep 17 00:00:00 2001
From: Simon Weald
Date: Tue, 18 Sep 2018 10:53:13 +0100
Subject: [PATCH] add test for missing robots.txt

---
 test_helpers.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test_helpers.py b/test_helpers.py
index e78f8ec..0cfd9a4 100644
--- a/test_helpers.py
+++ b/test_helpers.py
@@ -6,10 +6,12 @@ from utils.helpers import (RobotsTxt, standardise_url)
 
 class TestRobots(unittest.TestCase):
     base_url = 'http://eu.httpbin.org'
+    no_robots = 'https://www.simonweald.com'
     test_paths = (('/', True), ('/deny', False))
 
     robots = RobotsTxt(base_url=base_url)
+    norobots = RobotsTxt(base_url=no_robots)
 
     def test_robots_txt_deny(self):
         '''
@@ -19,6 +21,13 @@ class TestRobots(unittest.TestCase):
             result = self.robots.check(url=path)
             self.assertIs(result, allowed)
 
+    def test_no_robots_txt(self):
+        '''
+        Ensure we can crawl if robots.txt isn't present.
+        '''
+        result = self.norobots.check(url='/')
+        self.assertTrue(result)
+
 
 class TestUrls(unittest.TestCase):
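
For context (not part of the patch): utils/helpers.py isn't shown here, but the
new test assumes RobotsTxt.check() returns True when the target site has no
robots.txt at all. A minimal sketch of that behaviour, assuming the helper
wraps the standard library's urllib.robotparser (the real class name and check
signature come from the tests above; everything else is hypothetical):

    from urllib import robotparser

    class RobotsTxt:
        '''Hypothetical stand-in for utils.helpers.RobotsTxt, which this
        patch does not show.'''

        def __init__(self, base_url):
            self.base_url = base_url
            self.parser = robotparser.RobotFileParser(base_url + '/robots.txt')
            # RobotFileParser.read() treats a 404 as "allow everything",
            # so a missing robots.txt permits all paths.
            self.parser.read()

        def check(self, url):
            # True if the given path may be crawled; defaults to True when
            # robots.txt was absent, which is what test_no_robots_txt asserts.
            return self.parser.can_fetch('*', self.base_url + url)

Under that assumption, RobotsTxt(base_url=no_robots).check(url='/') returns
True for a site without a robots.txt, while the httpbin.org /deny path stays
disallowed, matching both test cases above.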