add test for missing robots.txt
@@ -6,10 +6,12 @@ from utils.helpers import (RobotsTxt, standardise_url)
 
 class TestRobots(unittest.TestCase):
 
     base_url = 'http://eu.httpbin.org'
+    no_robots = 'https://www.simonweald.com'
 
     test_paths = (('/', True), ('/deny', False))
 
     robots = RobotsTxt(base_url=base_url)
+    norobots = RobotsTxt(base_url=no_robots)
 
     def test_robots_txt_deny(self):
         '''
@@ -19,6 +21,13 @@ class TestRobots(unittest.TestCase):
             result = self.robots.check(url=path)
             self.assertIs(result, allowed)
 
+    def test_no_robots_txt(self):
+        '''
+        Ensure we can crawl if robots.txt isn't present.
+        '''
+        result = self.norobots.check(url='/')
+        self.assertTrue(result)
+
 
 class TestUrls(unittest.TestCase):
 
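For context, a minimal sketch of a RobotsTxt helper that would satisfy both tests; the real implementation lives in utils.helpers and may differ. The sketch assumes the helper wraps the standard library's urllib.robotparser, whose read() treats a 404 response for robots.txt as allow-all, which is the behaviour test_no_robots_txt relies on for a site with no robots.txt.

# Hypothetical sketch only; the actual utils.helpers.RobotsTxt may differ.
from urllib.parse import urljoin
from urllib.robotparser import RobotFileParser


class RobotsTxt:
    def __init__(self, base_url):
        self.base_url = base_url
        self.parser = RobotFileParser()
        self.parser.set_url(urljoin(base_url, '/robots.txt'))
        # Fetch and parse robots.txt; if the site returns a 404 the parser
        # falls back to allowing every path.
        self.parser.read()

    def check(self, url):
        # True if any user agent may fetch the given path.
        return self.parser.can_fetch('*', urljoin(self.base_url, url))

Under that assumption, norobots.check(url='/') returns True because the parser defaults to allow-all when robots.txt is missing, while the httpbin /deny path in test_paths stays disallowed by that site's published robots.txt.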