initial test for AsyncCrawler

This commit is contained in:
2018-09-18 18:22:55 +01:00
parent 9a4e9ddfc7
commit f6265f18a7

View File

@@ -1,7 +1,28 @@
#!/usr/bin/env python #!/usr/bin/env python
import unittest import unittest
from utils.helpers import (RobotsTxt, standardise_url) from unittest import mock
from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url
class TestAsyncCrawler(unittest.TestCase):
    """Tests for ``AsyncCrawler``, run against one shared crawler instance."""

    # Fixture values are class attributes, so the crawler below is
    # constructed once, when this class body is executed.
    base_url = 'http://eu.httpbin.org'
    concurrency = 10
    testcrawler = AsyncCrawler(baseurl=base_url, concurrency=concurrency)

    # URLs that find_all_urls() is expected to return for the HTML fixture.
    expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']

    # Not read by any test visible in this class.
    crawled = {'https://eu.httpbin.org/a/'}

    # NOTE(review): extra keyword arguments to mock.patch are used to
    # configure the mock it creates, so ``response=True`` sets a
    # ``.response`` attribute rather than the mock's return value --
    # confirm whether ``return_value`` was intended.
    @mock.patch('utils.helpers.AsyncCrawler.validate_url', response=True)
    def test_find_all_urls(self, validate_url):
        # Feed the stored HTML fixture to find_all_urls() and compare the
        # result with the expected list.
        with open('test/files/find_all_urls.html', 'r') as html_file:
            page_source = html_file.read()

        found = self.testcrawler.find_all_urls(source=page_source)
        self.assertEqual(found, self.expected_urls)
class TestRobots(unittest.TestCase): class TestRobots(unittest.TestCase):