initial test for AsyncCrawler
This commit is contained in:
@@ -1,7 +1,28 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
import unittest
|
import unittest
|
||||||
from utils.helpers import (RobotsTxt, standardise_url)
|
from unittest import mock
|
||||||
|
from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url
|
||||||
|
|
||||||
|
|
||||||
|
class TestAsyncCrawler(unittest.TestCase):
    """Tests for ``utils.helpers.AsyncCrawler``."""

    # Configuration handed to the crawler under test.
    base_url = 'http://eu.httpbin.org'
    concurrency = 10
    # Built once at class-definition time and shared by every test method.
    testcrawler = AsyncCrawler(baseurl=base_url, concurrency=concurrency)
    # URLs that find_all_urls() is expected to return for the HTML fixture.
    expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']
    # NOTE(review): this set is not referenced by the test below; presumably
    # it represents an already-crawled URL -- confirm it is still needed.
    crawled = set()
    crawled.add('https://eu.httpbin.org/a/')

    # ``return_value`` is what makes the patched validate_url() return True.
    # Any other keyword passed to mock.patch() (the original used
    # ``response=True``) only sets an attribute of that name on the mock and
    # leaves the call returning a MagicMock instance.
    @mock.patch('utils.helpers.AsyncCrawler.validate_url', return_value=True)
    def test_find_all_urls(self, validate_url):
        """find_all_urls() returns the expected URLs for the HTML fixture.

        ``validate_url`` is the mock injected by ``mock.patch``; while the
        test runs, every call to ``AsyncCrawler.validate_url`` returns True.
        """
        # Relative path: resolved against the current working directory.
        with open('test/files/find_all_urls.html', 'r') as f:
            source = f.read()

        urls = self.testcrawler.find_all_urls(source=source)
        self.assertEqual(urls, self.expected_urls)
|
||||||
|
|
||||||
|
|
||||||
class TestRobots(unittest.TestCase):
|
class TestRobots(unittest.TestCase):
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user