initial test for AsyncCrawler
This commit is contained in:
@@ -1,7 +1,28 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import unittest
|
||||
from utils.helpers import (RobotsTxt, standardise_url)
|
||||
from unittest import mock
|
||||
from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url
|
||||
|
||||
|
||||
class TestAsyncCrawler(unittest.TestCase):
    """Tests for URL discovery in ``utils.helpers.AsyncCrawler``."""

    base_url = 'http://eu.httpbin.org'
    concurrency = 10
    # URLs that find_all_urls() must return for the HTML fixture.
    expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']
    # NOTE(review): not referenced by the test below; presumably models an
    # already-crawled URL for other tests -- confirm before removing.
    crawled = {'https://eu.httpbin.org/a/'}

    def setUp(self):
        """Build a fresh crawler per test.

        Constructing it here, rather than in the class body, avoids
        running the constructor at import time and keeps state from
        being shared between tests.
        """
        self.testcrawler = AsyncCrawler(
            baseurl=self.base_url,
            concurrency=self.concurrency,
        )

    # ``return_value=True`` makes the patched validate_url() return True.
    # Any other keyword (such as ``response=``) is forwarded to the mock's
    # constructor and merely sets an attribute of that name.
    @mock.patch('utils.helpers.AsyncCrawler.validate_url', return_value=True)
    def test_find_all_urls(self, validate_url):
        """find_all_urls() returns the expected URLs for the fixture page."""
        # Path is relative to the directory the tests are run from.
        with open('test/files/find_all_urls.html', 'r', encoding='utf-8') as f:
            source = f.read()

        urls = self.testcrawler.find_all_urls(source=source)
        self.assertEqual(urls, self.expected_urls)
|
||||
|
||||
|
||||
class TestRobots(unittest.TestCase):
|
||||
|
||||
|
||||
Reference in New Issue
Block a user