From f6265f18a7dcb2ae017105f7149e795b6ca05fae Mon Sep 17 00:00:00 2001
From: Simon Weald
Date: Tue, 18 Sep 2018 18:22:55 +0100
Subject: [PATCH] initial test for AsyncCrawler

---
 test_helpers.py | 27 +++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/test_helpers.py b/test_helpers.py
index 0cfd9a4..087483d 100644
--- a/test_helpers.py
+++ b/test_helpers.py
@@ -1,7 +1,32 @@
 #!/usr/bin/env python
 
 import unittest
-from utils.helpers import (RobotsTxt, standardise_url)
+from unittest import mock
+from utils.helpers import AsyncCrawler, RobotsTxt, standardise_url
+
+
+class TestAsyncCrawler(unittest.TestCase):
+
+    base_url = 'http://eu.httpbin.org'
+    concurrency = 10
+    testcrawler = AsyncCrawler(baseurl=base_url, concurrency=concurrency)
+    expected_urls = ['http://eu.httpbin.org/b/', 'http://eu.httpbin.org/c/']
+    crawled = set()
+    crawled.add('https://eu.httpbin.org/a/')
+
+    # return_value=True makes the patched validate_url report every URL as
+    # valid. (Note: passing response=True here would be a no-op for the
+    # call result — mock.patch forwards unknown kwargs to the Mock
+    # constructor as plain attributes, i.e. it would only set mock.response.)
+    @mock.patch('utils.helpers.AsyncCrawler.validate_url', return_value=True)
+    def test_find_all_urls(self, validate_url):
+
+        with open('test/files/find_all_urls.html', 'r') as f:
+            source = f.read()
+
+        urls = self.testcrawler.find_all_urls(source=source)
+        self.assertEqual(urls, self.expected_urls)
+
 
 
 class TestRobots(unittest.TestCase):