crawler now initialises and populates the uncrawled pool with the URLs it finds

2018-08-31 19:02:21 +01:00
parent 1b18aa83eb
commit 0517e5bc56


@@ -10,6 +10,21 @@ def init_crawler(base_url=None):
    '''
    needs a docstring
    '''
    # One pool for URLs still to visit, one for URLs already crawled.
    uncrawled_urls, crawled_urls = UrlPool(), UrlPool()
    initial_page = WebPage(base_url)
    initial_urls = []  # default so the loop below is safe if the fetch fails
    try:
        initial_urls = initial_page.run()
    except Exception as e:
        print(e)
    # Seed the uncrawled pool with every URL found on the initial page.
    for url in initial_urls:
        try:
            uncrawled_urls.add_to_pool(url)
        except Exception as e:
            print(e)
    print(uncrawled_urls.url_pool)
def run(args=None):
@@ -17,7 +32,9 @@ def run(args=None):
    needs a docstring.
    '''
    base_url = sanitise_url(args.url)
    print(base_url)
    init_crawler(base_url)
if __name__ == '__main__':
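
The diff calls into `UrlPool`, `WebPage`, and `sanitise_url`, none of which are defined in this commit. A minimal sketch of how those helpers might look, assuming `UrlPool` is a set-backed container whose `add_to_pool` raises on duplicates (which would explain the try/except around it) and `WebPage.run` fetches a page and returns the links it finds; everything beyond the names used in the diff is an assumption:

```python
from urllib.parse import urljoin, urlparse
from urllib.request import urlopen
import re


class UrlPool:
    '''
    Set-backed pool of URLs; add_to_pool rejects duplicates by raising,
    matching how the caller wraps it in try/except.
    '''
    def __init__(self):
        self.url_pool = set()

    def add_to_pool(self, url):
        if url in self.url_pool:
            raise ValueError(f'{url} already in pool')
        self.url_pool.add(url)


class WebPage:
    '''
    Fetches a page and extracts the absolute URLs of its links.
    '''
    def __init__(self, url):
        self.url = url

    def run(self):
        html = urlopen(self.url).read().decode('utf-8', errors='replace')
        # Naive href extraction; a real crawler would use an HTML parser.
        hrefs = re.findall(r'href=["\'](.*?)["\']', html)
        return [urljoin(self.url, h) for h in hrefs]


def sanitise_url(url):
    '''
    Prepend a scheme when the user omits one.
    '''
    return url if urlparse(url).scheme else 'http://' + url
```

With these in place, `init_crawler(sanitise_url('example.com'))` would print the set of links discovered on the start page.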