initial commit of crawler skeleton

This commit is contained in:
2018-08-31 18:26:49 +01:00
parent 915def3a5d
commit 5e0d9fd568

28
crawler.py Normal file
View File

@@ -0,0 +1,28 @@
#!/usr/bin/env python
'''
Command-line entry point for the recursive web crawler.

Run as a script, this module reads a base URL from the -u/--url option,
passes it through sanitise_url and prints the result. The crawl itself is
not implemented yet; init_crawler is still an empty stub.
'''
import argparse
from utils.helpers import (UrlPool, WebPage, sanitise_url, qualify_url)
def init_crawler(base_url=None):
    '''
    Placeholder for crawler initialisation.

    base_url -- presumably the root URL the crawl will start from; it is
                accepted but not used yet (TODO confirm once implemented).

    The function performs no work at present and always returns None.
    '''
    return None
def run(args=None):
    '''
    Sanitise the base URL carried by ``args`` and print it.

    args -- object exposing a ``url`` attribute (for example the namespace
            returned by argparse). The value of ``args.url`` is passed to
            sanitise_url and the result is written to standard output.

    Raises ValueError when ``args`` is None. The None default is kept so the
    signature stays unchanged, but there is nothing to work on without
    arguments; failing here replaces the opaque AttributeError that
    ``None.url`` would otherwise produce.
    '''
    if args is None:
        raise ValueError("run() requires parsed arguments with a 'url' attribute")
    base_url = sanitise_url(args.url)
    print(base_url)
if __name__ == '__main__':
    # Script entry point: build the command-line interface, then hand the
    # parsed namespace straight to run().
    cli = argparse.ArgumentParser(description='Recursive web crawler')
    cli.add_argument(
        "-u", "--url",
        help="Base url to crawl",
        required=True,
    )
    run(cli.parse_args())