small improvements to docs and variables

This commit is contained in:
2018-09-17 21:44:04 +01:00
parent eb2395d461
commit 73c21e5bd3
2 changed files with 16 additions and 2 deletions

View File

@@ -24,6 +24,15 @@ from utils.helpers import RobotsTxt, AsyncCrawler, standardise_url
def sanity_checks(url=None):
'''
Runs some basic sanity checks before the crawler is initialised.
Accepts:
url: the root URL to be crawled.
Returns:
baseurl: a validated and cleaned version of the initial URL.
(type=string)
robots: an object which allows us to query whether a site may be crawled.
(type=RobotsTxt)
'''
# ensure we have a sensible URL to work with
baseurl = standardise_url(url=url)
@@ -41,6 +50,11 @@ def sanity_checks(url=None):
def render_sitemap(base_url=None, crawled_urls=None, runtime=None):
'''
Renders the sitemap to an HTML file.
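Accepts:
base_url: presumably the root URL of the crawled site (confirm against caller).
crawled_urls: a collection of the crawled URLs; it is counted and sorted.
runtime: presumably how long the crawl took (confirm against caller).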
'''
urlcount = len(crawled_urls)
sorted_urls = sorted(crawled_urls)