#!/usr/bin/env python
'''
Recursive web crawler.

Crawls a base URL (respecting robots.txt), follows links concurrently
using asyncio, and renders the discovered pages into an HTML sitemap
(sitemap.html) via a Jinja2 template.

Arguments:
    -u/--url          Base URL to crawl (required).
    -c/--concurrency  Max number of pages to crawl concurrently (default: 100).
'''
import argparse
import asyncio
import os
import sys
from datetime import datetime

import jinja2

from utils.helpers import RobotsTxt, AsyncCrawler, standardise_url


def sanity_checks(url=None):
    '''
    Runs some basic sanity checks before the crawler is initialised.
    '''
    # ensure we have a sensible URL to work with
    baseurl = standardise_url(url=url)

    # fetch and parse robots.txt for the base URL
    robots = RobotsTxt(base_url=baseurl)

    # fail early if robots.txt denies all crawling
    if not robots.check(url=baseurl):
        sys.exit("{baseurl} cannot be crawled (denied by robots.txt)".format(baseurl=baseurl))

    return baseurl, robots


def render_sitemap(base_url=None, crawled_urls=None, runtime=None):
    '''
    Renders the sitemap to an HTML file.
    '''
    urlcount = len(crawled_urls)
    sorted_urls = sorted(crawled_urls)

    template = jinja2.Environment(
        loader=jinja2.FileSystemLoader('templates')
    ).get_template('sitemap.html.j2')

    rendered_html = template.render(
        base_url=base_url,
        urlcount=urlcount,
        urls=sorted_urls,
        runtime=runtime)

    with open('sitemap.html', 'w') as outfile:
        outfile.write(rendered_html)

    print('Sitemap available at {0}/sitemap.html'.format(os.getcwd()))


def main(args):
    '''
    Main function, responsible for prepping and running the crawler and
    rendering the sitemap.
    '''
    starttime = datetime.now()

    baseurl, robots = sanity_checks(url=args.url)

    # create a crawler
    async_crawler = AsyncCrawler(baseurl=baseurl, robots=robots,
                                 concurrency=args.concurrency)

    # run the crawler to completion (asyncio.run creates and closes the
    # event loop for us) and sort the crawled URLs for the sitemap
    results = sorted(asyncio.run(async_crawler.main()))

    runtime = int((datetime.now() - starttime).total_seconds())

    render_sitemap(base_url=baseurl, crawled_urls=results, runtime=runtime)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Recursive web crawler')
    parser.add_argument("-u", "--url", required=True,
                        help="Base URL to crawl")
    parser.add_argument("-c", "--concurrency", required=False, type=int, default=100,
                        help="Max number of pages to crawl concurrently")
    args = parser.parse_args()
    main(args)