diff --git a/async_crawler.py b/async_crawler.py
index 5b48b3e..dc4d4a4 100644
--- a/async_crawler.py
+++ b/async_crawler.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 '''
 Need a docstring.
 
@@ -6,10 +7,10 @@ Need a docstring.
 import argparse
 import jinja2
 import os
+import sys
 import asyncio
 from datetime import datetime
-# from utils.helpers import (UrlPool, WebPage, RobotsTxt, sanitise_url)
-from utils.helpers import RobotsTxt, AsyncCrawler, sanitise_url
+from utils.helpers import RobotsTxt, AsyncCrawler, standardise_url
 
 
 def init_crawler(url=None):
@@ -17,10 +18,14 @@ def init_crawler(url=None):
     docstring
     '''
     # ensure we have a sensible URL to work with
-    baseurl = sanitise_url(url=url, base_url=True)
+    baseurl = standardise_url(url=url)
 
     # get robots.txt
     robots = RobotsTxt(base_url=baseurl)
 
+    # fail early if robots denies all crawling
+    if not robots.check(url=baseurl):
+        sys.exit("{baseurl} cannot be crawled (denied by robots.txt)".format(baseurl=baseurl))
+
     return(baseurl, robots)
 
@@ -44,7 +49,7 @@ def render_sitemap(base_url=None, crawled_urls=None, runtime=None):
     print('Sitemap available at {0}/sitemap.html'.format(os.getcwd()))
 
 
-def main(args=None):
+def main():
     '''
     docstring
     '''
@@ -54,21 +59,25 @@ def main(args=None):
     # create a crawler
     async_crawler = AsyncCrawler(baseurl=baseurl, robots=robots, concurrency=args.concurrency)
-    # async_crawler.run()
+
     # run the crawler
-    crawler = asyncio.Task(async_crawler.run())
+    task = asyncio.Task(async_crawler.run_loop())
     loop = asyncio.get_event_loop()
-    loop.run_until_complete(crawler)
+    loop.run_until_complete(task)
     loop.close()
-    result = crawler.result()
-    print(len(result))
+    results = task.result()
+    print(results)
+    print(len(results))
+    runtime = int((datetime.now() - starttime).total_seconds())
+    print(runtime)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Recursive web crawler')
     parser.add_argument("-u", "--url", required=True, help="Base url to crawl")
-    parser.add_argument("-s", "--concurrency", required=False, type=int, default=50, help="Max number of pages to crawl concurrently")
+    parser.add_argument("-c", "--concurrency", required=False, type=int,
+                        default=50, help="Max number of pages to crawl concurrently")
     args = parser.parse_args()
-    main(args)
+    main()
diff --git a/utils/helpers.py b/utils/helpers.py
index 05cc85c..f0791a4 100644
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -26,65 +26,113 @@ class AsyncCrawler(object):
         self.robots = robots
         self.uncrawled = set()
         self.crawled = set()
-        self.session = aiohttp.ClientSession()
+        # self.headers = {'Accept-Encoding': 'gzip, deflate',
+        #                 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
+        self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
+        self.client_session = None
         self.semaphore = asyncio.BoundedSemaphore(concurrency)
-        # add the base URL to be crawled
-        self.uncrawled.add(baseurl)
-        self.headers = {'Accept-Encoding': 'gzip, deflate',
-                        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
-
-    def validate_url(self, url):
+    async def crawl_url(self, url=None):
         '''
-        Checks if the discovered URL is local to the base URL.
+        docstring
+        '''
+        urls = set()
+        async with self.semaphore:
+            source = await self.get_source(url)
+            if source:
+                self.crawled.add(url)
+                # for new_url in self.find_all_urls(source):
+                #     urls.add(new_url)
+                urls_to_crawl = self.find_all_urls(source)
+                # print('discovered {0} new URLs'.format(len(urls_to_crawl)))
+                for new_url in urls_to_crawl:
+                    urls.add(new_url)
+        # add the url we just crawled to the crawled pool.
+
+
+        return urls
+
+
+    def validate_url(self, url=None):
+        '''
+        Ensures we have a valid URL to crawl and that the site's robots.txt
+        allows it.
         '''
         # ensure the URL is in a sane format
-        url = sanitise_url(url=url)
+        url = standardise_url(url=url, base_url=self.baseurl)
 
-        if url.startswith(self.baseurl) and robots.check(url=url):
+        if url and self.robots.check(url=url):
+            # print('validated url: {0}'.format(url))
             return url
         else:
             return False
 
-    def get_source(self, url):
+    async def get_source(self, url=None):
         '''
         Obtains the page's source.
         '''
-        pass
-
-        return source
+        print('semaphore held for {0}'.format(url))
+        async with self.client_session.get(url, timeout=5) as resp:
+            try:
+                source = await resp.read()
+                return source
+            except Exception:
+                return None
 
-    def find_links(self, source):
+    def find_all_urls(self, source=None):
         '''
-        Find all links in a page's source.
+        Find all URLs in a page's source.
         '''
-        links = set()
+        urls = set()
         html = BeautifulSoup(source, 'lxml')
         hrefs = html.find_all('a', href=True)
 
+        # build a set of URLs which are valid and haven't been crawled yet
        for href in hrefs:
-            url = self.validate_url(url=href)
-            if url:
-                links.add(url)
+            url = self.validate_url(url=href['href'])
+            if url and url not in self.crawled:
+                urls.add(url)
 
-        return links
+        return urls
 
-    def run(self):
+    async def run_loop(self):
         '''
         function which runs the crawler
         '''
-        pass
+        print('Crawling: {}'.format(self.baseurl))
+        self.client_session = aiohttp.ClientSession(headers=self.headers)
+        # provide the starting URL to the crawler
+        self.uncrawled.add(self.baseurl)
+
+        while len(self.uncrawled) > 0:
+            # print('################################ there are {0} uncrawled urls in the pool'.format(
+            #     len(self.uncrawled)))
+            url = self.uncrawled.pop()
+            # print('################ url popped, there are now {0} uncrawled urls in the pool'.format(
+            #     len(self.uncrawled)))
+            new_urls = await self.crawl_url(url=url)
+            for url in new_urls:
+                # print('adding: {0}'.format(url))
+                self.uncrawled.add(url)
+
+        await self.client_session.close()
+        return self.crawled
+
+
+
+
+
+
+
+
+
+
 
-        for url in self.uncrawled:
-            validated = validate_url(url=url)
-            if validated:
-                source = get_source(url=url)
-                links = find_links(source=source)
@@ -183,8 +231,8 @@ class WebPage(object):
         '''
         for url in self.discovered_hrefs:
             if url.startswith(self.base_url) and self.robots.check(url):
-                sanitised_url = sanitise_url(url=url)
-                self.urls_to_crawl.add(sanitised_url)
+                standardised_url = standardise_url(url=url, base_url=self.base_url)
+                self.urls_to_crawl.add(standardised_url)
 
 
     def list_urls(self):
@@ -251,31 +299,35 @@ class RobotsTxt(object):
         return self.robots.can_fetch("*", url)
 
 
-def sanitise_url(url, base_url=False):
+def standardise_url(url=None, base_url=None):
     '''
-    If `base_url` is True, we attempt to standardise `url` to ensure it can be
-    prepended to relative URLs. If no scheme has been provided then we default
+    If `base_url` is None then we attempt to standardise the URL to ensure it can be
+    prepended to relative URLs. If no scheme has been provided then we default
     to http as any sane https-only site should 301 redirect http > https.
 
-    If `base_url` is False, we sanitise URLs to strip queries and fragments (we
-    don't want to scrape in-page anchors etc).
+    If `base_url` is set, we standardise URLs to strip queries and fragments (we
+    don't want to scrape in-page anchors etc). Any relative URLs will be appended
+    to the base url.
 
-    Returns a sanitised URL as a string.
+    Returns a standardised URL as a string.
     '''
     default_proto = 'http'
     delim = '://'
     split_url = urlsplit(url)
 
-    if base_url:
-        # This will sanitise the initial url for the initial page crawl.
+    if not base_url:
+        # This will sanitise the initial url provided by the user.
         if split_url.scheme and split_url.scheme.startswith('http'):
-            sanitised_url = "".join([split_url.scheme, delim, split_url.netloc])
+            return "".join([split_url.scheme, delim, split_url.netloc])
         elif (split_url.path and not split_url.scheme and not split_url.netloc):
-            sanitised_url = "".join([default_proto, delim, split_url.path])
+            return "".join([default_proto, delim, split_url.path])
     else:
         # Sanitise discovered URLs. We already expect them in the format
        # protocol://base_url/path
-        sanitised_url = "".join([split_url.scheme, delim, split_url.netloc, split_url.path])
+        if url.startswith('/'):
+            return urljoin(base_url, split_url.path)
+        elif url.startswith(base_url):
+            return "".join([split_url.scheme, delim, split_url.netloc, split_url.path])
 
-    return sanitised_url
+    return None
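
Note: for reference, the concurrency pattern this change adopts (one shared aiohttp.ClientSession, with the number of in-flight requests capped by an asyncio.BoundedSemaphore and driven by loop.run_until_complete) boils down to the minimal, self-contained sketch below. The names fetch, crawl and CONCURRENCY are illustrative only and are not part of this repo.

    import asyncio
    import aiohttp

    CONCURRENCY = 50  # mirrors the crawler's default --concurrency

    async def fetch(session, semaphore, url):
        # the semaphore caps how many requests are in flight at once
        async with semaphore:
            try:
                async with session.get(url, timeout=5) as resp:
                    return await resp.read()
            except Exception:
                return None

    async def crawl(urls):
        # one session shared by every request, one semaphore shared by every task
        semaphore = asyncio.BoundedSemaphore(CONCURRENCY)
        async with aiohttp.ClientSession() as session:
            return await asyncio.gather(*(fetch(session, semaphore, u) for u in urls))

    if __name__ == '__main__':
        loop = asyncio.get_event_loop()
        pages = loop.run_until_complete(crawl(['http://example.com']))
        loop.close()
        print(len(pages))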