improve documentation
@@ -1,7 +1,15 @@
 #!/usr/bin/env python
 '''
-Need a docstring.
+Asynchronous web crawler written in Python 3.5+.
+
+This script will respect the site's `robots.txt`, if one exists. If not, all
+URLs discovered will be crawled.
+
+The crawler takes a total of two arguments (concurrency is optional):
+
+url: the base URL to begin the crawl from.
+concurrency: the maximum number of pages which may be crawled concurrently.
 '''
 
 import argparse
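The docstring added above says the crawler honours `robots.txt` when the site provides one and otherwise crawls every discovered URL. As a rough sketch of that policy only, not the code in this diff, the standard-library `urllib.robotparser` can make the check; the helper name `allowed_by_robots` is hypothetical:

    # Sketch only, not part of this commit: check a URL against the site's
    # robots.txt using the standard library. A missing robots.txt (HTTP 404)
    # is treated as "allow all" by RobotFileParser.read() itself; the except
    # clause below additionally allows the crawl when the file is unreachable.
    import urllib.robotparser
    from urllib.parse import urljoin, urlparse

    def allowed_by_robots(url, user_agent='*'):
        root = '{0.scheme}://{0.netloc}'.format(urlparse(url))
        parser = urllib.robotparser.RobotFileParser(urljoin(root, '/robots.txt'))
        try:
            parser.read()          # download and parse robots.txt
        except Exception:
            return True            # network failure: fall back to crawling
        return parser.can_fetch(user_agent, url)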
@@ -76,7 +84,7 @@ def main():
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser(description='Recursive web crawler')
-    parser.add_argument("-u", "--url", required=True, help="Base url to crawl")
+    parser.add_argument("-u", "--url", required=True, help="Initial url to crawl")
     parser.add_argument("-c", "--concurrency", required=False, type=int,
                         default=100, help="Max number of pages to crawl concurrently")
     args = parser.parse_args()
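The `--concurrency` flag parsed above corresponds to the docstring's promise that at most that many pages are crawled at once. A minimal sketch of one way such a cap can be enforced with asyncio follows; it is illustrative only, and the names `fetch_all` and `fetch_one` are hypothetical, not taken from this file:

    # Sketch only: cap in-flight page fetches at `concurrency` using a semaphore.
    import asyncio

    async def fetch_all(urls, concurrency=100):
        semaphore = asyncio.Semaphore(concurrency)

        async def fetch_one(url):
            async with semaphore:          # waits once `concurrency` fetches are running
                await asyncio.sleep(0)     # placeholder for the real HTTP request
                return url

        return await asyncio.gather(*(fetch_one(u) for u in urls))

    # e.g. asyncio.get_event_loop().run_until_complete(
    #          fetch_all(['https://example.com'], concurrency=args.concurrency))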