improve documentation
@@ -1,7 +1,15 @@
 #!/usr/bin/env python
 '''
-Need a docstring.
+Asynchronous web crawler written in Python 3.5+.
+
+This script will respect the site's `robots.txt`, if one exists. If not, all
+URLs discovered will be crawled.
+
+The crawler takes a total of two arguments (concurrency is optional):
+
+url: the base URL to begin the crawl from.
+concurrency: the maximum number of pages which may be crawled concurrently.
 '''
 
 import argparse
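The docstring added above says the crawler honours `robots.txt` when the site provides one and otherwise crawls every discovered URL. As a rough sketch of that policy only, not the code in this diff, the standard-library `urllib.robotparser` can make the check; the helper name `allowed_by_robots` is hypothetical:

    # Sketch only, not part of this commit: check a URL against the site's
    # robots.txt using the standard library. A missing robots.txt (HTTP 404)
    # is treated as "allow all" by RobotFileParser.read() itself; the except
    # clause below additionally allows the crawl when the file is unreachable.
    import urllib.robotparser
    from urllib.parse import urljoin, urlparse

    def allowed_by_robots(url, user_agent='*'):
        root = '{0.scheme}://{0.netloc}'.format(urlparse(url))
        parser = urllib.robotparser.RobotFileParser(urljoin(root, '/robots.txt'))
        try:
            parser.read()          # download and parse robots.txt
        except Exception:
            return True            # network failure: fall back to crawling
        return parser.can_fetch(user_agent, url)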
@@ -76,7 +84,7 @@ def main():
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser(description='Recursive web crawler')
-    parser.add_argument("-u", "--url", required=True, help="Base url to crawl")
+    parser.add_argument("-u", "--url", required=True, help="Initial url to crawl")
     parser.add_argument("-c", "--concurrency", required=False, type=int,
                         default=100, help="Max number of pages to crawl concurrently")
     args = parser.parse_args()
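The `--concurrency` flag parsed above corresponds to the docstring's promise that at most that many pages are crawled at once. A minimal sketch of one way such a cap can be enforced with asyncio follows; it is illustrative only, and the names `fetch_all` and `fetch_one` are hypothetical, not taken from this file:

    # Sketch only: cap in-flight page fetches at `concurrency` using a semaphore.
    import asyncio

    async def fetch_all(urls, concurrency=100):
        semaphore = asyncio.Semaphore(concurrency)

        async def fetch_one(url):
            async with semaphore:          # waits once `concurrency` fetches are running
                await asyncio.sleep(0)     # placeholder for the real HTTP request
                return url

        return await asyncio.gather(*(fetch_one(u) for u in urls))

    # e.g. asyncio.get_event_loop().run_until_complete(
    #          fetch_all(['https://example.com'], concurrency=args.concurrency))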