more documentation and add back some required imports

2018-09-16 09:00:43 +01:00
parent 7bc9fe0679
commit 336517e84a


@@ -6,7 +6,9 @@ Utilities to provide various misc functions.
 import aiohttp
 import asyncio
 from bs4 import BeautifulSoup
+import urllib.error
 from urllib.parse import urljoin, urlsplit
 import urllib.request
+import urllib.robotparser
@@ -151,7 +153,9 @@ class RobotsTxt(object):
     def __init__(self, base_url=None):
         '''
-        Manually retrieve robots.txt to allow us to set the user-agent.
+        Manually retrieve robots.txt to allow us to set the user-agent (works
+        around sites which disallow access to robots.txt without a sane
+        user-agent).
         '''
         self.base_url = base_url
         self.headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) Gecko/20100101 Firefox/61.0'}
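
For context, here is a minimal sketch of the work-around the new docstring describes: fetch robots.txt ourselves with a browser-like User-Agent, then feed the body to urllib.robotparser, rather than letting RobotFileParser.read() fetch it with Python's default User-Agent, which some sites reject. That is presumably why the commit adds back urllib.error and urllib.robotparser alongside the existing urllib.request import. The fetch_robots helper and the example.com URLs below are hypothetical, not the repository's actual code.

import urllib.error
import urllib.request
import urllib.robotparser
from urllib.parse import urljoin

def fetch_robots(base_url):
    # Hypothetical helper: request robots.txt with a browser-like
    # User-Agent so sites that reject Python's default one still answer.
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:61.0) '
                             'Gecko/20100101 Firefox/61.0'}
    parser = urllib.robotparser.RobotFileParser()
    request = urllib.request.Request(urljoin(base_url, '/robots.txt'),
                                     headers=headers)
    try:
        with urllib.request.urlopen(request) as response:
            body = response.read().decode('utf-8', errors='replace')
    except urllib.error.URLError:
        # Treat a missing or unreachable robots.txt as allow-all in this
        # sketch; real code may prefer to mirror RobotFileParser.read(),
        # which disallows everything on 401/403.
        parser.parse([])
        return parser
    parser.parse(body.splitlines())
    return parser

# Usage: check whether a crawler may fetch a given URL.
robots = fetch_robots('https://example.com/')
print(robots.can_fetch('*', 'https://example.com/some/page'))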