Improve detection and handling of gzip/deflate-encoded responses

This commit is contained in:
2018-09-09 11:21:46 +01:00
parent 1b005570ee
commit 9e754a5584

View File

@@ -65,13 +65,16 @@ class WebPage(object):
''' '''
request = urllib.request.Request(self.url, headers=self.headers) request = urllib.request.Request(self.url, headers=self.headers)
page = urllib.request.urlopen(request, timeout=5) # handle page = urllib.request.urlopen(request, timeout=5)
headers = page.info()
print(headers['content-type']) if 'text/html' in page.info().get('Content-Type'):
if "gzip" in headers['content-type']: if page.info().get('Content-Encoding'):
self.source = gzip.decompress(page.read()) if page.info().get('Content-Encoding') == 'gzip':
elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']: self.source = gzip.decompress(page.read())
self.source = page.read() elif page.info().get('Content-Encoding') == 'deflate':
self.source = page.read()
else:
self.source = page.read()
def find_links(self): def find_links(self):