Improve detection and handling of gzip- and deflate-encoded response data

This commit is contained in:
2018-09-09 11:21:46 +01:00
parent 1b005570ee
commit 9e754a5584

View File

@@ -65,13 +65,16 @@ class WebPage(object):
'''
request = urllib.request.Request(self.url, headers=self.headers)
page = urllib.request.urlopen(request, timeout=5) # handle
headers = page.info()
print(headers['content-type'])
if "gzip" in headers['content-type']:
self.source = gzip.decompress(page.read())
elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']:
self.source = page.read()
page = urllib.request.urlopen(request, timeout=5)
if 'text/html' in page.info().get('Content-Type'):
if page.info().get('Content-Encoding'):
if page.info().get('Content-Encoding') == 'gzip':
self.source = gzip.decompress(page.read())
elif page.info().get('Content-Encoding') == 'deflate':
self.source = page.read()
else:
self.source = page.read()
def find_links(self):