Improve detection and handling of gzip- and deflate-encoded response data
This commit is contained in:
@@ -65,12 +65,15 @@ class WebPage(object):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
request = urllib.request.Request(self.url, headers=self.headers)
|
request = urllib.request.Request(self.url, headers=self.headers)
|
||||||
page = urllib.request.urlopen(request, timeout=5) # handle
|
page = urllib.request.urlopen(request, timeout=5)
|
||||||
headers = page.info()
|
|
||||||
print(headers['content-type'])
|
if 'text/html' in page.info().get('Content-Type'):
|
||||||
if "gzip" in headers['content-type']:
|
if page.info().get('Content-Encoding'):
|
||||||
|
if page.info().get('Content-Encoding') == 'gzip':
|
||||||
self.source = gzip.decompress(page.read())
|
self.source = gzip.decompress(page.read())
|
||||||
elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']:
|
elif page.info().get('Content-Encoding') == 'deflate':
|
||||||
|
self.source = page.read()
|
||||||
|
else:
|
||||||
self.source = page.read()
|
self.source = page.read()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user