Improve detection and handling of gzip/deflate-compressed data
This commit is contained in:
@@ -65,13 +65,16 @@ class WebPage(object):
|
||||
'''
|
||||
|
||||
request = urllib.request.Request(self.url, headers=self.headers)
|
||||
page = urllib.request.urlopen(request, timeout=5) # handle
|
||||
headers = page.info()
|
||||
print(headers['content-type'])
|
||||
if "gzip" in headers['content-type']:
|
||||
self.source = gzip.decompress(page.read())
|
||||
elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']:
|
||||
self.source = page.read()
|
||||
page = urllib.request.urlopen(request, timeout=5)
|
||||
|
||||
if 'text/html' in page.info().get('Content-Type'):
|
||||
if page.info().get('Content-Encoding'):
|
||||
if page.info().get('Content-Encoding') == 'gzip':
|
||||
self.source = gzip.decompress(page.read())
|
||||
elif page.info().get('Content-Encoding') == 'deflate':
|
||||
self.source = page.read()
|
||||
else:
|
||||
self.source = page.read()
|
||||
|
||||
|
||||
def find_links(self):
|
||||
|
||||
Reference in New Issue
Block a user