Improve detection and handling of gzip/deflate-encoded responses

2018-09-09 11:21:46 +01:00
parent 1b005570ee
commit 9e754a5584
1 changed files with 10 additions and 7 deletions
--- a/utils/helpers.py
+++ b/utils/helpers.py
@@ -65,12 +65,15 @@ class WebPage(object):
        '''
        request = urllib.request.Request(self.url, headers=self.headers)
-        page = urllib.request.urlopen(request, timeout=5) # handle
+        page = urllib.request.urlopen(request, timeout=5)
-        headers = page.info()
+
-        print(headers['content-type'])
+        if 'text/html' in page.info().get('Content-Type'):
-        if "gzip" in headers['content-type']:
+            if page.info().get('Content-Encoding'):
                if page.info().get('Content-Encoding') == 'gzip':
                    self.source = gzip.decompress(page.read())
-        elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']:
+                elif page.info().get('Content-Encoding') == 'deflate':
                    self.source = page.read()
            else:
                self.source = page.read()