diff --git a/utils/helpers.py b/utils/helpers.py index a21639a..0945238 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -65,13 +65,16 @@ class WebPage(object): ''' request = urllib.request.Request(self.url, headers=self.headers) - page = urllib.request.urlopen(request, timeout=5) # handle - headers = page.info() - print(headers['content-type']) - if "gzip" in headers['content-type']: - self.source = gzip.decompress(page.read()) - elif "text/html" in headers['content-type'] or "deflate" in headers['content-type']: - self.source = page.read() + page = urllib.request.urlopen(request, timeout=5) + + if 'text/html' in page.info().get('Content-Type'): + if page.info().get('Content-Encoding'): + if page.info().get('Content-Encoding') == 'gzip': + self.source = gzip.decompress(page.read()) + elif page.info().get('Content-Encoding') == 'deflate': + self.source = page.read() + else: + self.source = page.read() def find_links(self):