only attempt to read html

This commit is contained in:
2018-09-06 16:30:11 +01:00
parent a3ec9451e3
commit e57a86c60a
2 changed files with 4 additions and 2 deletions

View File

@@ -61,7 +61,9 @@ class WebPage(object):
         request = urllib.request.Request(self.url, headers=self.headers)
         page = urllib.request.urlopen(request, timeout=5)
-        self.source = page.read()
+        headers = page.info()
+        if headers['content-type'] == "text/html":
+            self.source = page.read()
     def find_links(self):