From 759f965e9588ae5d3d740c3e23c5bc5b3e0e0c59 Mon Sep 17 00:00:00 2001 From: Simon Weald Date: Fri, 31 Aug 2018 19:12:58 +0100 Subject: [PATCH] use more explicit names, use urljoin to combine urls --- utils/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/helpers.py b/utils/helpers.py index 2b9a762..e4ed38b 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -53,16 +53,16 @@ class WebPage(object): for link in links: if link['href'].startswith('/'): - hrefs.add("".join([self.url, link['href']])) + hrefs.add(urljoin(self.url, link['href'])) else: hrefs.add(link['href']) - self.hrefs = hrefs + self.discovered_hrefs = hrefs def parse_urls(self): self.urls_to_crawl = set() - for url in self.hrefs: + for url in self.discovered_hrefs: if url.startswith(self.url): self.urls_to_crawl.add(url)