diff --git a/utils/helpers.py b/utils/helpers.py index 2b9a762..e4ed38b 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -53,16 +53,16 @@ class WebPage(object): for link in links: if link['href'].startswith('/'): - hrefs.add("".join([self.url, link['href']])) + hrefs.add(urljoin(self.url, link['href'])) else: hrefs.add(link['href']) - self.hrefs = hrefs + self.discovered_hrefs = hrefs def parse_urls(self): self.urls_to_crawl = set() - for url in self.hrefs: + for url in self.discovered_hrefs: if url.startswith(self.url): self.urls_to_crawl.add(url)