diff --git a/utils/helpers.py b/utils/helpers.py index 0a2275b..10f0d5f 100644 --- a/utils/helpers.py +++ b/utils/helpers.py @@ -53,6 +53,8 @@ class WebPage(object): self.url = url self.base_url = base_url self.robots = robots + self.source = None + self.urls_to_crawl = set() def get_source(self): @@ -75,7 +77,7 @@ class WebPage(object): ''' hrefs = set() - soup = BeautifulSoup(self.source, 'lxml') # handle no source + soup = BeautifulSoup(self.source, 'lxml') links = soup.find_all('a', href=True) for link in links: @@ -92,7 +94,6 @@ class WebPage(object): Iterate through the list of discovered URLs and add them to the pool if they start with the base URL. ''' - self.urls_to_crawl = set() for url in self.discovered_hrefs: #handle no hrefs found if url.startswith(self.url): @@ -112,8 +113,8 @@ class WebPage(object): def run(self): try: self.get_source() - except Exception as e: - print(e) + except Exception: + pass if self.source: self.find_links()