correct semaphore usage: hold the semaphore only around the HTTP request, not around URL extraction as well

This commit is contained in:
2018-09-14 16:06:17 +01:00
parent 7ebe4855b8
commit 3808f72f73

View File

@@ -38,16 +38,12 @@ class AsyncCrawler(object):
docstring
'''
urls = set()
async with self.semaphore:
source = await self.get_source(url)
if source:
# add the URL we've just crawled
self.crawled.add(url)
for new_url in self.find_all_urls(source):
urls.add(new_url)
# urls_to_crawl = self.find_all_urls(source)
# for new_url in urls_to_crawl:
# urls.add(new_url)
source = await self.get_source(url)
if source:
# add the URL we've just crawled
self.crawled.add(url)
for new_url in self.find_all_urls(source):
urls.add(new_url)
return urls
@@ -71,12 +67,13 @@ class AsyncCrawler(object):
Obtains the page's source.
'''
print('semaphore held for {0}'.format(url))
async with self.client_session.get(url, timeout=5) as resp:
try:
source = await resp.read()
return source
except Exception:
return None
async with self.semaphore:
async with self.client_session.get(url, timeout=5) as resp:
try:
source = await resp.read()
return source
except Exception:
return None
def find_all_urls(self, source=None):