correct semaphore usage

This commit is contained in:
2018-09-14 16:06:17 +01:00
parent 7ebe4855b8
commit 3808f72f73

View File

@@ -38,16 +38,12 @@ class AsyncCrawler(object):
docstring docstring
''' '''
urls = set() urls = set()
async with self.semaphore:
source = await self.get_source(url) source = await self.get_source(url)
if source: if source:
# add the URL we've just crawled # add the URL we've just crawled
self.crawled.add(url) self.crawled.add(url)
for new_url in self.find_all_urls(source): for new_url in self.find_all_urls(source):
urls.add(new_url) urls.add(new_url)
# urls_to_crawl = self.find_all_urls(source)
# for new_url in urls_to_crawl:
# urls.add(new_url)
return urls return urls
@@ -71,6 +67,7 @@ class AsyncCrawler(object):
Obtains the page's source. Obtains the page's source.
''' '''
print('semaphore held for {0}'.format(url)) print('semaphore held for {0}'.format(url))
async with self.semaphore:
async with self.client_session.get(url, timeout=5) as resp: async with self.client_session.get(url, timeout=5) as resp:
try: try:
source = await resp.read() source = await resp.read()