Correct semaphore usage
This commit is contained in:
@@ -38,16 +38,12 @@ class AsyncCrawler(object):
|
||||
docstring
|
||||
'''
|
||||
urls = set()
|
||||
async with self.semaphore:
|
||||
source = await self.get_source(url)
|
||||
if source:
|
||||
# add the URL we've just crawled
|
||||
self.crawled.add(url)
|
||||
for new_url in self.find_all_urls(source):
|
||||
urls.add(new_url)
|
||||
# urls_to_crawl = self.find_all_urls(source)
|
||||
# for new_url in urls_to_crawl:
|
||||
# urls.add(new_url)
|
||||
|
||||
return urls
|
||||
|
||||
@@ -71,6 +67,7 @@ class AsyncCrawler(object):
|
||||
Obtains the page's source.
|
||||
'''
|
||||
print('semaphore held for {0}'.format(url))
|
||||
async with self.semaphore:
|
||||
async with self.client_session.get(url, timeout=5) as resp:
|
||||
try:
|
||||
source = await resp.read()
|
||||
|
||||
Reference in New Issue
Block a user