garvitcpp committed on
Commit
730ee00
·
verified ·
1 Parent(s): 714045e

Update core/scraper.py

Browse files
Files changed (1) hide show
  1. core/scraper.py +46 -45
core/scraper.py CHANGED
@@ -1,46 +1,47 @@
1
- import asyncio
2
- import aiohttp # type: ignore
3
- import logging
4
- from typing import List, Dict, Any
5
- from services.booking_service import BookingService
6
- from models.requests import HotelQuery
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
- class HotelScraper:
11
- """Main scraper class that coordinates the scraping process"""
12
-
13
- def __init__(self):
14
- self.booking_service = BookingService()
15
-
16
- async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
17
- """Scrape multiple hotels concurrently"""
18
- logger.info(f"Starting to scrape {len(hotel_queries)} hotels")
19
-
20
- async with aiohttp.ClientSession() as session:
21
- tasks = []
22
- for query in hotel_queries:
23
- task = self.booking_service.search_hotel(
24
- session=session,
25
- destination=query.destination,
26
- hotel_name=query.hotel_name
27
- )
28
- tasks.append(task)
29
-
30
- # Run all tasks concurrently
31
- results = await asyncio.gather(*tasks, return_exceptions=True)
32
-
33
- # Handle any exceptions
34
- processed_results = []
35
- for i, result in enumerate(results):
36
- if isinstance(result, Exception):
37
- logger.error(f"Error scraping hotel {hotel_queries[i].hotel_name}: {result}")
38
- processed_results.append({
39
- "destination": hotel_queries[i].destination,
40
- "hotel_name": hotel_queries[i].hotel_name,
41
- "error": f"Scraping failed: {str(result)}"
42
- })
43
- else:
44
- processed_results.append(result)
45
-
 
46
  return processed_results
 
1
import asyncio
import logging
import random

import aiohttp  # type: ignore

from models.requests import HotelQuery
from services.booking_service import BookingService
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
class HotelScraper:
    """Main scraper class that coordinates the scraping process."""

    def __init__(self):
        # One shared service instance handles every hotel lookup.
        self.booking_service = BookingService()

    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
        """Scrape multiple hotels concurrently.

        Each request is preceded by a random 4-8 s delay so the
        concurrent requests are staggered (basic politeness / rate
        limiting toward the target site) while still overlapping.

        Args:
            hotel_queries: Hotels to look up (destination + hotel name).

        Returns:
            One dict per query, in input order. A failed lookup yields a
            dict with "destination", "hotel_name" and an "error" message
            instead of raising.
        """
        logger.info("Starting to scrape %d hotels", len(hotel_queries))

        async def _delayed_search(session: aiohttp.ClientSession, query: HotelQuery) -> Dict[str, Any]:
            # The jitter is awaited INSIDE each task, not in the
            # task-creation loop: sleeping in the loop would serialize
            # all delays up front (4-8 s per hotel of dead time) without
            # staggering the actual requests. Here total wall time is
            # bounded by the slowest single task.
            await asyncio.sleep(random.uniform(4, 8))
            return await self.booking_service.search_hotel(
                session=session,
                destination=query.destination,
                hotel_name=query.hotel_name,
            )

        async with aiohttp.ClientSession() as session:
            tasks = [_delayed_search(session, query) for query in hotel_queries]
            # return_exceptions=True keeps one failure from cancelling
            # the remaining lookups; exceptions become error dicts below.
            results = await asyncio.gather(*tasks, return_exceptions=True)

        processed_results: List[Dict[str, Any]] = []
        for query, result in zip(hotel_queries, results):
            if isinstance(result, Exception):
                logger.error("Error scraping hotel %s: %s", query.hotel_name, result)
                processed_results.append({
                    "destination": query.destination,
                    "hotel_name": query.hotel_name,
                    "error": f"Scraping failed: {str(result)}"
                })
            else:
                processed_results.append(result)

        return processed_results