| import asyncio |
| import aiohttp |
| import logging |
| import random |
| from typing import List, Dict, Any |
| from services.booking_service import BookingService |
| from models.requests import HotelQuery |
|
|
| logger = logging.getLogger(__name__) |
|
|
class HotelScraper:
    """Coordinates concurrent scraping of hotel data via ``BookingService``."""

    # Jitter bounds (seconds) between successive request *start* times.
    # Spreading starts out avoids hammering the target site all at once.
    _MIN_DELAY = 4.0
    _MAX_DELAY = 8.0

    def __init__(self):
        self.booking_service = BookingService()

    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
        """Scrape multiple hotels concurrently.

        Each request starts after its own staggered delay so request start
        times are spread out (rate limiting) while the requests themselves
        overlap and run concurrently.

        Args:
            hotel_queries: Queries identifying the hotels to scrape.

        Returns:
            One dict per query, in the same order as ``hotel_queries``.
            A failed scrape yields a dict containing the query's
            ``destination``/``hotel_name`` plus an ``"error"`` message
            instead of raising.
        """
        logger.info("Starting to scrape %d hotels", len(hotel_queries))

        async with aiohttp.ClientSession() as session:
            # BUG FIX: the original awaited asyncio.sleep() while *building*
            # the task list.  Coroutines do not run until gathered, so that
            # added len(queries) * ~6s of dead serial time up front and then
            # fired every request simultaneously — no actual stagger.
            # Instead, each task sleeps for its own cumulative offset before
            # issuing its request: starts are spread out, work is concurrent.
            delay = 0.0
            tasks = []
            for query in hotel_queries:
                tasks.append(self._delayed_search(session, query, delay))
                delay += random.uniform(self._MIN_DELAY, self._MAX_DELAY)

            # return_exceptions=True keeps one failure from cancelling the
            # rest of the batch; exceptions come back as result values.
            results = await asyncio.gather(*tasks, return_exceptions=True)

        # Convert exceptions into structured error records so callers always
        # receive a uniform list of dicts aligned with their queries.
        processed_results: List[Dict[str, Any]] = []
        for query, result in zip(hotel_queries, results):
            if isinstance(result, Exception):
                logger.error("Error scraping hotel %s: %s", query.hotel_name, result)
                processed_results.append({
                    "destination": query.destination,
                    "hotel_name": query.hotel_name,
                    "error": f"Scraping failed: {str(result)}"
                })
            else:
                processed_results.append(result)

        return processed_results

    async def _delayed_search(
        self, session: aiohttp.ClientSession, query: HotelQuery, delay: float
    ) -> Dict[str, Any]:
        """Wait ``delay`` seconds, then perform a single hotel search."""
        if delay > 0:
            await asyncio.sleep(delay)
        return await self.booking_service.search_hotel(
            session=session,
            destination=query.destination,
            hotel_name=query.hotel_name
        )