| import asyncio
|
| import aiohttp
|
| import logging
|
| from typing import List, Dict, Any
|
| from services.booking_service import BookingService
|
| from models.requests import HotelQuery
|
|
|
logger = logging.getLogger(__name__)  # module-level logger, named after this module per logging convention
|
|
|
class HotelScraper:
    """Main scraper class that coordinates the scraping process.

    Fans a batch of hotel queries out over a single shared aiohttp
    session via ``BookingService`` and collects per-query results.
    """

    def __init__(self):
        # One service instance is reused for every query in the batch.
        self.booking_service = BookingService()

    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
        """Scrape multiple hotels concurrently.

        Args:
            hotel_queries: Queries, each carrying a ``destination`` and a
                ``hotel_name``.

        Returns:
            One dict per query, in the same order as ``hotel_queries``.
            A query whose lookup raised is replaced by a dict with the
            query's ``destination``/``hotel_name`` and an ``"error"``
            message, so one failure never aborts the whole batch.
        """
        # Lazy %-style args: formatting is skipped if INFO is disabled.
        logger.info("Starting to scrape %d hotels", len(hotel_queries))

        async with aiohttp.ClientSession() as session:
            # Create all coroutines up front so gather() runs them
            # concurrently over the single shared session.
            tasks = [
                self.booking_service.search_hotel(
                    session=session,
                    destination=query.destination,
                    hotel_name=query.hotel_name,
                )
                for query in hotel_queries
            ]

            # return_exceptions=True turns per-task exceptions into
            # values so one failed lookup cannot cancel its siblings.
            results = await asyncio.gather(*tasks, return_exceptions=True)

        # gather() preserves input order, so results pair 1:1 with queries.
        processed_results: List[Dict[str, Any]] = []
        for query, result in zip(hotel_queries, results):
            if isinstance(result, Exception):
                logger.error("Error scraping hotel %s: %s", query.hotel_name, result)
                processed_results.append({
                    "destination": query.destination,
                    "hotel_name": query.hotel_name,
                    "error": f"Scraping failed: {str(result)}",
                })
            else:
                processed_results.append(result)

        return processed_results