File size: 1,860 Bytes
730ee00
 
 
ca01d03
730ee00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28df1e8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import asyncio
import aiohttp # type: ignore
import logging
import random
from typing import List, Dict, Any
from services.booking_service import BookingService
from models.requests import HotelQuery

logger = logging.getLogger(__name__)

class HotelScraper:
    """Coordinates concurrent scraping of hotel data via BookingService.

    Requests are staggered with randomized per-request delays (a crude
    rate limit against the target site) while still running concurrently
    under ``asyncio.gather``.
    """

    def __init__(self):
        # Single shared service instance; presumably stateless per-call —
        # NOTE(review): confirm BookingService is safe to share across tasks.
        self.booking_service = BookingService()

    async def _delayed_search(
        self,
        session: "aiohttp.ClientSession",
        query: HotelQuery,
        delay: float,
    ) -> Dict[str, Any]:
        """Sleep ``delay`` seconds, then run a single hotel search.

        Putting the delay *inside* the coroutine means the stagger happens
        while all tasks run concurrently under gather(); sleeping between
        coroutine creations (the old approach) would not stagger the actual
        HTTP requests at all, since coroutines don't start until awaited.
        """
        await asyncio.sleep(delay)
        return await self.booking_service.search_hotel(
            session=session,
            destination=query.destination,
            hotel_name=query.hotel_name
        )

    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
        """Scrape multiple hotels concurrently with staggered start times.

        Args:
            hotel_queries: Queries to execute; result order matches input order.

        Returns:
            One dict per query. A query whose task raised yields a dict
            containing "destination", "hotel_name" and an "error" message
            instead of propagating the exception, so one failure does not
            discard the other results.
        """
        logger.info("Starting to scrape %d hotels", len(hotel_queries))

        async with aiohttp.ClientSession() as session:
            # BUG FIX: the original awaited asyncio.sleep() while building
            # the coroutine list. Coroutines don't execute until gather()
            # awaits them, so that only delayed list construction — every
            # request still hit the server at the same instant. Cumulative
            # per-task delays inside the coroutine restore the intended
            # 4-8s spacing between request starts.
            start_offset = 0.0
            tasks = []
            for query in hotel_queries:
                start_offset += random.uniform(4, 8)
                tasks.append(self._delayed_search(session, query, start_offset))

            # Run all tasks concurrently; exceptions are returned in-place
            # rather than cancelling the whole batch.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Convert exceptions into structured error records.
            processed_results: List[Dict[str, Any]] = []
            for query, result in zip(hotel_queries, results):
                if isinstance(result, Exception):
                    logger.error(f"Error scraping hotel {query.hotel_name}: {result}")
                    processed_results.append({
                        "destination": query.destination,
                        "hotel_name": query.hotel_name,
                        "error": f"Scraping failed: {str(result)}"
                    })
                else:
                    processed_results.append(result)

            return processed_results