Update core/scraper.py
core/scraper.py  +46 -45  CHANGED
@@ -1,46 +1,47 @@
-import asyncio
-import aiohttp  # type: ignore
-import logging
-from typing import List, Dict, Any
-from services.booking_service import BookingService
-from models.requests import HotelQuery
-
-logger = logging.getLogger(__name__)
-
-class HotelScraper:
-    """Main scraper class that coordinates the scraping process"""
-
-    def __init__(self):
-        self.booking_service = BookingService()
-
-    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
-        """Scrape multiple hotels concurrently"""
-        logger.info(f"Starting to scrape {len(hotel_queries)} hotels")
-
-        async with aiohttp.ClientSession() as session:
-            tasks = []
-            for query in hotel_queries:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-"
-"
-
-
-
-
+import asyncio
+import aiohttp  # type: ignore
+import logging
+from typing import List, Dict, Any
+from services.booking_service import BookingService
+from models.requests import HotelQuery
+
+logger = logging.getLogger(__name__)
+
+class HotelScraper:
+    """Main scraper class that coordinates the scraping process"""
+
+    def __init__(self):
+        self.booking_service = BookingService()
+
+    async def scrape_hotels(self, hotel_queries: List[HotelQuery]) -> List[Dict[str, Any]]:
+        """Scrape multiple hotels concurrently"""
+        logger.info(f"Starting to scrape {len(hotel_queries)} hotels")
+
+        async with aiohttp.ClientSession() as session:
+            tasks = []
+            for query in hotel_queries:
+                await asyncio.sleep(random.uniform(4, 8))
+                task = self.booking_service.search_hotel(
+                    session=session,
+                    destination=query.destination,
+                    hotel_name=query.hotel_name
+                )
+                tasks.append(task)
+
+            # Run all tasks concurrently
+            results = await asyncio.gather(*tasks, return_exceptions=True)
+
+            # Handle any exceptions
+            processed_results = []
+            for i, result in enumerate(results):
+                if isinstance(result, Exception):
+                    logger.error(f"Error scraping hotel {hotel_queries[i].hotel_name}: {result}")
+                    processed_results.append({
+                        "destination": hotel_queries[i].destination,
+                        "hotel_name": hotel_queries[i].hotel_name,
+                        "error": f"Scraping failed: {str(result)}"
+                    })
+                else:
+                    processed_results.append(result)
+
         return processed_results
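
Note: the added loop body calls random.uniform(4, 8), but this hunk does not show a matching "import random", so the module as diffed would raise a NameError unless that import already exists elsewhere in core/scraper.py or is added. Also, assuming BookingService.search_hotel is a coroutine function, the per-query sleep only spaces out the creation of coroutine objects; nothing actually runs until asyncio.gather schedules them, so the delays accumulate up front rather than between requests. The following is a minimal, self-contained sketch of the intended pattern (staggered starts, then gather with return_exceptions=True); the stub coroutine, its field names, and the shortened delays are illustrative assumptions, not part of this change.

import asyncio
import random
from typing import Any, Dict, List

async def fake_search_hotel(destination: str, hotel_name: str) -> Dict[str, Any]:
    # Illustrative stand-in for BookingService.search_hotel (not the real service).
    await asyncio.sleep(0.1)  # simulate network latency
    return {"destination": destination, "hotel_name": hotel_name, "status": "ok"}

async def scrape_all(queries: List[Dict[str, str]]) -> List[Dict[str, Any]]:
    tasks = []
    for q in queries:
        # Random pause before launching each request, mirroring the diff's
        # rate-limiting intent (delays shortened so the sketch runs quickly).
        await asyncio.sleep(random.uniform(0.01, 0.05))
        # create_task starts the request immediately, so the pauses space the
        # requests out instead of only delaying the whole batch.
        tasks.append(asyncio.create_task(
            fake_search_hotel(q["destination"], q["hotel_name"])
        ))

    # Exceptions come back as values instead of cancelling the other tasks.
    results = await asyncio.gather(*tasks, return_exceptions=True)

    processed: List[Dict[str, Any]] = []
    for q, result in zip(queries, results):
        if isinstance(result, Exception):
            processed.append({**q, "error": f"Scraping failed: {result}"})
        else:
            processed.append(result)
    return processed

if __name__ == "__main__":
    demo = [
        {"destination": "Paris", "hotel_name": "Hotel Lutetia"},
        {"destination": "Rome", "hotel_name": "Hotel Eden"},
    ]
    print(asyncio.run(scrape_all(demo)))

Running the sketch prints one result dict per query; replacing fake_search_hotel with the real BookingService.search_hotel call (and passing the aiohttp session through, as the diff does) recovers the behaviour this change adds.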
|