Update services/utils/http_utils.py

services/utils/http_utils.py (CHANGED): +112 -8
@@ -1,20 +1,124 @@
-import aiohttp
+import aiohttp
 import logging
-from typing import Optional
+from typing import Optional, List
+import asyncio
+from fp.fp import FreeProxy
+import random
+from aiohttp_retry import RetryClient, ExponentialRetry
+import time
 
 logger = logging.getLogger(__name__)
 
+# Cache for working proxies
+WORKING_PROXIES = []
+PROXY_REFRESH_TIME = 0
+PROXY_REFRESH_INTERVAL = 60 * 10  # 10 minutes
+
+def get_working_proxies() -> List[str]:
+    """Get a list of working proxies"""
+    global WORKING_PROXIES, PROXY_REFRESH_TIME
+
+    current_time = time.time()
+
+    # If we have proxies and they haven't expired, use them
+    if WORKING_PROXIES and (current_time - PROXY_REFRESH_TIME) < PROXY_REFRESH_INTERVAL:
+        return WORKING_PROXIES
+
+    # Get new proxies
+    try:
+        proxies = []
+        # Try to get 5 working proxies
+        for _ in range(5):
+            try:
+                proxy = FreeProxy(https=True, rand=True, timeout=1).get()
+                if proxy and proxy not in proxies:
+                    proxies.append(proxy)
+            except Exception:
+                pass
+
+        if proxies:
+            WORKING_PROXIES = proxies
+            PROXY_REFRESH_TIME = current_time
+            logger.info(f"Refreshed proxy list, found {len(proxies)} working proxies")
+            return WORKING_PROXIES
+    except Exception as e:
+        logger.error(f"Error refreshing proxy list: {e}")
+
+    return WORKING_PROXIES  # Return whatever we have, even if it's empty
+
 async def fetch_page(session: aiohttp.ClientSession, url: str, headers: dict) -> Optional[str]:
-    """Fetch a page using aiohttp"""
+    """Fetch a page using aiohttp with free proxies and retry logic"""
+    logger.info(f"Requesting URL: {url}")
+
+    # Get the list of working proxies
+    proxies = get_working_proxies()
+
+    # Try with proxies if available
+    if proxies:
+        # Shuffle proxies for better distribution
+        random.shuffle(proxies)
+
+        # Try each proxy until one works
+        for proxy in proxies:
+            try:
+                logger.info(f"Trying with proxy: {proxy}")
+                # Configure retry client, reusing the caller's session so connections are not leaked
+                retry_options = ExponentialRetry(attempts=2)
+                retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
+
+                async with retry_client.get(
+                    url,
+                    headers=headers,
+                    proxy=proxy,
+                    timeout=20,
+                    ssl=False  # Some free proxies don't support SSL verification
+                ) as response:
+                    if response.status == 200:
+                        logger.info("Successfully retrieved content via proxy")
+                        return await response.text()
+                    else:
+                        logger.warning(f"Proxy {proxy} failed with status {response.status}")
+            except Exception as e:
+                logger.warning(f"Error using proxy {proxy}: {e}")
+                continue
+
+    # If all proxies failed or no proxies are available, try a direct request with disguised headers
+    logger.info("All proxies failed or no proxies available, trying direct request with disguised headers")
+    return await direct_request(session, url, headers)
+
+async def direct_request(session: aiohttp.ClientSession, url: str, headers: dict) -> Optional[str]:
+    """Attempt a direct request with enhanced browser-like headers"""
+    # Enhance headers to look more like a real browser
+    enhanced_headers = headers.copy()
+    enhanced_headers.update({
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
+        "Accept-Language": "en-US,en;q=0.9",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Connection": "keep-alive",
+        "Cache-Control": "max-age=0",
+        "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8"',
+        "Sec-Ch-Ua-Mobile": "?0",
+        "Sec-Ch-Ua-Platform": '"Windows"',
+        "Sec-Fetch-Dest": "document",
+        "Sec-Fetch-Mode": "navigate",
+        "Sec-Fetch-Site": "none",
+        "Sec-Fetch-User": "?1",
+        "Upgrade-Insecure-Requests": "1",
+        "Referer": "https://www.google.com/"
+    })
+
     try:
-        …
-        async with session.get(url, headers=headers) as response:
+        # Configure retry client for direct requests too, again reusing the caller's session
+        retry_options = ExponentialRetry(attempts=3)
+        retry_client = RetryClient(client_session=session, raise_for_status=False, retry_options=retry_options)
+
+        async with retry_client.get(url, headers=enhanced_headers, timeout=20) as response:
             if response.status == 200:
-                logger.…
+                logger.info("Successfully retrieved content directly")
                 return await response.text()
             else:
-                logger.error(f"…
+                logger.error(f"Direct request failed with status code {response.status}")
                 return None
     except Exception as e:
-        logger.error(f"…
+        logger.error(f"Direct request failed: {e}")
         return None
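
For reference, here is a minimal driver for the updated fetch_page. This is a sketch, not part of the commit: the URL, headers, and import path are illustrative.

    import asyncio

    import aiohttp

    from services.utils.http_utils import fetch_page

    async def main() -> None:
        # Hypothetical caller values; fetch_page takes any URL and header dict.
        url = "https://example.com/"
        headers = {"User-Agent": "Mozilla/5.0"}
        # One shared session; the RetryClient instances inside fetch_page reuse it.
        async with aiohttp.ClientSession() as session:
            html = await fetch_page(session, url, headers)
            print("fetch failed" if html is None else f"fetched {len(html)} characters")

    if __name__ == "__main__":
        asyncio.run(main())

Note that fetch_page never raises on network errors: it returns None only after every cached proxy and the direct fallback have failed, so callers just check for None.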
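
The retry policy is concentrated in the two ExponentialRetry(...) calls (2 attempts per proxy, 3 for the direct fallback). If the backoff ever needs tuning, aiohttp_retry exposes a few more knobs; a sketch, assuming a current aiohttp_retry release (verify the parameter names against the installed version):

    from aiohttp_retry import ExponentialRetry

    # 3 attempts, 0.5 s initial delay doubling per attempt and capped at 10 s,
    # also retrying on typical rate-limit/server-error statuses.
    retry_options = ExponentialRetry(
        attempts=3,
        start_timeout=0.5,
        max_timeout=10.0,
        factor=2.0,
        statuses={429, 500, 502, 503, 504},
    )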
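
Similarly, the FreeProxy lookup in get_working_proxies() can be narrowed. A sketch of the filters the free-proxy package offers, again worth double-checking against the installed version:

    from fp.fp import FreeProxy

    # Hypothetical stricter lookup: high-anonymity (elite) HTTPS proxies
    # from specific countries, with the same 1-second health check.
    proxy = FreeProxy(country_id=["US", "GB"], https=True, elite=True, timeout=1).get()
    print(proxy)  # e.g. "http://203.0.113.10:8080"

FreeProxy.get() raises when no candidate passes its check, which is why get_working_proxies() wraps each call in try/except and still verifies the returned value.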