garvitcpp committed on
Commit
8e956a0
·
verified ·
1 Parent(s): 730ee00

Update services/utils/http_utils.py

Browse files
Files changed (1) hide show
  1. services/utils/http_utils.py +54 -20
services/utils/http_utils.py CHANGED
@@ -1,20 +1,54 @@
1
- import aiohttp # type: ignore
2
- import logging
3
- from typing import Optional
4
-
5
- logger = logging.getLogger(__name__)
6
-
7
async def fetch_page(session: aiohttp.ClientSession, url: str, headers: dict) -> Optional[str]:
    """Retrieve *url* through the shared aiohttp *session*.

    Returns the response body text on HTTP 200; returns None for any other
    status code or if the request raises.
    """
    try:
        logger.info(f"Requesting URL: {url}")
        async with session.get(url, headers=headers, timeout=15) as response:
            # Guard clause: anything other than 200 is treated as a failure.
            if response.status != 200:
                logger.error(f"Error retrieving URL {url}: Status code {response.status}")
                return None
            logger.debug(f"Successfully retrieved content from {url}")
            return await response.text()
    except Exception as e:
        # Broad catch is intentional: timeouts, DNS and connection errors all
        # degrade to a None result for the caller.
        logger.error(f"Request failed for {url}: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiohttp # type: ignore
2
+ import logging
3
+ import asyncio
4
+ import random
5
+ import os
6
+ from typing import Optional, Dict
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
async def fetch_page(session: aiohttp.ClientSession, url: str, headers: Dict[str, str]) -> Optional[str]:
    """Fetch a page using aiohttp with retry logic for 202 status codes.

    Args:
        session: Shared aiohttp client session used for the request.
        url: Absolute URL to fetch.
        headers: Request headers to send.

    Returns:
        The response body text on HTTP 200, or None if every attempt fails
        (non-200/202 status, exhausted 202 retries, or request errors).
    """
    # Randomized cookies to mimic browser behavior — names/values match what
    # the target site sets; only the numeric suffixes vary per call.
    cookies = {
        "bkng": f"11UmFuZG9tSVYkc2RlIyh9YSk7IZWFkeH97ry3Z{random.randint(10000, 99999)}=",
        "bkng_sso_session": "e30",
        "bkng_sso_ses": str(random.randint(1000000, 9999999)),
    }

    # Optional proxy, configured via environment variable.
    proxy = os.environ.get("PROXY_URL")

    max_attempts = 3
    for attempt in range(max_attempts):
        last_attempt = attempt == max_attempts - 1
        try:
            logger.info("Requesting URL: %s (attempt %d)", url, attempt + 1)

            request_kwargs = {
                "headers": headers,
                "cookies": cookies,
                # Bare-int timeouts are deprecated in aiohttp; use ClientTimeout.
                "timeout": aiohttp.ClientTimeout(total=30),
                "allow_redirects": True,
            }
            # Add proxy only if configured.
            if proxy:
                request_kwargs["proxy"] = proxy

            async with session.get(url, **request_kwargs) as response:
                if response.status == 200:
                    logger.debug("Successfully retrieved content from %s", url)
                    return await response.text()
                if response.status == 202:
                    # 202 is a soft "not ready / come back later" signal here:
                    # back off with jitter scaled by the attempt number, but
                    # never sleep after the final attempt (nothing follows it).
                    if not last_attempt:
                        wait_time = random.uniform(5, 10) * (attempt + 1)
                        logger.warning("Got 202 status code, waiting %.1fs before retry", wait_time)
                        await asyncio.sleep(wait_time)
                    continue
                # Any other status is a hard failure — do not retry.
                logger.error("Error retrieving URL %s: Status code %s", url, response.status)
                return None
        except Exception as e:
            # Broad catch is deliberate: connection errors, DNS failures and
            # asyncio.TimeoutError all fall through to a retry (with backoff)
            # or, on the last attempt, to the final None below.
            logger.error("Request failed for %s: %s", url, e)
            if not last_attempt:
                await asyncio.sleep(random.uniform(2, 5))

    # All attempts exhausted (repeated 202s or repeated errors).
    return None