jashdoshi77 committed on
Commit
569c7d4
·
1 Parent(s): 14574e0

fix: use curl_cffi for Chrome TLS fingerprint impersonation on cloud

Browse files
backend/app/services/data_ingestion/yahoo.py CHANGED
@@ -18,7 +18,6 @@ from functools import partial
18
  from typing import Any, Dict, List, Optional
19
 
20
  import pandas as pd
21
- import requests
22
  import yfinance as yf
23
 
24
  from app.redis_client import cache_get, cache_set
@@ -37,26 +36,31 @@ _yf_pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="yfinance")
37
  YF_TIMEOUT = 15
38
 
39
 
40
- def _create_yf_session() -> requests.Session:
41
- """Create a requests session with browser-like headers.
42
 
43
- Yahoo Finance blocks bare requests from cloud/datacenter IPs.
44
- Using browser headers and proper cookie handling bypasses this.
 
 
45
  """
46
- session = requests.Session()
47
- session.headers.update({
48
- "User-Agent": (
49
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
50
- "AppleWebKit/537.36 (KHTML, like Gecko) "
51
- "Chrome/131.0.0.0 Safari/537.36"
52
- ),
53
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
54
- "Accept-Language": "en-US,en;q=0.5",
55
- "Accept-Encoding": "gzip, deflate",
56
- "Connection": "keep-alive",
57
- "Upgrade-Insecure-Requests": "1",
58
- })
59
- return session
 
 
 
60
 
61
 
62
  # Shared session for all yfinance calls
 
18
  from typing import Any, Dict, List, Optional
19
 
20
  import pandas as pd
 
21
  import yfinance as yf
22
 
23
  from app.redis_client import cache_get, cache_set
 
36
  YF_TIMEOUT = 15
37
 
38
 
39
+ def _create_yf_session():
40
+ """Create a session with browser TLS fingerprint impersonation.
41
 
42
+ Yahoo Finance blocks requests based on TLS fingerprinting, not just
43
+ HTTP headers. curl_cffi impersonates Chrome's actual TLS handshake,
44
+ which is undetectable by Yahoo's anti-bot systems.
45
+ Falls back to a regular requests.Session for local development.
46
  """
47
+ try:
48
+ from curl_cffi.requests import Session
49
+ session = Session(impersonate="chrome")
50
+ logger.info("Using curl_cffi session with Chrome TLS impersonation")
51
+ return session
52
+ except ImportError:
53
+ import requests
54
+ session = requests.Session()
55
+ session.headers.update({
56
+ "User-Agent": (
57
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
58
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
59
+ "Chrome/131.0.0.0 Safari/537.36"
60
+ ),
61
+ })
62
+ logger.info("curl_cffi not available, using requests.Session (local dev mode)")
63
+ return session
64
 
65
 
66
  # Shared session for all yfinance calls
backend/requirements.txt CHANGED
@@ -36,6 +36,7 @@ hmmlearn==0.3.3
36
  # HTTP Client
37
  httpx==0.28.1
38
  aiohttp==3.11.11
 
39
 
40
  # Utilities
41
  python-dateutil==2.9.0
 
36
  # HTTP Client
37
  httpx==0.28.1
38
  aiohttp==3.11.11
39
+ curl_cffi>=0.7.0
40
 
41
  # Utilities
42
  python-dateutil==2.9.0