Spaces:
Running
Running
Commit ·
569c7d4
1
Parent(s): 14574e0
fix: use curl_cffi for Chrome TLS fingerprint impersonation on cloud
Browse files
backend/app/services/data_ingestion/yahoo.py
CHANGED
|
@@ -18,7 +18,6 @@ from functools import partial
|
|
| 18 |
from typing import Any, Dict, List, Optional
|
| 19 |
|
| 20 |
import pandas as pd
|
| 21 |
-
import requests
|
| 22 |
import yfinance as yf
|
| 23 |
|
| 24 |
from app.redis_client import cache_get, cache_set
|
|
@@ -37,26 +36,31 @@ _yf_pool = ThreadPoolExecutor(max_workers=4, thread_name_prefix="yfinance")
|
|
| 37 |
YF_TIMEOUT = 15
|
| 38 |
|
| 39 |
|
| 40 |
-
def _create_yf_session()
|
| 41 |
-
"""Create a
|
| 42 |
|
| 43 |
-
Yahoo Finance blocks
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
"""
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
| 60 |
|
| 61 |
|
| 62 |
# Shared session for all yfinance calls
|
|
|
|
| 18 |
from typing import Any, Dict, List, Optional
|
| 19 |
|
| 20 |
import pandas as pd
|
|
|
|
| 21 |
import yfinance as yf
|
| 22 |
|
| 23 |
from app.redis_client import cache_get, cache_set
|
|
|
|
| 36 |
YF_TIMEOUT = 15
|
| 37 |
|
| 38 |
|
| 39 |
+
def _create_yf_session():
    """Create the HTTP session used for yfinance calls.

    Yahoo Finance blocks requests based on TLS fingerprinting, not just
    HTTP headers. When curl_cffi is installed, the returned session
    impersonates Chrome's TLS handshake so requests look like they come
    from a real browser. When curl_cffi is not installed, a plain
    ``requests.Session`` carrying a Chrome User-Agent header is returned
    instead; that fallback is intended for local development only.

    Returns:
        A ``curl_cffi.requests.Session`` configured with
        ``impersonate="chrome"`` if curl_cffi can be imported, otherwise a
        ``requests.Session`` with a Chrome User-Agent header.
    """
    # Guard only the import: an ImportError here can only mean that
    # curl_cffi is unavailable, so failures while building the session are
    # not mistaken for a missing dependency and silently downgraded.
    try:
        from curl_cffi.requests import Session
    except ImportError:
        import requests

        session = requests.Session()
        session.headers.update({
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/131.0.0.0 Safari/537.36"
            ),
        })
        # Logged as a warning rather than info: without TLS impersonation
        # Yahoo may block requests, so a deployment that lands here by
        # accident should be easy to spot in the logs.
        # NOTE(review): some yfinance releases may reject sessions that are
        # not curl_cffi sessions -- confirm against the yfinance version in use.
        logger.warning("curl_cffi not available, using requests.Session (local dev mode)")
        return session

    session = Session(impersonate="chrome")
    logger.info("Using curl_cffi session with Chrome TLS impersonation")
    return session
|
| 64 |
|
| 65 |
|
| 66 |
# Shared session for all yfinance calls
|
backend/requirements.txt
CHANGED
|
@@ -36,6 +36,7 @@ hmmlearn==0.3.3
|
|
| 36 |
# HTTP Client
|
| 37 |
httpx==0.28.1
|
| 38 |
aiohttp==3.11.11
|
|
|
|
| 39 |
|
| 40 |
# Utilities
|
| 41 |
python-dateutil==2.9.0
|
|
|
|
| 36 |
# HTTP Client
|
| 37 |
httpx==0.28.1
|
| 38 |
aiohttp==3.11.11
|
| 39 |
+
curl_cffi>=0.7.0
|
| 40 |
|
| 41 |
# Utilities
|
| 42 |
python-dateutil==2.9.0
|