Spaces:
Sleeping
Sleeping
| """Shared HTTP session with retries and a realistic User-Agent.""" | |
| from __future__ import annotations | |
| import time | |
| import requests | |
| from requests.adapters import HTTPAdapter | |
| from urllib3.util.retry import Retry | |
# Browser-like User-Agent string sent by sessions built in this module.
# Written as one implicitly concatenated literal, one product token per line.
USER_AGENT = (
    "Mozilla/5.0 (X11; Linux x86_64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/124.0 "
    "Safari/537.36"
)
def make_session() -> requests.Session:
    """Build a ``requests.Session`` with browser-like headers and GET retries.

    The session sends ``USER_AGENT`` and an English ``Accept-Language``
    header by default.  A single adapter is mounted for both ``https://``
    and ``http://``; it retries GET requests up to four times with a
    backoff factor of 1.5 when the server answers 429, 500, 502, 503 or 504.

    Returns:
        The configured session.
    """
    retry_policy = Retry(
        total=4,
        backoff_factor=1.5,
        status_forcelist=(429, 500, 502, 503, 504),
        allowed_methods=frozenset(["GET"]),
        # Once retries are exhausted, hand back the last response instead of
        # raising from inside urllib3; callers check the status themselves.
        raise_on_status=False,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    default_headers = {
        "User-Agent": USER_AGENT,
        "Accept-Language": "en-US,en;q=0.9",
    }

    session = requests.Session()
    session.headers.update(default_headers)
    for scheme in ("https://", "http://"):
        session.mount(scheme, retrying_adapter)
    return session
def get(
    session: requests.Session,
    url: str,
    *,
    sleep: float = 0.7,
    timeout: float = 30,
) -> str:
    """Fetch *url* with *session* and return the response body as text.

    Args:
        session: Session used to issue the GET request.
        url: Address to fetch.
        sleep: Seconds to pause after a successful response, before
            returning.  Zero, ``None`` or a negative value skips the pause.
        timeout: Value forwarded to ``session.get`` as ``timeout``.

    Returns:
        The ``text`` attribute of the response.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` when the
            response carries an error status.
    """
    response = session.get(url, timeout=timeout)
    response.raise_for_status()
    # time.sleep() raises ValueError for negative values, so only pause for a
    # strictly positive delay rather than for any truthy one.
    if sleep and sleep > 0:
        time.sleep(sleep)
    return response.text