understanding commited on
Commit
bab413c
·
verified ·
1 Parent(s): 6965c1f

Update bot/integrations/http.py

Browse files
Files changed (1) hide show
  1. bot/integrations/http.py +158 -94
bot/integrations/http.py CHANGED
@@ -1,118 +1,182 @@
1
  # PATH: bot/integrations/http.py
2
- import asyncio
3
- from typing import Any, Dict, Optional
 
 
4
 
5
  import httpx
6
 
7
- # One global client
 
 
8
  _client: Optional[httpx.AsyncClient] = None
 
9
 
10
 
11
  def get_client() -> httpx.AsyncClient:
12
- """
13
- Shared HTTP client for Worker calls.
14
- - trust_env=False avoids proxy/env issues on HF
15
- """
16
  global _client
17
- if _client is not None:
18
- return _client
19
-
20
- timeout = httpx.Timeout(
21
- connect=10.0,
22
- read=60.0,
23
- write=60.0,
24
- pool=10.0,
25
- )
26
-
27
- limits = httpx.Limits(
28
- max_connections=50,
29
- max_keepalive_connections=20,
30
- keepalive_expiry=30.0,
31
- )
32
-
33
- _client = httpx.AsyncClient(
34
- timeout=timeout,
35
- limits=limits,
36
- follow_redirects=True,
37
- http2=True,
38
- trust_env=False,
39
- headers={
40
- "user-agent": "YouTubeLoaderHF/1.0",
41
- "accept": "application/json,text/plain,*/*",
42
- },
43
- )
44
  return _client
45
 
46
 
47
- async def request_json(
48
- method: str,
49
- url: str,
50
- *,
51
- headers: Optional[Dict[str, str]] = None,
52
- json_body: Optional[Dict[str, Any]] = None,
53
- data: Optional[Any] = None,
54
- retries: int = 4,
55
- backoff_base: float = 0.6,
56
- ) -> Dict[str, Any]:
57
  """
58
- Returns dict always:
59
- - success: JSON dict (adds ok=True if missing)
60
- - error: {"ok": False, "status": <int>, "err": "<msg>"}
61
  """
62
- c = get_client()
63
- last_err: Optional[BaseException] = None
64
-
65
- for attempt in range(retries):
66
- try:
67
- r = await c.request(
68
- method.upper(),
69
- url,
70
- headers=headers,
71
- json=json_body,
72
- data=data,
73
- )
74
-
75
- # parse JSON if possible
76
- try:
77
- j = r.json()
78
- except Exception:
79
- txt = (r.text or "").strip()
80
- j = {"raw": txt}
81
 
82
- if r.is_success:
83
- if isinstance(j, dict):
84
- j.setdefault("ok", True)
85
- return j
86
- return {"ok": True, "raw": j}
87
 
88
- # non-2xx
89
- if isinstance(j, dict):
90
- j.setdefault("ok", False)
91
- j.setdefault("status", r.status_code)
92
- j.setdefault("err", f"http_{r.status_code}")
93
- return j
94
 
95
- return {"ok": False, "status": r.status_code, "err": f"http_{r.status_code}"}
96
 
97
- except (httpx.ConnectError, httpx.ConnectTimeout, httpx.ReadTimeout, httpx.RemoteProtocolError) as e:
98
- last_err = e
99
- except OSError as e:
100
- # DNS / low-level network issues (includes Errno -5)
101
- last_err = e
102
- except Exception as e:
103
- last_err = e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- if attempt < retries - 1:
106
- await asyncio.sleep(backoff_base * (2 ** attempt))
107
 
108
- return {"ok": False, "status": 0, "err": str(last_err) if last_err else "unknown_error"}
 
 
 
 
 
 
 
 
 
109
 
110
 
111
- # ----------------- Backward-compatible helpers -----------------
112
- # (your old files import these names)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- async def post_json(url: str, payload: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
115
- return await request_json("POST", url, headers=headers, json_body=payload)
116
 
117
- async def get_json(url: str, headers: Optional[Dict[str, str]] = None) -> Dict[str, Any]:
118
- return await request_json("GET", url, headers=headers)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # PATH: bot/integrations/http.py
2
+ import ssl
3
+ import json as _json
4
+ from typing import Any, Dict, Optional, Tuple, List
5
+ from urllib.parse import urlparse, urlunparse
6
 
7
  import httpx
8
 
9
+
10
+ _DEFAULT_TIMEOUT = httpx.Timeout(20.0, connect=10.0)
11
+
12
  _client: Optional[httpx.AsyncClient] = None
13
+ _client_insecure: Optional[httpx.AsyncClient] = None
14
 
15
 
16
  def get_client() -> httpx.AsyncClient:
 
 
 
 
17
  global _client
18
+ if _client is None:
19
+ _client = httpx.AsyncClient(
20
+ timeout=_DEFAULT_TIMEOUT,
21
+ follow_redirects=True,
22
+ http2=True,
23
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  return _client
25
 
26
 
27
+ def get_client_insecure() -> httpx.AsyncClient:
 
 
 
 
 
 
 
 
 
28
  """
29
+ Insecure client used only for IP fallback (TLS verify off),
30
+ because we connect to https://<IP> with Host header.
 
31
  """
32
+ global _client_insecure
33
+ if _client_insecure is None:
34
+ _client_insecure = httpx.AsyncClient(
35
+ timeout=_DEFAULT_TIMEOUT,
36
+ follow_redirects=True,
37
+ http2=True,
38
+ verify=False,
39
+ )
40
+ return _client_insecure
 
 
 
 
 
 
 
 
 
 
41
 
 
 
 
 
 
42
 
43
+ def _is_dns_error(e: Exception) -> bool:
44
+ s = str(e).lower()
45
+ return ("no address associated with hostname" in s) or ("gaierror" in s) or ("name or service not known" in s)
 
 
 
46
 
 
47
 
48
+ async def doh_resolve_a(host: str) -> List[str]:
49
+ """
50
+ Resolve A records via DNS-over-HTTPS.
51
+ Uses dns.google (since google.com resolves fine in your env).
52
+ """
53
+ c = get_client()
54
+ try:
55
+ r = await c.get("https://dns.google/resolve", params={"name": host, "type": "A"})
56
+ if r.status_code != 200:
57
+ return []
58
+ j = r.json()
59
+ ans = j.get("Answer") or []
60
+ ips = []
61
+ for a in ans:
62
+ if a.get("type") == 1 and a.get("data"):
63
+ ips.append(str(a["data"]))
64
+ # unique keep order
65
+ out = []
66
+ for ip in ips:
67
+ if ip not in out:
68
+ out.append(ip)
69
+ return out
70
+ except Exception:
71
+ return []
72
+
73
+
74
+ def _split_url(url: str) -> Tuple[str, str, str]:
75
+ """
76
+ Returns (scheme, host, rest(path+query+fragment))
77
+ """
78
+ u = urlparse(url)
79
+ scheme = u.scheme or "https"
80
+ host = u.netloc
81
+ rest = urlunparse(("", "", u.path or "/", u.params, u.query, u.fragment))
82
+ return scheme, host, rest
83
 
 
 
84
 
85
+ async def fetch_status(url: str) -> str:
86
+ """
87
+ Returns status code as string, or ERR:...
88
+ Uses same DNS fallback logic.
89
+ """
90
+ try:
91
+ r = await request_text("GET", url)
92
+ return str(r[0])
93
+ except Exception as e:
94
+ return f"ERR:{type(e).__name__}:{e}"
95
 
96
 
97
+ async def request_text(method: str, url: str, headers: Optional[Dict[str, str]] = None) -> Tuple[int, str]:
98
+ """
99
+ Low-level text request with DNS fallback.
100
+ Returns (status_code, text)
101
+ """
102
+ headers = dict(headers or {})
103
+ c = get_client()
104
+ try:
105
+ r = await c.request(method, url, headers=headers)
106
+ return r.status_code, r.text
107
+ except Exception as e:
108
+ # DNS fail -> DoH fallback
109
+ if not _is_dns_error(e):
110
+ raise
111
+
112
+ scheme, host, rest = _split_url(url)
113
+ if not host:
114
+ raise
115
+
116
+ ips = await doh_resolve_a(host)
117
+ if not ips:
118
+ raise
119
+
120
+ ic = get_client_insecure()
121
+ last_err = e
122
+ for ip in ips:
123
+ try:
124
+ # connect to IP but keep routing via Host header
125
+ h2 = dict(headers)
126
+ h2["Host"] = host
127
+ r = await ic.request(method, f"{scheme}://{ip}{rest}", headers=h2)
128
+ return r.status_code, r.text
129
+ except Exception as e2:
130
+ last_err = e2
131
+ continue
132
+ raise last_err
133
+
134
+
135
+ async def post_json(url: str, headers: Optional[Dict[str, str]] = None, payload: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
136
+ """
137
+ POST JSON helper with DNS fallback.
138
+ Returns dict: {ok, status, data?, err?}
139
+ """
140
+ headers = dict(headers or {})
141
+ headers.setdefault("Content-Type", "application/json")
142
 
143
+ c = get_client()
144
+ payload = payload or {}
145
 
146
+ try:
147
+ r = await c.post(url, headers=headers, json=payload)
148
+ try:
149
+ data = r.json()
150
+ except Exception:
151
+ data = {"raw": r.text}
152
+ return {"ok": r.status_code < 400, "status": r.status_code, "data": data}
153
+ except Exception as e:
154
+ if not _is_dns_error(e):
155
+ return {"ok": False, "status": 0, "err": str(e)}
156
+
157
+ # DNS fail -> DoH -> IP fallback (verify off + Host header)
158
+ scheme, host, rest = _split_url(url)
159
+ if not host:
160
+ return {"ok": False, "status": 0, "err": str(e)}
161
+
162
+ ips = await doh_resolve_a(host)
163
+ if not ips:
164
+ return {"ok": False, "status": 0, "err": str(e)}
165
+
166
+ ic = get_client_insecure()
167
+ last_err: Exception = e
168
+ for ip in ips:
169
+ try:
170
+ h2 = dict(headers)
171
+ h2["Host"] = host
172
+ r = await ic.post(f"{scheme}://{ip}{rest}", headers=h2, json=payload)
173
+ try:
174
+ data = r.json()
175
+ except Exception:
176
+ data = {"raw": r.text}
177
+ return {"ok": r.status_code < 400, "status": r.status_code, "data": data}
178
+ except Exception as e2:
179
+ last_err = e2
180
+ continue
181
+
182
+ return {"ok": False, "status": 0, "err": str(last_err)}