overwrite69 commited on
Commit
0ebb9b8
Β·
verified Β·
1 Parent(s): 49412a5

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +175 -83
app.py CHANGED
@@ -3,12 +3,13 @@ Haiku API - OpenAI-compatible proxy for chatgpt.org/claude/chat
3
  Deploy to Hugging Face Spaces (Docker SDK)
4
 
5
  Auto-continues when upstream hits the ~1K token output limit.
 
6
  Sends SSE keep-alive comments during continuation gaps.
7
- Handles content arrays, CORS, extra params, etc.
8
  """
9
 
10
  import asyncio
11
  import json
 
12
  import re
13
  import time
14
  import uuid
@@ -20,7 +21,7 @@ from fastapi import FastAPI, HTTPException, Request
20
  from fastapi.middleware.cors import CORSMiddleware
21
  from fastapi.responses import StreamingResponse, JSONResponse
22
 
23
- app = FastAPI(title="Haiku API", version="3.1.0")
24
 
25
  # ── CORS ─────────────────────────────────────────────────────────
26
  app.add_middleware(
@@ -31,6 +32,28 @@ app.add_middleware(
31
  allow_headers=["*"],
32
  )
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # ── Session State ────────────────────────────────────────────────
35
  class SessionState:
36
  def __init__(self):
@@ -46,46 +69,68 @@ class SessionState:
46
  now = time.time()
47
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
48
  return
49
- try:
50
- resp = await client.get(
51
- "https://chatgpt.org/claude/chat",
52
- follow_redirects=True,
53
- headers={
54
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
55
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
56
- },
57
- timeout=30.0,
58
- )
59
- if resp.status_code != 200:
60
- print(f"[Session] GET returned {resp.status_code}")
61
- return
62
-
63
- new_cookies = httpx.Cookies()
64
- for name, value in resp.cookies.items():
65
- new_cookies.set(name, value, domain="chatgpt.org")
66
- for header in resp.headers.get_list("set-cookie"):
67
- parts = header.split(";")[0]
68
- if "=" in parts:
69
- k, v = parts.split("=", 1)
70
- new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
71
-
72
- xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
73
- if xsrf:
74
- xsrf = unquote(xsrf)
75
-
76
- csrf = None
77
- m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
78
- if m:
79
- csrf = m.group(1)
80
-
81
- self.cookies = new_cookies
82
- self.xsrf_token = xsrf
83
- self.csrf_token = csrf
84
- self.last_refresh = now
85
- print(f"[Session] OK β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())}")
86
-
87
- except Exception as e:
88
- print(f"[Session] Refresh error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
 
91
  session = SessionState()
@@ -96,7 +141,7 @@ http_client: Optional[httpx.AsyncClient] = None
96
  @app.on_event("startup")
97
  async def startup():
98
  global http_client
99
- http_client = httpx.AsyncClient(verify=False, timeout=httpx.Timeout(120.0, connect=10.0))
100
  await session.refresh(http_client)
101
 
102
  @app.on_event("shutdown")
@@ -104,6 +149,7 @@ async def shutdown():
104
  if http_client:
105
  await http_client.aclose()
106
 
 
107
  # ── Message normalization ────────────────────────────────────────
108
  def normalize_messages(messages: list[dict]) -> list[dict]:
109
  """Normalize messages: content arrays β†’ plain text, strip extra fields."""
@@ -150,18 +196,71 @@ def _headers() -> dict:
150
  return h
151
 
152
 
153
- async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True) -> httpx.Response:
154
- """Make a single POST to chatgpt.org/api/chat, returns raw streaming Response.
155
- Retries with backoff on 429 rate limits."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  await session.refresh(http_client)
157
 
158
  payload = {"model": model, "messages": messages}
159
 
160
- # CSRF retry
161
- for attempt in range(2):
162
- # 429 retry with backoff
163
- for rate_attempt in range(3): # up to 3 attempts on 429
164
- resp = await http_client.post(
165
  "https://chatgpt.org/api/chat",
166
  json=payload,
167
  headers=_headers(),
@@ -172,14 +271,14 @@ async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True)
172
  print("[Chat] 419 β†’ refreshing session...")
173
  session.last_refresh = 0
174
  await session.refresh(http_client)
175
- break # break inner loop, retry CSRF
176
 
177
  if resp.status_code == 429:
178
- wait_time = (rate_attempt + 1) * 10 # 10s, 20s, 30s
179
  print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
180
  session.last_refresh = 0
181
  await session.refresh(http_client)
182
- if retry_on_429 and rate_attempt < 2:
183
  await asyncio.sleep(wait_time)
184
  continue
185
  raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
@@ -194,12 +293,7 @@ async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True)
194
 
195
 
196
  async def _stream_one_response(resp):
197
- """
198
- Stream a single upstream SSE response in real-time.
199
- Yields (content_chunk, finish_reason) tuples.
200
- finish_reason is None while streaming, "stop" or "length" at the end.
201
- """
202
- content = ""
203
  finish_reason = None
204
 
205
  async for raw_line in resp.aiter_lines():
@@ -222,8 +316,6 @@ async def _stream_one_response(resp):
222
  delta = choice.get("delta", {})
223
  c = delta.get("content", "")
224
  if c:
225
- content += c
226
- # Yield each piece of content immediately for real-time streaming
227
  yield c, None
228
 
229
  fr = choice.get("finish_reason")
@@ -233,22 +325,23 @@ async def _stream_one_response(resp):
233
  elif fr in ("length", "max_tokens"):
234
  finish_reason = "length"
235
 
236
- # Yield the final finish reason
237
  yield "", finish_reason
238
 
239
 
240
- # ── Main streaming endpoint with auto-continue ──────────────────
241
  MAX_CONTINUATIONS = 20
242
 
243
  async def _raw_call_streaming(messages: list[dict], model: str):
244
- """Like _raw_call but yields SSE keep-alive comments during 429 retries.
245
- For use in streaming mode so the client connection stays alive."""
246
  await session.refresh(http_client)
247
  payload = {"model": model, "messages": messages}
248
 
249
  for attempt in range(2): # CSRF retry
250
  for rate_attempt in range(3): # 429 retry
251
- resp = await http_client.post(
 
 
 
252
  "https://chatgpt.org/api/chat",
253
  json=payload,
254
  headers=_headers(),
@@ -267,7 +360,6 @@ async def _raw_call_streaming(messages: list[dict], model: str):
267
  session.last_refresh = 0
268
  await session.refresh(http_client)
269
  if rate_attempt < 2:
270
- # Send keep-alive pings while waiting
271
  for _ in range(wait_time):
272
  yield ": retrying...\n\n"
273
  await asyncio.sleep(1)
@@ -285,25 +377,18 @@ async def _raw_call_streaming(messages: list[dict], model: str):
285
 
286
 
287
  async def _stream_with_auto_continue(messages: list[dict], model: str):
288
- """
289
- Stream the response in real-time. If it gets cut off (length limit),
290
- auto-continue by appending to the conversation and making another call.
291
- Sends SSE keep-alive comments during continuation gaps.
292
- """
293
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
294
  created = int(time.time())
295
  conversation = list(messages)
296
  total_content = ""
297
 
298
  for cont_num in range(MAX_CONTINUATIONS):
299
- # Send keep-alive before making the call
300
  yield ": thinking...\n\n"
301
 
302
- # Make the upstream call (with keep-alive during 429 retries)
303
  resp = None
304
  async for result in _raw_call_streaming(conversation, model):
305
  if isinstance(result, str):
306
- # This is a keep-alive comment
307
  yield result
308
  else:
309
  resp = result
@@ -311,7 +396,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
311
  if resp is None:
312
  raise HTTPException(500, "No response from upstream")
313
 
314
- # Stream it in real-time
315
  finish_reason = "stop"
316
  chunk_content = ""
317
 
@@ -354,7 +438,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
354
  yield "data: [DONE]\n\n"
355
  return
356
 
357
- # Auto-continue β€” send keep-alive
358
  yield ": continuing...\n\n"
359
 
360
  conversation.append({"role": "assistant", "content": chunk_content})
@@ -402,7 +485,6 @@ async def _collect_with_auto_continue(messages: list[dict], model: str) -> str:
402
  if finish_reason == "stop":
403
  return full_content
404
 
405
- # Auto-continue
406
  conversation.append({"role": "assistant", "content": content})
407
  conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
408
 
@@ -440,7 +522,7 @@ async def chat_completions(request: Request):
440
  headers={
441
  "Cache-Control": "no-cache",
442
  "Connection": "keep-alive",
443
- "X-Accel-Buffering": "no", # Disable nginx buffering
444
  },
445
  )
446
  else:
@@ -473,23 +555,32 @@ async def list_models():
473
 
474
  @app.get("/")
475
  async def root():
476
- return {"status": "ok", "version": "3.1.0", "endpoints": ["/v1/chat/completions", "/v1/models"]}
 
 
 
 
 
477
 
478
 
479
  @app.get("/health")
480
  async def health():
481
- return {"status": "ok", "session_active": bool(session.cookies)}
 
 
 
 
482
 
483
 
484
  @app.get("/debug/refresh")
485
  async def force_refresh():
486
- """Force refresh the session cookies."""
487
  session.last_refresh = 0
488
  await session.refresh(http_client)
489
  return {
490
  "refreshed": True,
491
  "has_cookies": bool(session.cookies),
492
  "has_csrf": bool(session.csrf_token),
 
493
  }
494
 
495
 
@@ -501,6 +592,7 @@ async def debug_session():
501
  "has_csrf": bool(session.csrf_token),
502
  "has_xsrf": bool(session.xsrf_token),
503
  "last_refresh_ago": int(time.time() - session.last_refresh) if session.last_refresh else None,
 
504
  }
505
 
506
 
 
3
  Deploy to Hugging Face Spaces (Docker SDK)
4
 
5
  Auto-continues when upstream hits the ~1K token output limit.
6
+ Uses rotating proxy with aggressive retries for unstable IPs.
7
  Sends SSE keep-alive comments during continuation gaps.
 
8
  """
9
 
10
  import asyncio
11
  import json
12
+ import os
13
  import re
14
  import time
15
  import uuid
 
21
  from fastapi.middleware.cors import CORSMiddleware
22
  from fastapi.responses import StreamingResponse, JSONResponse
23
 
24
+ app = FastAPI(title="Haiku API", version="4.0.0")
25
 
26
  # ── CORS ─────────────────────────────────────────────────────────
27
  app.add_middleware(
 
32
  allow_headers=["*"],
33
  )
34
 
35
+ # ── Proxy Config ─────────────────────────────────────────────────
36
+ PROXY_URL = os.environ.get("PROXY_URL", "")
37
+ # e.g. http://user:pass@proxy.op.wtf:32424
38
+
39
+ PROXY_MAX_RETRIES = 6 # rotating proxy: try many IPs since ~half are dead
40
+ PROXY_RETRY_DELAY = 1 # seconds between proxy retries
41
+
42
+
43
+ def _make_client() -> httpx.AsyncClient:
44
+ """Create an httpx client, with or without proxy."""
45
+ kwargs = dict(
46
+ verify=False,
47
+ timeout=httpx.Timeout(120.0, connect=15.0),
48
+ )
49
+ if PROXY_URL:
50
+ kwargs["proxy"] = PROXY_URL
51
+ print(f"[Proxy] Using rotating proxy: {PROXY_URL.split('@')[-1]}")
52
+ else:
53
+ print("[Proxy] No proxy configured, direct connection")
54
+ return httpx.AsyncClient(**kwargs)
55
+
56
+
57
  # ── Session State ────────────────────────────────────────────────
58
  class SessionState:
59
  def __init__(self):
 
69
  now = time.time()
70
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
71
  return
72
+
73
+ # Try multiple times with proxy rotation (new IP each request)
74
+ for attempt in range(PROXY_MAX_RETRIES):
75
+ try:
76
+ # Create fresh client for each attempt (gets new proxy IP)
77
+ if PROXY_URL and attempt > 0:
78
+ try:
79
+ await client.aclose()
80
+ except:
81
+ pass
82
+ client = _make_client()
83
+
84
+ resp = await client.get(
85
+ "https://chatgpt.org/claude/chat",
86
+ follow_redirects=True,
87
+ headers={
88
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
89
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
90
+ },
91
+ timeout=30.0,
92
+ )
93
+
94
+ if resp.status_code != 200:
95
+ print(f"[Session] GET returned {resp.status_code}, retry #{attempt+1}")
96
+ await asyncio.sleep(PROXY_RETRY_DELAY)
97
+ continue
98
+
99
+ new_cookies = httpx.Cookies()
100
+ for name, value in resp.cookies.items():
101
+ new_cookies.set(name, value, domain="chatgpt.org")
102
+ for header in resp.headers.get_list("set-cookie"):
103
+ parts = header.split(";")[0]
104
+ if "=" in parts:
105
+ k, v = parts.split("=", 1)
106
+ new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
107
+
108
+ xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
109
+ if xsrf:
110
+ xsrf = unquote(xsrf)
111
+
112
+ csrf = None
113
+ m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
114
+ if m:
115
+ csrf = m.group(1)
116
+
117
+ self.cookies = new_cookies
118
+ self.xsrf_token = xsrf
119
+ self.csrf_token = csrf
120
+ self.last_refresh = now
121
+ print(f"[Session] OK β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
122
+ return # Success!
123
+
124
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
125
+ print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
126
+ await asyncio.sleep(PROXY_RETRY_DELAY)
127
+ continue
128
+ except Exception as e:
129
+ print(f"[Session] Error attempt #{attempt+1}: {e}")
130
+ await asyncio.sleep(PROXY_RETRY_DELAY)
131
+ continue
132
+
133
+ print("[Session] WARNING: All refresh attempts failed")
134
 
135
 
136
  session = SessionState()
 
141
  @app.on_event("startup")
142
  async def startup():
143
  global http_client
144
+ http_client = _make_client()
145
  await session.refresh(http_client)
146
 
147
  @app.on_event("shutdown")
 
149
  if http_client:
150
  await http_client.aclose()
151
 
152
+
153
  # ── Message normalization ────────────────────────────────────────
154
  def normalize_messages(messages: list[dict]) -> list[dict]:
155
  """Normalize messages: content arrays β†’ plain text, strip extra fields."""
 
196
  return h
197
 
198
 
199
+ # ── Proxy-aware request with retry ──────────────────────────────
200
+ async def _proxy_post(url: str, **kwargs) -> httpx.Response:
201
+ """POST with proxy retry logic. Creates new client on each retry to get fresh IP."""
202
+ global http_client
203
+
204
+ for attempt in range(PROXY_MAX_RETRIES):
205
+ try:
206
+ resp = await http_client.post(url, **kwargs)
207
+
208
+ # Proxy returned a non-connection error β€” return it
209
+ return resp
210
+
211
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
212
+ print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
213
+ # Recreate client with new proxy IP
214
+ if PROXY_URL:
215
+ try:
216
+ await http_client.aclose()
217
+ except:
218
+ pass
219
+ http_client = _make_client()
220
+ # Re-apply session cookies
221
+ await asyncio.sleep(PROXY_RETRY_DELAY)
222
+ else:
223
+ await asyncio.sleep(2)
224
+ continue
225
+
226
+ # All retries exhausted β€” return last attempt anyway
227
+ return await http_client.post(url, **kwargs)
228
+
229
+
230
+ async def _proxy_get(url: str, **kwargs) -> httpx.Response:
231
+ """GET with proxy retry logic."""
232
+ global http_client
233
+
234
+ for attempt in range(PROXY_MAX_RETRIES):
235
+ try:
236
+ resp = await http_client.get(url, **kwargs)
237
+ return resp
238
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
239
+ print(f"[Proxy] GET error #{attempt+1}: {type(e).__name__}")
240
+ if PROXY_URL:
241
+ try:
242
+ await http_client.aclose()
243
+ except:
244
+ pass
245
+ http_client = _make_client()
246
+ await asyncio.sleep(PROXY_RETRY_DELAY)
247
+ else:
248
+ await asyncio.sleep(2)
249
+ continue
250
+
251
+ return await http_client.get(url, **kwargs)
252
+
253
+
254
+ # ── Raw call with retries ───────────────────────────────────────
255
+ async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
256
+ """Make a single POST to chatgpt.org/api/chat with full retry logic."""
257
  await session.refresh(http_client)
258
 
259
  payload = {"model": model, "messages": messages}
260
 
261
+ for attempt in range(2): # CSRF retry
262
+ for rate_attempt in range(3): # 429 retry
263
+ resp = await _proxy_post(
 
 
264
  "https://chatgpt.org/api/chat",
265
  json=payload,
266
  headers=_headers(),
 
271
  print("[Chat] 419 β†’ refreshing session...")
272
  session.last_refresh = 0
273
  await session.refresh(http_client)
274
+ break
275
 
276
  if resp.status_code == 429:
277
+ wait_time = (rate_attempt + 1) * 10
278
  print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
279
  session.last_refresh = 0
280
  await session.refresh(http_client)
281
+ if rate_attempt < 2:
282
  await asyncio.sleep(wait_time)
283
  continue
284
  raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
 
293
 
294
 
295
  async def _stream_one_response(resp):
296
+ """Stream a single upstream SSE response in real-time."""
 
 
 
 
 
297
  finish_reason = None
298
 
299
  async for raw_line in resp.aiter_lines():
 
316
  delta = choice.get("delta", {})
317
  c = delta.get("content", "")
318
  if c:
 
 
319
  yield c, None
320
 
321
  fr = choice.get("finish_reason")
 
325
  elif fr in ("length", "max_tokens"):
326
  finish_reason = "length"
327
 
 
328
  yield "", finish_reason
329
 
330
 
331
+ # ── Streaming with auto-continue ────────────────────────────────
332
  MAX_CONTINUATIONS = 20
333
 
334
  async def _raw_call_streaming(messages: list[dict], model: str):
335
+ """Like _raw_call but yields SSE keep-alive comments during retries."""
 
336
  await session.refresh(http_client)
337
  payload = {"model": model, "messages": messages}
338
 
339
  for attempt in range(2): # CSRF retry
340
  for rate_attempt in range(3): # 429 retry
341
+ # Keep-alive before request
342
+ yield ": thinking...\n\n"
343
+
344
+ resp = await _proxy_post(
345
  "https://chatgpt.org/api/chat",
346
  json=payload,
347
  headers=_headers(),
 
360
  session.last_refresh = 0
361
  await session.refresh(http_client)
362
  if rate_attempt < 2:
 
363
  for _ in range(wait_time):
364
  yield ": retrying...\n\n"
365
  await asyncio.sleep(1)
 
377
 
378
 
379
  async def _stream_with_auto_continue(messages: list[dict], model: str):
380
+ """Stream with real-time output, auto-continue, and keep-alive pings."""
 
 
 
 
381
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
382
  created = int(time.time())
383
  conversation = list(messages)
384
  total_content = ""
385
 
386
  for cont_num in range(MAX_CONTINUATIONS):
 
387
  yield ": thinking...\n\n"
388
 
 
389
  resp = None
390
  async for result in _raw_call_streaming(conversation, model):
391
  if isinstance(result, str):
 
392
  yield result
393
  else:
394
  resp = result
 
396
  if resp is None:
397
  raise HTTPException(500, "No response from upstream")
398
 
 
399
  finish_reason = "stop"
400
  chunk_content = ""
401
 
 
438
  yield "data: [DONE]\n\n"
439
  return
440
 
 
441
  yield ": continuing...\n\n"
442
 
443
  conversation.append({"role": "assistant", "content": chunk_content})
 
485
  if finish_reason == "stop":
486
  return full_content
487
 
 
488
  conversation.append({"role": "assistant", "content": content})
489
  conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
490
 
 
522
  headers={
523
  "Cache-Control": "no-cache",
524
  "Connection": "keep-alive",
525
+ "X-Accel-Buffering": "no",
526
  },
527
  )
528
  else:
 
555
 
556
  @app.get("/")
557
  async def root():
558
+ return {
559
+ "status": "ok",
560
+ "version": "4.0.0",
561
+ "proxy": bool(PROXY_URL),
562
+ "endpoints": ["/v1/chat/completions", "/v1/models"],
563
+ }
564
 
565
 
566
  @app.get("/health")
567
  async def health():
568
+ return {
569
+ "status": "ok",
570
+ "session_active": bool(session.cookies),
571
+ "proxy": bool(PROXY_URL),
572
+ }
573
 
574
 
575
  @app.get("/debug/refresh")
576
  async def force_refresh():
 
577
  session.last_refresh = 0
578
  await session.refresh(http_client)
579
  return {
580
  "refreshed": True,
581
  "has_cookies": bool(session.cookies),
582
  "has_csrf": bool(session.csrf_token),
583
+ "proxy": bool(PROXY_URL),
584
  }
585
 
586
 
 
592
  "has_csrf": bool(session.csrf_token),
593
  "has_xsrf": bool(session.xsrf_token),
594
  "last_refresh_ago": int(time.time() - session.last_refresh) if session.last_refresh else None,
595
+ "proxy": bool(PROXY_URL),
596
  }
597
 
598