overwrite69 commited on
Commit
49412a5
Β·
verified Β·
1 Parent(s): 9bc9aad

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +106 -31
app.py CHANGED
@@ -150,33 +150,45 @@ def _headers() -> dict:
150
  return h
151
 
152
 
153
- async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
154
- """Make a single POST to chatgpt.org/api/chat, returns raw streaming Response."""
 
155
  await session.refresh(http_client)
156
 
157
  payload = {"model": model, "messages": messages}
158
 
 
159
  for attempt in range(2):
160
- resp = await http_client.post(
161
- "https://chatgpt.org/api/chat",
162
- json=payload,
163
- headers=_headers(),
164
- cookies=session.cookies,
165
- )
166
-
167
- if resp.status_code == 419 and attempt == 0:
168
- print("[Chat] 419 β†’ refreshing session...")
169
- session.last_refresh = 0
170
- await session.refresh(http_client)
171
- continue
172
-
173
- if resp.status_code == 429:
174
- raise HTTPException(429, "Rate limited by upstream")
175
- if resp.status_code != 200:
176
- session.last_refresh = 0
177
- raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")
178
-
179
- return resp
 
 
 
 
 
 
 
 
 
 
180
 
181
  raise HTTPException(500, "Failed after retry")
182
 
@@ -228,6 +240,50 @@ async def _stream_one_response(resp):
228
  # ── Main streaming endpoint with auto-continue ──────────────────
229
  MAX_CONTINUATIONS = 20
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  async def _stream_with_auto_continue(messages: list[dict], model: str):
232
  """
233
  Stream the response in real-time. If it gets cut off (length limit),
@@ -240,8 +296,20 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
240
  total_content = ""
241
 
242
  for cont_num in range(MAX_CONTINUATIONS):
243
- # Make the upstream call
244
- resp = await _raw_call(conversation, model)
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
  # Stream it in real-time
247
  finish_reason = "stop"
@@ -249,14 +317,12 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
249
 
250
  async for text, fr in _stream_one_response(resp):
251
  if fr is not None:
252
- # This is the final yield with finish_reason
253
  finish_reason = fr
254
  continue
255
 
256
  if text:
257
  chunk_content += text
258
  total_content += text
259
- # Send the content chunk to the client immediately
260
  sse_data = json.dumps({
261
  "id": chunk_id,
262
  "object": "chat.completion.chunk",
@@ -273,7 +339,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
273
  print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
274
 
275
  if finish_reason == "stop":
276
- # All done β€” send final chunk with finish_reason=stop
277
  sse_data = json.dumps({
278
  "id": chunk_id,
279
  "object": "chat.completion.chunk",
@@ -289,17 +354,15 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
289
  yield "data: [DONE]\n\n"
290
  return
291
 
292
- # Response was cut off β€” need to auto-continue
293
- # Send a keep-alive comment so the client doesn't timeout
294
  yield ": continuing...\n\n"
295
 
296
- # Append to conversation for next round
297
  conversation.append({"role": "assistant", "content": chunk_content})
298
  conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
299
 
300
  print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
301
 
302
- # Safety: hit max continuations
303
  sse_data = json.dumps({
304
  "id": chunk_id,
305
  "object": "chat.completion.chunk",
@@ -418,6 +481,18 @@ async def health():
418
  return {"status": "ok", "session_active": bool(session.cookies)}
419
 
420
 
 
 
 
 
 
 
 
 
 
 
 
 
421
  @app.get("/debug/session")
422
  async def debug_session():
423
  return {
 
150
  return h
151
 
152
 
153
+ async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True) -> httpx.Response:
154
+ """Make a single POST to chatgpt.org/api/chat, returns raw streaming Response.
155
+ Retries with backoff on 429 rate limits."""
156
  await session.refresh(http_client)
157
 
158
  payload = {"model": model, "messages": messages}
159
 
160
+ # CSRF retry
161
  for attempt in range(2):
162
+ # 429 retry with backoff
163
+ for rate_attempt in range(3): # up to 3 attempts on 429
164
+ resp = await http_client.post(
165
+ "https://chatgpt.org/api/chat",
166
+ json=payload,
167
+ headers=_headers(),
168
+ cookies=session.cookies,
169
+ )
170
+
171
+ if resp.status_code == 419 and attempt == 0:
172
+ print("[Chat] 419 β†’ refreshing session...")
173
+ session.last_refresh = 0
174
+ await session.refresh(http_client)
175
+ break # break inner loop, retry CSRF
176
+
177
+ if resp.status_code == 429:
178
+ wait_time = (rate_attempt + 1) * 10 # 10s, 20s, 30s
179
+ print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
180
+ session.last_refresh = 0
181
+ await session.refresh(http_client)
182
+ if retry_on_429 and rate_attempt < 2:
183
+ await asyncio.sleep(wait_time)
184
+ continue
185
+ raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
186
+
187
+ if resp.status_code != 200:
188
+ session.last_refresh = 0
189
+ raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")
190
+
191
+ return resp
192
 
193
  raise HTTPException(500, "Failed after retry")
194
 
 
240
  # ── Main streaming endpoint with auto-continue ──────────────────
241
  MAX_CONTINUATIONS = 20
242
 
243
+ async def _raw_call_streaming(messages: list[dict], model: str):
244
+ """Like _raw_call but yields SSE keep-alive comments during 429 retries.
245
+ For use in streaming mode so the client connection stays alive."""
246
+ await session.refresh(http_client)
247
+ payload = {"model": model, "messages": messages}
248
+
249
+ for attempt in range(2): # CSRF retry
250
+ for rate_attempt in range(3): # 429 retry
251
+ resp = await http_client.post(
252
+ "https://chatgpt.org/api/chat",
253
+ json=payload,
254
+ headers=_headers(),
255
+ cookies=session.cookies,
256
+ )
257
+
258
+ if resp.status_code == 419 and attempt == 0:
259
+ print("[Chat] 419 β†’ refreshing session...")
260
+ session.last_refresh = 0
261
+ await session.refresh(http_client)
262
+ break
263
+
264
+ if resp.status_code == 429:
265
+ wait_time = (rate_attempt + 1) * 10
266
+ print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
267
+ session.last_refresh = 0
268
+ await session.refresh(http_client)
269
+ if rate_attempt < 2:
270
+ # Send keep-alive pings while waiting
271
+ for _ in range(wait_time):
272
+ yield ": retrying...\n\n"
273
+ await asyncio.sleep(1)
274
+ continue
275
+ raise HTTPException(429, f"Rate limited after {rate_attempt+1} retries")
276
+
277
+ if resp.status_code != 200:
278
+ session.last_refresh = 0
279
+ raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")
280
+
281
+ yield resp
282
+ return
283
+
284
+ raise HTTPException(500, "Failed after retry")
285
+
286
+
287
  async def _stream_with_auto_continue(messages: list[dict], model: str):
288
  """
289
  Stream the response in real-time. If it gets cut off (length limit),
 
296
  total_content = ""
297
 
298
  for cont_num in range(MAX_CONTINUATIONS):
299
+ # Send keep-alive before making the call
300
+ yield ": thinking...\n\n"
301
+
302
+ # Make the upstream call (with keep-alive during 429 retries)
303
+ resp = None
304
+ async for result in _raw_call_streaming(conversation, model):
305
+ if isinstance(result, str):
306
+ # This is a keep-alive comment
307
+ yield result
308
+ else:
309
+ resp = result
310
+
311
+ if resp is None:
312
+ raise HTTPException(500, "No response from upstream")
313
 
314
  # Stream it in real-time
315
  finish_reason = "stop"
 
317
 
318
  async for text, fr in _stream_one_response(resp):
319
  if fr is not None:
 
320
  finish_reason = fr
321
  continue
322
 
323
  if text:
324
  chunk_content += text
325
  total_content += text
 
326
  sse_data = json.dumps({
327
  "id": chunk_id,
328
  "object": "chat.completion.chunk",
 
339
  print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
340
 
341
  if finish_reason == "stop":
 
342
  sse_data = json.dumps({
343
  "id": chunk_id,
344
  "object": "chat.completion.chunk",
 
354
  yield "data: [DONE]\n\n"
355
  return
356
 
357
+ # Auto-continue β€” send keep-alive
 
358
  yield ": continuing...\n\n"
359
 
 
360
  conversation.append({"role": "assistant", "content": chunk_content})
361
  conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
362
 
363
  print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
364
 
365
+ # Safety
366
  sse_data = json.dumps({
367
  "id": chunk_id,
368
  "object": "chat.completion.chunk",
 
481
  return {"status": "ok", "session_active": bool(session.cookies)}
482
 
483
 
484
+ @app.get("/debug/refresh")
485
+ async def force_refresh():
486
+ """Force refresh the session cookies."""
487
+ session.last_refresh = 0
488
+ await session.refresh(http_client)
489
+ return {
490
+ "refreshed": True,
491
+ "has_cookies": bool(session.cookies),
492
+ "has_csrf": bool(session.csrf_token),
493
+ }
494
+
495
+
496
  @app.get("/debug/session")
497
  async def debug_session():
498
  return {