Spaces:
Sleeping
Sleeping
Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -150,33 +150,45 @@ def _headers() -> dict:
|
|
| 150 |
return h
|
| 151 |
|
| 152 |
|
| 153 |
-
async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
|
| 154 |
-
"""Make a single POST to chatgpt.org/api/chat, returns raw streaming Response.
|
|
|
|
| 155 |
await session.refresh(http_client)
|
| 156 |
|
| 157 |
payload = {"model": model, "messages": messages}
|
| 158 |
|
|
|
|
| 159 |
for attempt in range(2):
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
raise HTTPException(500, "Failed after retry")
|
| 182 |
|
|
@@ -228,6 +240,50 @@ async def _stream_one_response(resp):
|
|
| 228 |
# ββ Main streaming endpoint with auto-continue ββββββββββββββββββ
|
| 229 |
MAX_CONTINUATIONS = 20
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
async def _stream_with_auto_continue(messages: list[dict], model: str):
|
| 232 |
"""
|
| 233 |
Stream the response in real-time. If it gets cut off (length limit),
|
|
@@ -240,8 +296,20 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 240 |
total_content = ""
|
| 241 |
|
| 242 |
for cont_num in range(MAX_CONTINUATIONS):
|
| 243 |
-
#
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
# Stream it in real-time
|
| 247 |
finish_reason = "stop"
|
|
@@ -249,14 +317,12 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 249 |
|
| 250 |
async for text, fr in _stream_one_response(resp):
|
| 251 |
if fr is not None:
|
| 252 |
-
# This is the final yield with finish_reason
|
| 253 |
finish_reason = fr
|
| 254 |
continue
|
| 255 |
|
| 256 |
if text:
|
| 257 |
chunk_content += text
|
| 258 |
total_content += text
|
| 259 |
-
# Send the content chunk to the client immediately
|
| 260 |
sse_data = json.dumps({
|
| 261 |
"id": chunk_id,
|
| 262 |
"object": "chat.completion.chunk",
|
|
@@ -273,7 +339,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 273 |
print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
|
| 274 |
|
| 275 |
if finish_reason == "stop":
|
| 276 |
-
# All done β send final chunk with finish_reason=stop
|
| 277 |
sse_data = json.dumps({
|
| 278 |
"id": chunk_id,
|
| 279 |
"object": "chat.completion.chunk",
|
|
@@ -289,17 +354,15 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 289 |
yield "data: [DONE]\n\n"
|
| 290 |
return
|
| 291 |
|
| 292 |
-
#
|
| 293 |
-
# Send a keep-alive comment so the client doesn't timeout
|
| 294 |
yield ": continuing...\n\n"
|
| 295 |
|
| 296 |
-
# Append to conversation for next round
|
| 297 |
conversation.append({"role": "assistant", "content": chunk_content})
|
| 298 |
conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
|
| 299 |
|
| 300 |
print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
|
| 301 |
|
| 302 |
-
# Safety
|
| 303 |
sse_data = json.dumps({
|
| 304 |
"id": chunk_id,
|
| 305 |
"object": "chat.completion.chunk",
|
|
@@ -418,6 +481,18 @@ async def health():
|
|
| 418 |
return {"status": "ok", "session_active": bool(session.cookies)}
|
| 419 |
|
| 420 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
@app.get("/debug/session")
|
| 422 |
async def debug_session():
|
| 423 |
return {
|
|
|
|
| 150 |
return h
|
| 151 |
|
| 152 |
|
| 153 |
+
async def _raw_call(messages: list[dict], model: str, retry_on_429: bool = True) -> httpx.Response:
|
| 154 |
+
"""Make a single POST to chatgpt.org/api/chat, returns raw streaming Response.
|
| 155 |
+
Retries with backoff on 429 rate limits."""
|
| 156 |
await session.refresh(http_client)
|
| 157 |
|
| 158 |
payload = {"model": model, "messages": messages}
|
| 159 |
|
| 160 |
+
# CSRF retry
|
| 161 |
for attempt in range(2):
|
| 162 |
+
# 429 retry with backoff
|
| 163 |
+
for rate_attempt in range(3): # up to 3 attempts on 429
|
| 164 |
+
resp = await http_client.post(
|
| 165 |
+
"https://chatgpt.org/api/chat",
|
| 166 |
+
json=payload,
|
| 167 |
+
headers=_headers(),
|
| 168 |
+
cookies=session.cookies,
|
| 169 |
+
)
|
| 170 |
+
|
| 171 |
+
if resp.status_code == 419 and attempt == 0:
|
| 172 |
+
print("[Chat] 419 β refreshing session...")
|
| 173 |
+
session.last_refresh = 0
|
| 174 |
+
await session.refresh(http_client)
|
| 175 |
+
break # break inner loop, retry CSRF
|
| 176 |
+
|
| 177 |
+
if resp.status_code == 429:
|
| 178 |
+
wait_time = (rate_attempt + 1) * 10 # 10s, 20s, 30s
|
| 179 |
+
print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
|
| 180 |
+
session.last_refresh = 0
|
| 181 |
+
await session.refresh(http_client)
|
| 182 |
+
if retry_on_429 and rate_attempt < 2:
|
| 183 |
+
await asyncio.sleep(wait_time)
|
| 184 |
+
continue
|
| 185 |
+
raise HTTPException(429, f"Rate limited by upstream after {rate_attempt+1} retries")
|
| 186 |
+
|
| 187 |
+
if resp.status_code != 200:
|
| 188 |
+
session.last_refresh = 0
|
| 189 |
+
raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")
|
| 190 |
+
|
| 191 |
+
return resp
|
| 192 |
|
| 193 |
raise HTTPException(500, "Failed after retry")
|
| 194 |
|
|
|
|
| 240 |
# ββ Main streaming endpoint with auto-continue ββββββββββββββββββ
|
| 241 |
MAX_CONTINUATIONS = 20
|
| 242 |
|
| 243 |
+
async def _raw_call_streaming(messages: list[dict], model: str):
|
| 244 |
+
"""Like _raw_call but yields SSE keep-alive comments during 429 retries.
|
| 245 |
+
For use in streaming mode so the client connection stays alive."""
|
| 246 |
+
await session.refresh(http_client)
|
| 247 |
+
payload = {"model": model, "messages": messages}
|
| 248 |
+
|
| 249 |
+
for attempt in range(2): # CSRF retry
|
| 250 |
+
for rate_attempt in range(3): # 429 retry
|
| 251 |
+
resp = await http_client.post(
|
| 252 |
+
"https://chatgpt.org/api/chat",
|
| 253 |
+
json=payload,
|
| 254 |
+
headers=_headers(),
|
| 255 |
+
cookies=session.cookies,
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
if resp.status_code == 419 and attempt == 0:
|
| 259 |
+
print("[Chat] 419 β refreshing session...")
|
| 260 |
+
session.last_refresh = 0
|
| 261 |
+
await session.refresh(http_client)
|
| 262 |
+
break
|
| 263 |
+
|
| 264 |
+
if resp.status_code == 429:
|
| 265 |
+
wait_time = (rate_attempt + 1) * 10
|
| 266 |
+
print(f"[Chat] 429 rate limited, waiting {wait_time}s (attempt {rate_attempt+1}/3)...")
|
| 267 |
+
session.last_refresh = 0
|
| 268 |
+
await session.refresh(http_client)
|
| 269 |
+
if rate_attempt < 2:
|
| 270 |
+
# Send keep-alive pings while waiting
|
| 271 |
+
for _ in range(wait_time):
|
| 272 |
+
yield ": retrying...\n\n"
|
| 273 |
+
await asyncio.sleep(1)
|
| 274 |
+
continue
|
| 275 |
+
raise HTTPException(429, f"Rate limited after {rate_attempt+1} retries")
|
| 276 |
+
|
| 277 |
+
if resp.status_code != 200:
|
| 278 |
+
session.last_refresh = 0
|
| 279 |
+
raise HTTPException(resp.status_code, f"Upstream {resp.status_code}: {resp.text[:300]}")
|
| 280 |
+
|
| 281 |
+
yield resp
|
| 282 |
+
return
|
| 283 |
+
|
| 284 |
+
raise HTTPException(500, "Failed after retry")
|
| 285 |
+
|
| 286 |
+
|
| 287 |
async def _stream_with_auto_continue(messages: list[dict], model: str):
|
| 288 |
"""
|
| 289 |
Stream the response in real-time. If it gets cut off (length limit),
|
|
|
|
| 296 |
total_content = ""
|
| 297 |
|
| 298 |
for cont_num in range(MAX_CONTINUATIONS):
|
| 299 |
+
# Send keep-alive before making the call
|
| 300 |
+
yield ": thinking...\n\n"
|
| 301 |
+
|
| 302 |
+
# Make the upstream call (with keep-alive during 429 retries)
|
| 303 |
+
resp = None
|
| 304 |
+
async for result in _raw_call_streaming(conversation, model):
|
| 305 |
+
if isinstance(result, str):
|
| 306 |
+
# This is a keep-alive comment
|
| 307 |
+
yield result
|
| 308 |
+
else:
|
| 309 |
+
resp = result
|
| 310 |
+
|
| 311 |
+
if resp is None:
|
| 312 |
+
raise HTTPException(500, "No response from upstream")
|
| 313 |
|
| 314 |
# Stream it in real-time
|
| 315 |
finish_reason = "stop"
|
|
|
|
| 317 |
|
| 318 |
async for text, fr in _stream_one_response(resp):
|
| 319 |
if fr is not None:
|
|
|
|
| 320 |
finish_reason = fr
|
| 321 |
continue
|
| 322 |
|
| 323 |
if text:
|
| 324 |
chunk_content += text
|
| 325 |
total_content += text
|
|
|
|
| 326 |
sse_data = json.dumps({
|
| 327 |
"id": chunk_id,
|
| 328 |
"object": "chat.completion.chunk",
|
|
|
|
| 339 |
print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
|
| 340 |
|
| 341 |
if finish_reason == "stop":
|
|
|
|
| 342 |
sse_data = json.dumps({
|
| 343 |
"id": chunk_id,
|
| 344 |
"object": "chat.completion.chunk",
|
|
|
|
| 354 |
yield "data: [DONE]\n\n"
|
| 355 |
return
|
| 356 |
|
| 357 |
+
# Auto-continue β send keep-alive
|
|
|
|
| 358 |
yield ": continuing...\n\n"
|
| 359 |
|
|
|
|
| 360 |
conversation.append({"role": "assistant", "content": chunk_content})
|
| 361 |
conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
|
| 362 |
|
| 363 |
print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
|
| 364 |
|
| 365 |
+
# Safety
|
| 366 |
sse_data = json.dumps({
|
| 367 |
"id": chunk_id,
|
| 368 |
"object": "chat.completion.chunk",
|
|
|
|
| 481 |
return {"status": "ok", "session_active": bool(session.cookies)}
|
| 482 |
|
| 483 |
|
| 484 |
+
@app.get("/debug/refresh")
|
| 485 |
+
async def force_refresh():
|
| 486 |
+
"""Force refresh the session cookies."""
|
| 487 |
+
session.last_refresh = 0
|
| 488 |
+
await session.refresh(http_client)
|
| 489 |
+
return {
|
| 490 |
+
"refreshed": True,
|
| 491 |
+
"has_cookies": bool(session.cookies),
|
| 492 |
+
"has_csrf": bool(session.csrf_token),
|
| 493 |
+
}
|
| 494 |
+
|
| 495 |
+
|
| 496 |
@app.get("/debug/session")
|
| 497 |
async def debug_session():
|
| 498 |
return {
|