overwrite69 commited on
Commit
d1a0754
Β·
verified Β·
1 Parent(s): 1c15fbd

v8.0.0: proxy fallback, better timeouts, error handling

Browse files
Files changed (1) hide show
  1. app.py +250 -216
app.py CHANGED
@@ -5,9 +5,10 @@ Deploy to Hugging Face Spaces (Docker SDK)
5
  Features:
6
  - Tool/function calling support (always detects tool call tags in output)
7
  - Auto-continues when upstream hits the ~1K token output limit
8
- - Rotating proxy with aggressive retries for unstable IPs
9
  - SSE keep-alive comments during continuation gaps
10
  - Message normalization for Orchids.app compatibility
 
11
  """
12
 
13
  import asyncio
@@ -16,6 +17,7 @@ import os
16
  import re
17
  import time
18
  import uuid
 
19
  from typing import Optional
20
  from urllib.parse import unquote
21
 
@@ -24,7 +26,7 @@ from fastapi import FastAPI, HTTPException, Request
24
  from fastapi.middleware.cors import CORSMiddleware
25
  from fastapi.responses import StreamingResponse, JSONResponse
26
 
27
- app = FastAPI(title="Haiku API", version="7.0.0")
28
 
29
  # ── CORS ─────────────────────────────────────────────────────────
30
  app.add_middleware(
@@ -38,21 +40,20 @@ app.add_middleware(
38
  # ── Proxy Config ─────────────────────────────────────────────────
39
  PROXY_URL = os.environ.get("PROXY_URL", "")
40
 
41
- PROXY_MAX_RETRIES = 6 # rotating proxy: try many IPs since ~half are dead
42
  PROXY_RETRY_DELAY = 1 # seconds between proxy retries
 
 
43
 
44
 
45
- def _make_client() -> httpx.AsyncClient:
46
  """Create an httpx client, with or without proxy."""
47
  kwargs = dict(
48
  verify=False,
49
- timeout=httpx.Timeout(120.0, connect=15.0),
50
  )
51
- if PROXY_URL:
52
  kwargs["proxy"] = PROXY_URL
53
- print(f"[Proxy] Using rotating proxy: {PROXY_URL.split('@')[-1]}")
54
- else:
55
- print("[Proxy] No proxy configured, direct connection")
56
  return httpx.AsyncClient(**kwargs)
57
 
58
 
@@ -72,65 +73,72 @@ class SessionState:
72
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
73
  return
74
 
75
- for attempt in range(PROXY_MAX_RETRIES):
76
- try:
77
- if PROXY_URL and attempt > 0:
78
- try:
79
- await client.aclose()
80
- except:
81
- pass
82
- client = _make_client()
83
-
84
- resp = await client.get(
85
- "https://chatgpt.org/claude/chat",
86
- follow_redirects=True,
87
- headers={
88
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
89
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
90
- },
91
- timeout=30.0,
92
- )
93
 
94
- if resp.status_code != 200:
95
- print(f"[Session] GET returned {resp.status_code}, retry #{attempt+1}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  await asyncio.sleep(PROXY_RETRY_DELAY)
97
  continue
98
 
99
- new_cookies = httpx.Cookies()
100
- for name, value in resp.cookies.items():
101
- new_cookies.set(name, value, domain="chatgpt.org")
102
- for header in resp.headers.get_list("set-cookie"):
103
- parts = header.split(";")[0]
104
- if "=" in parts:
105
- k, v = parts.split("=", 1)
106
- new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
107
-
108
- xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
109
- if xsrf:
110
- xsrf = unquote(xsrf)
111
-
112
- csrf = None
113
- m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
114
- if m:
115
- csrf = m.group(1)
116
-
117
- self.cookies = new_cookies
118
- self.xsrf_token = xsrf
119
- self.csrf_token = csrf
120
- self.last_refresh = now
121
- print(f"[Session] OK β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
122
- return
123
-
124
- except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
125
- print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
126
- await asyncio.sleep(PROXY_RETRY_DELAY)
127
- continue
128
- except Exception as e:
129
- print(f"[Session] Error attempt #{attempt+1}: {e}")
130
- await asyncio.sleep(PROXY_RETRY_DELAY)
131
- continue
132
-
133
- print("[Session] WARNING: All refresh attempts failed")
134
 
135
 
136
  session = SessionState()
@@ -141,8 +149,10 @@ http_client: Optional[httpx.AsyncClient] = None
141
  @app.on_event("startup")
142
  async def startup():
143
  global http_client
144
- http_client = _make_client()
145
- await session.refresh(http_client)
 
 
146
 
147
  @app.on_event("shutdown")
148
  async def shutdown():
@@ -172,7 +182,6 @@ _TOOL_CALL_INLINE_RE = re.compile(
172
  )
173
 
174
  # Regex for Format 2: Anthropic XML function_calls blocks
175
- # Matches: <function_calls>...</function_calls> (the whole block)
176
  _ANTHROPIC_FC_BLOCK_RE = re.compile(
177
  r'<function_calls>\s*(.*?)\s*</function_calls>',
178
  re.DOTALL
@@ -224,7 +233,7 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
224
  If no tool calls found, returns ([], original_text).
225
  """
226
  tool_calls = []
227
- consumed_spans = [] # (start, end) of text that was part of tool calls
228
 
229
  # --- Format 1: Inline JSON tool calls ---
230
  for match in _TOOL_CALL_INLINE_RE.finditer(text):
@@ -251,12 +260,10 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
251
  func_name = invoke_match.group(1)
252
  invoke_body = invoke_match.group(2)
253
 
254
- # Parse parameters into a dict
255
  params = {}
256
  for param_match in _ANTHROPIC_PARAM_RE.finditer(invoke_body):
257
  param_name = param_match.group(1)
258
  param_value = param_match.group(2)
259
- # Try to parse as JSON value (for numbers, bools, etc.)
260
  try:
261
  params[param_name] = json.loads(param_value)
262
  except (json.JSONDecodeError, ValueError):
@@ -294,8 +301,7 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
294
 
295
 
296
  def _has_incomplete_tool_call(text: str) -> bool:
297
- """Check if text has an opening tool call tag without a matching close.
298
- Checks both inline and Anthropic XML formats."""
299
  # Inline format
300
  inline_opens = len(re.findall(r'<(?:function_call|tool_call)\s+name="[^"]+">', text))
301
  inline_closes = len(re.findall(r'</(?:function_call|tool_call)_?>', text))
@@ -303,15 +309,10 @@ def _has_incomplete_tool_call(text: str) -> bool:
303
  return True
304
 
305
  # Anthropic XML format
306
- fc_opens = text.count('<function_calls>')
307
- fc_closes = text.count('</function_calls>')
308
- if fc_opens > fc_closes:
309
  return True
310
-
311
- # Also check for incomplete <invoke> tags
312
  invoke_opens = len(re.findall(r'<invoke\s+name="[^"]+">', text))
313
- invoke_closes = text.count('</invoke>')
314
- if invoke_opens > invoke_closes:
315
  return True
316
 
317
  return False
@@ -327,11 +328,10 @@ def _build_tool_system_prompt(tools: list[dict], tool_choice=None) -> str:
327
  tool_names = []
328
 
329
  for tool in tools:
330
- # Support both 'tools' and 'functions' (old OpenAI) formats
331
  if "function" in tool:
332
  func = tool["function"]
333
  else:
334
- func = tool # old format: tool IS the function definition
335
 
336
  name = func.get("name", "unknown")
337
  desc = func.get("description", "No description")
@@ -356,12 +356,11 @@ Parameters:
356
 
357
  tools_xml = '\n\n'.join(invoke_blocks)
358
 
359
- # Handle tool_choice
360
  choice_instruction = ""
361
  if tool_choice == "required":
362
  choice_instruction = "\nIMPORTANT: You MUST call at least one tool."
363
  elif tool_choice == "none":
364
- return "" # No tools injected
365
  elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
366
  fname = tool_choice.get("function", {}).get("name", "")
367
  choice_instruction = f"\nIMPORTANT: You MUST call the {fname} function."
@@ -403,7 +402,6 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
403
  and inject tool definitions into system prompt if tools are provided."""
404
  result = []
405
 
406
- # Build tool system prompt if tools are provided
407
  tool_system = None
408
  if tools and tool_choice != "none":
409
  tool_system = _build_tool_system_prompt(tools, tool_choice)
@@ -414,7 +412,6 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
414
  role = msg.get("role", "user")
415
  content = msg.get("content", "")
416
 
417
- # Handle content arrays β†’ plain text
418
  if isinstance(content, list):
419
  content = _flatten_content_array(content)
420
 
@@ -422,7 +419,7 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
422
  content = ""
423
  content = str(content)
424
 
425
- # Handle tool role messages β†’ convert to user message with tool result
426
  if role == "tool":
427
  tool_name = msg.get("name", "unknown_tool")
428
  tool_call_id = msg.get("tool_call_id", "")
@@ -432,7 +429,7 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
432
  })
433
  continue
434
 
435
- # Handle assistant messages with tool_calls β†’ text with function_call blocks
436
  if role == "assistant" and msg.get("tool_calls"):
437
  parts = []
438
  regular_content = content if content and content.strip() else ""
@@ -449,14 +446,12 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
449
  args_json = json.loads(args)
450
  except (json.JSONDecodeError, TypeError):
451
  args_json = {}
452
- # Convert to Anthropic XML format (matches Claude's native output)
453
  invoke_lines = [f'<invoke name="{name}">']
454
  for k, v in args_json.items():
455
  invoke_lines.append(f'<parameter name="{k}">{v}</parameter>')
456
  invoke_lines.append('</invoke>')
457
  invoke_parts.append('\n'.join(invoke_lines))
458
 
459
- # Wrap in <function_calls> block
460
  fc_content = '<function_calls>\n' + '\n'.join(invoke_parts) + '\n</function_calls>'
461
  combined = regular_content + '\n\n' + fc_content if regular_content else fc_content
462
  result.append({"role": "assistant", "content": combined})
@@ -469,13 +464,11 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
469
  system_injected = True
470
  continue
471
 
472
- # System messages with empty content get filtered out
473
  if role == "system" and not content.strip():
474
  continue
475
 
476
  result.append({"role": role, "content": content})
477
 
478
- # If no system message existed but tools need to be injected
479
  if tool_system and not system_injected:
480
  result.insert(0, {"role": "system", "content": tool_system})
481
 
@@ -499,31 +492,36 @@ def _headers() -> dict:
499
  return h
500
 
501
 
502
- # ── Proxy-aware request with retry ──────────────────────────────
503
 
504
  async def _proxy_post(url: str, **kwargs) -> httpx.Response:
505
- """POST with proxy retry logic. Creates new client on each retry to get fresh IP."""
506
  global http_client
507
 
508
- for attempt in range(PROXY_MAX_RETRIES):
509
- try:
510
- resp = await http_client.post(url, **kwargs)
511
- return resp
512
-
513
- except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
514
- print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
515
- if PROXY_URL:
516
  try:
517
  await http_client.aclose()
518
  except:
519
  pass
520
- http_client = _make_client()
521
  await asyncio.sleep(PROXY_RETRY_DELAY)
522
- else:
523
- await asyncio.sleep(2)
524
- continue
525
 
526
- return await http_client.post(url, **kwargs)
 
 
 
 
 
 
 
527
 
528
 
529
  # ── Raw call with retries ───────────────────────────────────────
@@ -536,12 +534,17 @@ async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
536
 
537
  for attempt in range(2): # CSRF retry
538
  for rate_attempt in range(3): # 429 retry
539
- resp = await _proxy_post(
540
- "https://chatgpt.org/api/chat",
541
- json=payload,
542
- headers=_headers(),
543
- cookies=session.cookies,
544
- )
 
 
 
 
 
545
 
546
  if resp.status_code == 419 and attempt == 0:
547
  print("[Chat] 419 -> refreshing session...")
@@ -573,34 +576,39 @@ async def _stream_one_response(resp):
573
  Yields (text, finish_reason) tuples. finish_reason is None for text chunks."""
574
  finish_reason = None
575
 
576
- async for raw_line in resp.aiter_lines():
577
- line = raw_line.strip()
578
- if not line or line.startswith(":"):
579
- continue
580
- if not line.startswith("data: "):
581
- continue
582
-
583
- payload_str = line[6:]
584
- if payload_str.strip() == "[DONE]":
585
- break
586
 
587
- try:
588
- chunk = json.loads(payload_str)
589
- except json.JSONDecodeError:
590
- continue
591
 
592
- for choice in chunk.get("choices", []):
593
- delta = choice.get("delta", {})
594
- c = delta.get("content", "")
595
- if c:
596
- yield c, None
597
 
598
- fr = choice.get("finish_reason")
599
- if fr:
600
- if fr in ("stop", "end_turn"):
601
- finish_reason = "stop"
602
- elif fr in ("length", "max_tokens"):
603
- finish_reason = "length"
 
 
 
 
 
 
 
 
 
 
604
 
605
  yield "", finish_reason
606
 
@@ -619,12 +627,17 @@ async def _raw_call_streaming(messages: list[dict], model: str):
619
  for rate_attempt in range(3): # 429 retry
620
  yield ": thinking...\n\n"
621
 
622
- resp = await _proxy_post(
623
- "https://chatgpt.org/api/chat",
624
- json=payload,
625
- headers=_headers(),
626
- cookies=session.cookies,
627
- )
 
 
 
 
 
628
 
629
  if resp.status_code == 419 and attempt == 0:
630
  print("[Chat] 419 -> refreshing session...")
@@ -684,7 +697,7 @@ def _emit_tool_call_chunks(chunk_id: str, created: int, model: str, tool_calls:
684
  })
685
  chunks.append(f"data: {sse_start}\n\n")
686
 
687
- # Argument chunks β€” split into small pieces for streaming feel
688
  args = tc["function"]["arguments"]
689
  chunk_size = max(1, len(args) // 3)
690
  for offset in range(0, len(args), chunk_size):
@@ -709,7 +722,7 @@ def _emit_tool_call_chunks(chunk_id: str, created: int, model: str, tool_calls:
709
  })
710
  chunks.append(f"data: {sse_arg}\n\n")
711
 
712
- # If there's remaining text alongside tool calls, emit it too
713
  if remaining_text.strip():
714
  sse_text = json.dumps({
715
  "id": chunk_id,
@@ -755,23 +768,43 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
755
  total_content = ""
756
 
757
  for cont_num in range(MAX_CONTINUATIONS):
758
- # Send keep-alive while we buffer
759
  yield ": thinking...\n\n"
760
 
761
  resp = None
762
- async for result in _raw_call_streaming(conversation, model):
763
- if isinstance(result, str):
764
- yield result
765
- else:
766
- resp = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
767
 
768
  if resp is None:
769
- raise HTTPException(500, "No response from upstream")
 
 
 
770
 
771
  finish_reason = "stop"
772
  chunk_content = ""
773
 
774
- # Buffer the full response (don't stream in real-time so we can detect tool calls)
775
  async for text, fr in _stream_one_response(resp):
776
  if fr is not None:
777
  finish_reason = fr
@@ -780,25 +813,21 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
780
  if text:
781
  chunk_content += text
782
  total_content += text
783
-
784
- # Send keep-alive pings while buffering
785
  yield ": streaming...\n\n"
786
 
787
  print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
788
 
789
- # ALWAYS check for tool calls in the accumulated text
790
  tool_calls, remaining_text = _parse_tool_calls(total_content)
791
 
792
  if tool_calls:
793
  print(f"[Chat] Detected {len(tool_calls)} tool call(s)")
794
- # Emit tool calls as proper OpenAI streaming chunks
795
  for sse_chunk in _emit_tool_call_chunks(chunk_id, created, model, tool_calls, remaining_text):
796
  yield sse_chunk
797
  return
798
 
799
- # No tool calls found
800
  if finish_reason == "stop":
801
- # Stream the buffered text content as regular content chunks
802
  chunk_sz = 50
803
  for offset in range(0, len(total_content), chunk_sz):
804
  piece = total_content[offset:offset + chunk_sz]
@@ -815,7 +844,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
815
  })
816
  yield f"data: {sse_data}\n\n"
817
 
818
- # Final stop chunk
819
  sse_data = json.dumps({
820
  "id": chunk_id,
821
  "object": "chat.completion.chunk",
@@ -831,7 +859,7 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
831
  yield "data: [DONE]\n\n"
832
  return
833
 
834
- # Auto-continue for length-limited responses
835
  yield ": continuing...\n\n"
836
 
837
  if _has_incomplete_tool_call(chunk_content):
@@ -862,8 +890,7 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
862
  # ── Non-streaming with auto-continue ────────────────────────────
863
 
864
  async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
865
- """Collect the full response, auto-continuing if cut off.
866
- Always checks for tool calls. Returns dict with 'content' and/or 'tool_calls'."""
867
  conversation = list(messages)
868
  full_content = ""
869
 
@@ -886,9 +913,8 @@ async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
886
  tool_calls, remaining_text = _parse_tool_calls(full_content)
887
 
888
  if tool_calls:
889
- # If there are incomplete tool calls and we got cut off, continue
890
  if _has_incomplete_tool_call(full_content) and finish_reason == "length":
891
- pass # fall through to auto-continue
892
  else:
893
  return {
894
  "tool_calls": tool_calls,
@@ -926,16 +952,14 @@ async def chat_completions(request: Request):
926
  messages_raw = body.get("messages", [])
927
  stream = body.get("stream", False)
928
 
929
- # Extract tools β€” support both new 'tools' and old 'functions' formats
930
  tools = body.get("tools") or body.get("functions") or None
931
  tool_choice = body.get("tool_choice", "auto")
932
 
933
- # Convert old 'functions' format to new 'tools' format if needed
934
  if tools and "function" not in tools[0] and "name" in tools[0]:
935
- # Old format: [{"name": "X", "parameters": {...}}]
936
  tools = [{"type": "function", "function": f} for f in tools]
937
 
938
- # Log request for debugging
939
  print(f"[Request] model={model} stream={stream} tools={bool(tools)} tool_choice={tool_choice} msgs={len(messages_raw)}")
940
 
941
  if not messages_raw or not isinstance(messages_raw, list):
@@ -946,52 +970,59 @@ async def chat_completions(request: Request):
946
  if not messages:
947
  raise HTTPException(400, "No valid messages after normalization")
948
 
949
- if stream:
950
- return StreamingResponse(
951
- _stream_with_auto_continue(messages, model),
952
- media_type="text/event-stream",
953
- headers={
954
- "Cache-Control": "no-cache",
955
- "Connection": "keep-alive",
956
- "X-Accel-Buffering": "no",
957
- },
958
- )
959
- else:
960
- result = await _collect_with_auto_continue(messages, model)
961
-
962
- tool_calls = result.get("tool_calls")
963
- content = result.get("content")
964
-
965
- if tool_calls:
966
- return JSONResponse({
967
- "id": f"chatcmpl-{int(time.time())}",
968
- "object": "chat.completion",
969
- "created": int(time.time()),
970
- "model": model,
971
- "choices": [{
972
- "index": 0,
973
- "message": {
974
- "role": "assistant",
975
- "content": content,
976
- "tool_calls": tool_calls,
977
- },
978
- "finish_reason": "tool_calls",
979
- }],
980
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
981
- })
982
  else:
983
- return JSONResponse({
984
- "id": f"chatcmpl-{int(time.time())}",
985
- "object": "chat.completion",
986
- "created": int(time.time()),
987
- "model": model,
988
- "choices": [{
989
- "index": 0,
990
- "message": {"role": "assistant", "content": content or ""},
991
- "finish_reason": "stop",
992
- }],
993
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
994
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
995
 
996
 
997
  # ── Models / Health ─────────────────────────────────────────────
@@ -1011,7 +1042,7 @@ async def list_models():
1011
  async def root():
1012
  return {
1013
  "status": "ok",
1014
- "version": "7.0.0",
1015
  "proxy": bool(PROXY_URL),
1016
  "tool_calling": True,
1017
  "endpoints": ["/v1/chat/completions", "/v1/models"],
@@ -1030,7 +1061,10 @@ async def health():
1030
  @app.get("/debug/refresh")
1031
  async def force_refresh():
1032
  session.last_refresh = 0
1033
- await session.refresh(http_client)
 
 
 
1034
  return {
1035
  "refreshed": True,
1036
  "has_cookies": bool(session.cookies),
 
5
  Features:
6
  - Tool/function calling support (always detects tool call tags in output)
7
  - Auto-continues when upstream hits the ~1K token output limit
8
+ - Rotating proxy with direct-connection fallback
9
  - SSE keep-alive comments during continuation gaps
10
  - Message normalization for Orchids.app compatibility
11
+ - Robust error handling with proper JSON error responses
12
  """
13
 
14
  import asyncio
 
17
  import re
18
  import time
19
  import uuid
20
+ import traceback
21
  from typing import Optional
22
  from urllib.parse import unquote
23
 
 
26
  from fastapi.middleware.cors import CORSMiddleware
27
  from fastapi.responses import StreamingResponse, JSONResponse
28
 
29
+ app = FastAPI(title="Haiku API", version="8.0.0")
30
 
31
  # ── CORS ─────────────────────────────────────────────────────────
32
  app.add_middleware(
 
40
  # ── Proxy Config ─────────────────────────────────────────────────
41
  PROXY_URL = os.environ.get("PROXY_URL", "")
42
 
43
+ PROXY_MAX_RETRIES = 4 # rotating proxy: try a few IPs
44
  PROXY_RETRY_DELAY = 1 # seconds between proxy retries
45
+ CONNECT_TIMEOUT = 10.0 # short connect timeout
46
+ READ_TIMEOUT = 120.0 # long read timeout (for streaming responses)
47
 
48
 
49
+ def _make_client(use_proxy: bool = True) -> httpx.AsyncClient:
50
  """Create an httpx client, with or without proxy."""
51
  kwargs = dict(
52
  verify=False,
53
+ timeout=httpx.Timeout(READ_TIMEOUT, connect=CONNECT_TIMEOUT),
54
  )
55
+ if use_proxy and PROXY_URL:
56
  kwargs["proxy"] = PROXY_URL
 
 
 
57
  return httpx.AsyncClient(**kwargs)
58
 
59
 
 
73
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
74
  return
75
 
76
+ # Try with proxy first, then fallback to direct
77
+ for use_proxy in [True, False]:
78
+ if use_proxy and not PROXY_URL:
79
+ continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ working_client = client
82
+ for attempt in range(PROXY_MAX_RETRIES if use_proxy else 2):
83
+ try:
84
+ if attempt > 0:
85
+ try:
86
+ await working_client.aclose()
87
+ except:
88
+ pass
89
+ working_client = _make_client(use_proxy=use_proxy)
90
+
91
+ resp = await working_client.get(
92
+ "https://chatgpt.org/claude/chat",
93
+ follow_redirects=True,
94
+ headers={
95
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
96
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
97
+ },
98
+ timeout=20.0,
99
+ )
100
+
101
+ if resp.status_code != 200:
102
+ print(f"[Session] GET returned {resp.status_code} (proxy={use_proxy}, attempt {attempt+1})")
103
+ await asyncio.sleep(PROXY_RETRY_DELAY)
104
+ continue
105
+
106
+ new_cookies = httpx.Cookies()
107
+ for name, value in resp.cookies.items():
108
+ new_cookies.set(name, value, domain="chatgpt.org")
109
+ for header in resp.headers.get_list("set-cookie"):
110
+ parts = header.split(";")[0]
111
+ if "=" in parts:
112
+ k, v = parts.split("=", 1)
113
+ new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
114
+
115
+ xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
116
+ if xsrf:
117
+ xsrf = unquote(xsrf)
118
+
119
+ csrf = None
120
+ m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
121
+ if m:
122
+ csrf = m.group(1)
123
+
124
+ self.cookies = new_cookies
125
+ self.xsrf_token = xsrf
126
+ self.csrf_token = csrf
127
+ self.last_refresh = now
128
+ mode = "proxy" if use_proxy else "direct"
129
+ print(f"[Session] OK ({mode}) β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())}")
130
+ return working_client
131
+
132
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
133
+ print(f"[Session] Connection error (proxy={use_proxy}, attempt {attempt+1}): {type(e).__name__}")
134
+ await asyncio.sleep(PROXY_RETRY_DELAY)
135
+ continue
136
+ except Exception as e:
137
+ print(f"[Session] Error (proxy={use_proxy}, attempt {attempt+1}): {type(e).__name__}: {e}")
138
  await asyncio.sleep(PROXY_RETRY_DELAY)
139
  continue
140
 
141
+ print("[Session] WARNING: All refresh attempts failed (both proxy and direct)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
 
144
  session = SessionState()
 
149
  @app.on_event("startup")
150
  async def startup():
151
  global http_client
152
+ http_client = _make_client(use_proxy=bool(PROXY_URL))
153
+ result = await session.refresh(http_client)
154
+ if result is not None:
155
+ http_client = result
156
 
157
  @app.on_event("shutdown")
158
  async def shutdown():
 
182
  )
183
 
184
  # Regex for Format 2: Anthropic XML function_calls blocks
 
185
  _ANTHROPIC_FC_BLOCK_RE = re.compile(
186
  r'<function_calls>\s*(.*?)\s*</function_calls>',
187
  re.DOTALL
 
233
  If no tool calls found, returns ([], original_text).
234
  """
235
  tool_calls = []
236
+ consumed_spans = []
237
 
238
  # --- Format 1: Inline JSON tool calls ---
239
  for match in _TOOL_CALL_INLINE_RE.finditer(text):
 
260
  func_name = invoke_match.group(1)
261
  invoke_body = invoke_match.group(2)
262
 
 
263
  params = {}
264
  for param_match in _ANTHROPIC_PARAM_RE.finditer(invoke_body):
265
  param_name = param_match.group(1)
266
  param_value = param_match.group(2)
 
267
  try:
268
  params[param_name] = json.loads(param_value)
269
  except (json.JSONDecodeError, ValueError):
 
301
 
302
 
303
  def _has_incomplete_tool_call(text: str) -> bool:
304
+ """Check if text has an opening tool call tag without a matching close."""
 
305
  # Inline format
306
  inline_opens = len(re.findall(r'<(?:function_call|tool_call)\s+name="[^"]+">', text))
307
  inline_closes = len(re.findall(r'</(?:function_call|tool_call)_?>', text))
 
309
  return True
310
 
311
  # Anthropic XML format
312
+ if text.count('<function_calls>') > text.count('</function_calls>'):
 
 
313
  return True
 
 
314
  invoke_opens = len(re.findall(r'<invoke\s+name="[^"]+">', text))
315
+ if invoke_opens > text.count('</invoke>'):
 
316
  return True
317
 
318
  return False
 
328
  tool_names = []
329
 
330
  for tool in tools:
 
331
  if "function" in tool:
332
  func = tool["function"]
333
  else:
334
+ func = tool
335
 
336
  name = func.get("name", "unknown")
337
  desc = func.get("description", "No description")
 
356
 
357
  tools_xml = '\n\n'.join(invoke_blocks)
358
 
 
359
  choice_instruction = ""
360
  if tool_choice == "required":
361
  choice_instruction = "\nIMPORTANT: You MUST call at least one tool."
362
  elif tool_choice == "none":
363
+ return ""
364
  elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
365
  fname = tool_choice.get("function", {}).get("name", "")
366
  choice_instruction = f"\nIMPORTANT: You MUST call the {fname} function."
 
402
  and inject tool definitions into system prompt if tools are provided."""
403
  result = []
404
 
 
405
  tool_system = None
406
  if tools and tool_choice != "none":
407
  tool_system = _build_tool_system_prompt(tools, tool_choice)
 
412
  role = msg.get("role", "user")
413
  content = msg.get("content", "")
414
 
 
415
  if isinstance(content, list):
416
  content = _flatten_content_array(content)
417
 
 
419
  content = ""
420
  content = str(content)
421
 
422
+ # Handle tool role messages
423
  if role == "tool":
424
  tool_name = msg.get("name", "unknown_tool")
425
  tool_call_id = msg.get("tool_call_id", "")
 
429
  })
430
  continue
431
 
432
+ # Handle assistant messages with tool_calls
433
  if role == "assistant" and msg.get("tool_calls"):
434
  parts = []
435
  regular_content = content if content and content.strip() else ""
 
446
  args_json = json.loads(args)
447
  except (json.JSONDecodeError, TypeError):
448
  args_json = {}
 
449
  invoke_lines = [f'<invoke name="{name}">']
450
  for k, v in args_json.items():
451
  invoke_lines.append(f'<parameter name="{k}">{v}</parameter>')
452
  invoke_lines.append('</invoke>')
453
  invoke_parts.append('\n'.join(invoke_lines))
454
 
 
455
  fc_content = '<function_calls>\n' + '\n'.join(invoke_parts) + '\n</function_calls>'
456
  combined = regular_content + '\n\n' + fc_content if regular_content else fc_content
457
  result.append({"role": "assistant", "content": combined})
 
464
  system_injected = True
465
  continue
466
 
 
467
  if role == "system" and not content.strip():
468
  continue
469
 
470
  result.append({"role": role, "content": content})
471
 
 
472
  if tool_system and not system_injected:
473
  result.insert(0, {"role": "system", "content": tool_system})
474
 
 
492
  return h
493
 
494
 
495
+ # ── Proxy-aware request with retry + direct fallback ──────────────
496
 
497
  async def _proxy_post(url: str, **kwargs) -> httpx.Response:
498
+ """POST with proxy retry logic, falling back to direct connection."""
499
  global http_client
500
 
501
+ # Try with proxy first
502
+ if PROXY_URL:
503
+ for attempt in range(PROXY_MAX_RETRIES):
504
+ try:
505
+ resp = await http_client.post(url, **kwargs)
506
+ return resp
507
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
508
+ print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
509
  try:
510
  await http_client.aclose()
511
  except:
512
  pass
513
+ http_client = _make_client(use_proxy=True)
514
  await asyncio.sleep(PROXY_RETRY_DELAY)
515
+ continue
 
 
516
 
517
+ # Fallback: try direct connection
518
+ print("[Proxy] Falling back to direct connection")
519
+ direct_client = _make_client(use_proxy=False)
520
+ try:
521
+ resp = await direct_client.post(url, **kwargs)
522
+ return resp
523
+ finally:
524
+ await direct_client.aclose()
525
 
526
 
527
  # ── Raw call with retries ───────────────────────────────────────
 
534
 
535
  for attempt in range(2): # CSRF retry
536
  for rate_attempt in range(3): # 429 retry
537
+ try:
538
+ resp = await _proxy_post(
539
+ "https://chatgpt.org/api/chat",
540
+ json=payload,
541
+ headers=_headers(),
542
+ cookies=session.cookies,
543
+ )
544
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
545
+ print(f"[Chat] Connection failed: {type(e).__name__}")
546
+ session.last_refresh = 0
547
+ raise HTTPException(502, f"Cannot reach upstream: {type(e).__name__}")
548
 
549
  if resp.status_code == 419 and attempt == 0:
550
  print("[Chat] 419 -> refreshing session...")
 
576
  Yields (text, finish_reason) tuples. finish_reason is None for text chunks."""
577
  finish_reason = None
578
 
579
+ try:
580
+ async for raw_line in resp.aiter_lines():
581
+ line = raw_line.strip()
582
+ if not line or line.startswith(":"):
583
+ continue
584
+ if not line.startswith("data: "):
585
+ continue
 
 
 
586
 
587
+ payload_str = line[6:]
588
+ if payload_str.strip() == "[DONE]":
589
+ break
 
590
 
591
+ try:
592
+ chunk = json.loads(payload_str)
593
+ except json.JSONDecodeError:
594
+ continue
 
595
 
596
+ for choice in chunk.get("choices", []):
597
+ delta = choice.get("delta", {})
598
+ c = delta.get("content", "")
599
+ if c:
600
+ yield c, None
601
+
602
+ fr = choice.get("finish_reason")
603
+ if fr:
604
+ if fr in ("stop", "end_turn"):
605
+ finish_reason = "stop"
606
+ elif fr in ("length", "max_tokens"):
607
+ finish_reason = "length"
608
+ except (httpx.ReadError, httpx.RemoteProtocolError) as e:
609
+ print(f"[Stream] Connection lost during streaming: {type(e).__name__}")
610
+ except Exception as e:
611
+ print(f"[Stream] Error during streaming: {type(e).__name__}: {e}")
612
 
613
  yield "", finish_reason
614
 
 
627
  for rate_attempt in range(3): # 429 retry
628
  yield ": thinking...\n\n"
629
 
630
+ try:
631
+ resp = await _proxy_post(
632
+ "https://chatgpt.org/api/chat",
633
+ json=payload,
634
+ headers=_headers(),
635
+ cookies=session.cookies,
636
+ )
637
+ except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
638
+ print(f"[Chat] Connection failed: {type(e).__name__}")
639
+ session.last_refresh = 0
640
+ raise HTTPException(502, f"Cannot reach upstream: {type(e).__name__}")
641
 
642
  if resp.status_code == 419 and attempt == 0:
643
  print("[Chat] 419 -> refreshing session...")
 
697
  })
698
  chunks.append(f"data: {sse_start}\n\n")
699
 
700
+ # Argument chunks
701
  args = tc["function"]["arguments"]
702
  chunk_size = max(1, len(args) // 3)
703
  for offset in range(0, len(args), chunk_size):
 
722
  })
723
  chunks.append(f"data: {sse_arg}\n\n")
724
 
725
+ # Remaining text alongside tool calls
726
  if remaining_text.strip():
727
  sse_text = json.dumps({
728
  "id": chunk_id,
 
768
  total_content = ""
769
 
770
  for cont_num in range(MAX_CONTINUATIONS):
 
771
  yield ": thinking...\n\n"
772
 
773
  resp = None
774
+ try:
775
+ async for result in _raw_call_streaming(conversation, model):
776
+ if isinstance(result, str):
777
+ yield result
778
+ else:
779
+ resp = result
780
+ except HTTPException as e:
781
+ # Send error as SSE then stop
782
+ error_data = json.dumps({
783
+ "id": chunk_id,
784
+ "object": "chat.completion.chunk",
785
+ "created": created,
786
+ "model": model,
787
+ "choices": [{
788
+ "index": 0,
789
+ "delta": {"content": f"\n\n[Error: {e.detail}]"},
790
+ "finish_reason": None,
791
+ }],
792
+ })
793
+ yield f"data: {error_data}\n\n"
794
+ yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
795
+ yield "data: [DONE]\n\n"
796
+ return
797
 
798
  if resp is None:
799
+ yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {'content': '[Error: No response from upstream]'}, 'finish_reason': None}]})}\n\n"
800
+ yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
801
+ yield "data: [DONE]\n\n"
802
+ return
803
 
804
  finish_reason = "stop"
805
  chunk_content = ""
806
 
807
+ # Buffer the full response
808
  async for text, fr in _stream_one_response(resp):
809
  if fr is not None:
810
  finish_reason = fr
 
813
  if text:
814
  chunk_content += text
815
  total_content += text
 
 
816
  yield ": streaming...\n\n"
817
 
818
  print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
819
 
820
+ # ALWAYS check for tool calls
821
  tool_calls, remaining_text = _parse_tool_calls(total_content)
822
 
823
  if tool_calls:
824
  print(f"[Chat] Detected {len(tool_calls)} tool call(s)")
 
825
  for sse_chunk in _emit_tool_call_chunks(chunk_id, created, model, tool_calls, remaining_text):
826
  yield sse_chunk
827
  return
828
 
829
+ # No tool calls
830
  if finish_reason == "stop":
 
831
  chunk_sz = 50
832
  for offset in range(0, len(total_content), chunk_sz):
833
  piece = total_content[offset:offset + chunk_sz]
 
844
  })
845
  yield f"data: {sse_data}\n\n"
846
 
 
847
  sse_data = json.dumps({
848
  "id": chunk_id,
849
  "object": "chat.completion.chunk",
 
859
  yield "data: [DONE]\n\n"
860
  return
861
 
862
+ # Auto-continue
863
  yield ": continuing...\n\n"
864
 
865
  if _has_incomplete_tool_call(chunk_content):
 
890
  # ── Non-streaming with auto-continue ────────────────────────────
891
 
892
  async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
893
+ """Collect the full response, auto-continuing if cut off."""
 
894
  conversation = list(messages)
895
  full_content = ""
896
 
 
913
  tool_calls, remaining_text = _parse_tool_calls(full_content)
914
 
915
  if tool_calls:
 
916
  if _has_incomplete_tool_call(full_content) and finish_reason == "length":
917
+ pass
918
  else:
919
  return {
920
  "tool_calls": tool_calls,
 
952
  messages_raw = body.get("messages", [])
953
  stream = body.get("stream", False)
954
 
955
+ # Extract tools
956
  tools = body.get("tools") or body.get("functions") or None
957
  tool_choice = body.get("tool_choice", "auto")
958
 
959
+ # Convert old 'functions' format
960
  if tools and "function" not in tools[0] and "name" in tools[0]:
 
961
  tools = [{"type": "function", "function": f} for f in tools]
962
 
 
963
  print(f"[Request] model={model} stream={stream} tools={bool(tools)} tool_choice={tool_choice} msgs={len(messages_raw)}")
964
 
965
  if not messages_raw or not isinstance(messages_raw, list):
 
970
  if not messages:
971
  raise HTTPException(400, "No valid messages after normalization")
972
 
973
+ try:
974
+ if stream:
975
+ return StreamingResponse(
976
+ _stream_with_auto_continue(messages, model),
977
+ media_type="text/event-stream",
978
+ headers={
979
+ "Cache-Control": "no-cache",
980
+ "Connection": "keep-alive",
981
+ "X-Accel-Buffering": "no",
982
+ },
983
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
984
  else:
985
+ result = await _collect_with_auto_continue(messages, model)
986
+
987
+ tool_calls = result.get("tool_calls")
988
+ content = result.get("content")
989
+
990
+ if tool_calls:
991
+ return JSONResponse({
992
+ "id": f"chatcmpl-{int(time.time())}",
993
+ "object": "chat.completion",
994
+ "created": int(time.time()),
995
+ "model": model,
996
+ "choices": [{
997
+ "index": 0,
998
+ "message": {
999
+ "role": "assistant",
1000
+ "content": content,
1001
+ "tool_calls": tool_calls,
1002
+ },
1003
+ "finish_reason": "tool_calls",
1004
+ }],
1005
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
1006
+ })
1007
+ else:
1008
+ return JSONResponse({
1009
+ "id": f"chatcmpl-{int(time.time())}",
1010
+ "object": "chat.completion",
1011
+ "created": int(time.time()),
1012
+ "model": model,
1013
+ "choices": [{
1014
+ "index": 0,
1015
+ "message": {"role": "assistant", "content": content or ""},
1016
+ "finish_reason": "stop",
1017
+ }],
1018
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
1019
+ })
1020
+ except HTTPException:
1021
+ raise
1022
+ except Exception as e:
1023
+ print(f"[Request] Unhandled error: {type(e).__name__}: {e}")
1024
+ print(traceback.format_exc())
1025
+ raise HTTPException(500, f"Internal error: {type(e).__name__}")
1026
 
1027
 
1028
  # ── Models / Health ─────────────────────────────────────────────
 
1042
  async def root():
1043
  return {
1044
  "status": "ok",
1045
+ "version": "8.0.0",
1046
  "proxy": bool(PROXY_URL),
1047
  "tool_calling": True,
1048
  "endpoints": ["/v1/chat/completions", "/v1/models"],
 
1061
  @app.get("/debug/refresh")
1062
  async def force_refresh():
1063
  session.last_refresh = 0
1064
+ result = await session.refresh(http_client)
1065
+ if result is not None:
1066
+ global http_client
1067
+ http_client = result
1068
  return {
1069
  "refreshed": True,
1070
  "has_cookies": bool(session.cookies),