Spaces:
Sleeping
Sleeping
v8.0.0: proxy fallback, better timeouts, error handling
Browse files
app.py
CHANGED
|
@@ -5,9 +5,10 @@ Deploy to Hugging Face Spaces (Docker SDK)
|
|
| 5 |
Features:
|
| 6 |
- Tool/function calling support (always detects tool call tags in output)
|
| 7 |
- Auto-continues when upstream hits the ~1K token output limit
|
| 8 |
-
- Rotating proxy with
|
| 9 |
- SSE keep-alive comments during continuation gaps
|
| 10 |
- Message normalization for Orchids.app compatibility
|
|
|
|
| 11 |
"""
|
| 12 |
|
| 13 |
import asyncio
|
|
@@ -16,6 +17,7 @@ import os
|
|
| 16 |
import re
|
| 17 |
import time
|
| 18 |
import uuid
|
|
|
|
| 19 |
from typing import Optional
|
| 20 |
from urllib.parse import unquote
|
| 21 |
|
|
@@ -24,7 +26,7 @@ from fastapi import FastAPI, HTTPException, Request
|
|
| 24 |
from fastapi.middleware.cors import CORSMiddleware
|
| 25 |
from fastapi.responses import StreamingResponse, JSONResponse
|
| 26 |
|
| 27 |
-
app = FastAPI(title="Haiku API", version="
|
| 28 |
|
| 29 |
# ββ CORS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
app.add_middleware(
|
|
@@ -38,21 +40,20 @@ app.add_middleware(
|
|
| 38 |
# ββ Proxy Config βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 39 |
PROXY_URL = os.environ.get("PROXY_URL", "")
|
| 40 |
|
| 41 |
-
PROXY_MAX_RETRIES =
|
| 42 |
PROXY_RETRY_DELAY = 1 # seconds between proxy retries
|
|
|
|
|
|
|
| 43 |
|
| 44 |
|
| 45 |
-
def _make_client() -> httpx.AsyncClient:
|
| 46 |
"""Create an httpx client, with or without proxy."""
|
| 47 |
kwargs = dict(
|
| 48 |
verify=False,
|
| 49 |
-
timeout=httpx.Timeout(
|
| 50 |
)
|
| 51 |
-
if PROXY_URL:
|
| 52 |
kwargs["proxy"] = PROXY_URL
|
| 53 |
-
print(f"[Proxy] Using rotating proxy: {PROXY_URL.split('@')[-1]}")
|
| 54 |
-
else:
|
| 55 |
-
print("[Proxy] No proxy configured, direct connection")
|
| 56 |
return httpx.AsyncClient(**kwargs)
|
| 57 |
|
| 58 |
|
|
@@ -72,65 +73,72 @@ class SessionState:
|
|
| 72 |
if self.cookies and (now - self.last_refresh) < self.refresh_interval:
|
| 73 |
return
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
await client.aclose()
|
| 80 |
-
except:
|
| 81 |
-
pass
|
| 82 |
-
client = _make_client()
|
| 83 |
-
|
| 84 |
-
resp = await client.get(
|
| 85 |
-
"https://chatgpt.org/claude/chat",
|
| 86 |
-
follow_redirects=True,
|
| 87 |
-
headers={
|
| 88 |
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
|
| 89 |
-
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
| 90 |
-
},
|
| 91 |
-
timeout=30.0,
|
| 92 |
-
)
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 97 |
continue
|
| 98 |
|
| 99 |
-
|
| 100 |
-
for name, value in resp.cookies.items():
|
| 101 |
-
new_cookies.set(name, value, domain="chatgpt.org")
|
| 102 |
-
for header in resp.headers.get_list("set-cookie"):
|
| 103 |
-
parts = header.split(";")[0]
|
| 104 |
-
if "=" in parts:
|
| 105 |
-
k, v = parts.split("=", 1)
|
| 106 |
-
new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
|
| 107 |
-
|
| 108 |
-
xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
|
| 109 |
-
if xsrf:
|
| 110 |
-
xsrf = unquote(xsrf)
|
| 111 |
-
|
| 112 |
-
csrf = None
|
| 113 |
-
m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
|
| 114 |
-
if m:
|
| 115 |
-
csrf = m.group(1)
|
| 116 |
-
|
| 117 |
-
self.cookies = new_cookies
|
| 118 |
-
self.xsrf_token = xsrf
|
| 119 |
-
self.csrf_token = csrf
|
| 120 |
-
self.last_refresh = now
|
| 121 |
-
print(f"[Session] OK β CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
|
| 122 |
-
return
|
| 123 |
-
|
| 124 |
-
except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
|
| 125 |
-
print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
|
| 126 |
-
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 127 |
-
continue
|
| 128 |
-
except Exception as e:
|
| 129 |
-
print(f"[Session] Error attempt #{attempt+1}: {e}")
|
| 130 |
-
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 131 |
-
continue
|
| 132 |
-
|
| 133 |
-
print("[Session] WARNING: All refresh attempts failed")
|
| 134 |
|
| 135 |
|
| 136 |
session = SessionState()
|
|
@@ -141,8 +149,10 @@ http_client: Optional[httpx.AsyncClient] = None
|
|
| 141 |
@app.on_event("startup")
|
| 142 |
async def startup():
|
| 143 |
global http_client
|
| 144 |
-
http_client = _make_client()
|
| 145 |
-
await session.refresh(http_client)
|
|
|
|
|
|
|
| 146 |
|
| 147 |
@app.on_event("shutdown")
|
| 148 |
async def shutdown():
|
|
@@ -172,7 +182,6 @@ _TOOL_CALL_INLINE_RE = re.compile(
|
|
| 172 |
)
|
| 173 |
|
| 174 |
# Regex for Format 2: Anthropic XML function_calls blocks
|
| 175 |
-
# Matches: <function_calls>...</function_calls> (the whole block)
|
| 176 |
_ANTHROPIC_FC_BLOCK_RE = re.compile(
|
| 177 |
r'<function_calls>\s*(.*?)\s*</function_calls>',
|
| 178 |
re.DOTALL
|
|
@@ -224,7 +233,7 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
|
|
| 224 |
If no tool calls found, returns ([], original_text).
|
| 225 |
"""
|
| 226 |
tool_calls = []
|
| 227 |
-
consumed_spans = []
|
| 228 |
|
| 229 |
# --- Format 1: Inline JSON tool calls ---
|
| 230 |
for match in _TOOL_CALL_INLINE_RE.finditer(text):
|
|
@@ -251,12 +260,10 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
|
|
| 251 |
func_name = invoke_match.group(1)
|
| 252 |
invoke_body = invoke_match.group(2)
|
| 253 |
|
| 254 |
-
# Parse parameters into a dict
|
| 255 |
params = {}
|
| 256 |
for param_match in _ANTHROPIC_PARAM_RE.finditer(invoke_body):
|
| 257 |
param_name = param_match.group(1)
|
| 258 |
param_value = param_match.group(2)
|
| 259 |
-
# Try to parse as JSON value (for numbers, bools, etc.)
|
| 260 |
try:
|
| 261 |
params[param_name] = json.loads(param_value)
|
| 262 |
except (json.JSONDecodeError, ValueError):
|
|
@@ -294,8 +301,7 @@ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
|
|
| 294 |
|
| 295 |
|
| 296 |
def _has_incomplete_tool_call(text: str) -> bool:
|
| 297 |
-
"""Check if text has an opening tool call tag without a matching close.
|
| 298 |
-
Checks both inline and Anthropic XML formats."""
|
| 299 |
# Inline format
|
| 300 |
inline_opens = len(re.findall(r'<(?:function_call|tool_call)\s+name="[^"]+">', text))
|
| 301 |
inline_closes = len(re.findall(r'</(?:function_call|tool_call)_?>', text))
|
|
@@ -303,15 +309,10 @@ def _has_incomplete_tool_call(text: str) -> bool:
|
|
| 303 |
return True
|
| 304 |
|
| 305 |
# Anthropic XML format
|
| 306 |
-
|
| 307 |
-
fc_closes = text.count('</function_calls>')
|
| 308 |
-
if fc_opens > fc_closes:
|
| 309 |
return True
|
| 310 |
-
|
| 311 |
-
# Also check for incomplete <invoke> tags
|
| 312 |
invoke_opens = len(re.findall(r'<invoke\s+name="[^"]+">', text))
|
| 313 |
-
|
| 314 |
-
if invoke_opens > invoke_closes:
|
| 315 |
return True
|
| 316 |
|
| 317 |
return False
|
|
@@ -327,11 +328,10 @@ def _build_tool_system_prompt(tools: list[dict], tool_choice=None) -> str:
|
|
| 327 |
tool_names = []
|
| 328 |
|
| 329 |
for tool in tools:
|
| 330 |
-
# Support both 'tools' and 'functions' (old OpenAI) formats
|
| 331 |
if "function" in tool:
|
| 332 |
func = tool["function"]
|
| 333 |
else:
|
| 334 |
-
func = tool
|
| 335 |
|
| 336 |
name = func.get("name", "unknown")
|
| 337 |
desc = func.get("description", "No description")
|
|
@@ -356,12 +356,11 @@ Parameters:
|
|
| 356 |
|
| 357 |
tools_xml = '\n\n'.join(invoke_blocks)
|
| 358 |
|
| 359 |
-
# Handle tool_choice
|
| 360 |
choice_instruction = ""
|
| 361 |
if tool_choice == "required":
|
| 362 |
choice_instruction = "\nIMPORTANT: You MUST call at least one tool."
|
| 363 |
elif tool_choice == "none":
|
| 364 |
-
return ""
|
| 365 |
elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
|
| 366 |
fname = tool_choice.get("function", {}).get("name", "")
|
| 367 |
choice_instruction = f"\nIMPORTANT: You MUST call the {fname} function."
|
|
@@ -403,7 +402,6 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 403 |
and inject tool definitions into system prompt if tools are provided."""
|
| 404 |
result = []
|
| 405 |
|
| 406 |
-
# Build tool system prompt if tools are provided
|
| 407 |
tool_system = None
|
| 408 |
if tools and tool_choice != "none":
|
| 409 |
tool_system = _build_tool_system_prompt(tools, tool_choice)
|
|
@@ -414,7 +412,6 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 414 |
role = msg.get("role", "user")
|
| 415 |
content = msg.get("content", "")
|
| 416 |
|
| 417 |
-
# Handle content arrays β plain text
|
| 418 |
if isinstance(content, list):
|
| 419 |
content = _flatten_content_array(content)
|
| 420 |
|
|
@@ -422,7 +419,7 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 422 |
content = ""
|
| 423 |
content = str(content)
|
| 424 |
|
| 425 |
-
# Handle tool role messages
|
| 426 |
if role == "tool":
|
| 427 |
tool_name = msg.get("name", "unknown_tool")
|
| 428 |
tool_call_id = msg.get("tool_call_id", "")
|
|
@@ -432,7 +429,7 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 432 |
})
|
| 433 |
continue
|
| 434 |
|
| 435 |
-
# Handle assistant messages with tool_calls
|
| 436 |
if role == "assistant" and msg.get("tool_calls"):
|
| 437 |
parts = []
|
| 438 |
regular_content = content if content and content.strip() else ""
|
|
@@ -449,14 +446,12 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 449 |
args_json = json.loads(args)
|
| 450 |
except (json.JSONDecodeError, TypeError):
|
| 451 |
args_json = {}
|
| 452 |
-
# Convert to Anthropic XML format (matches Claude's native output)
|
| 453 |
invoke_lines = [f'<invoke name="{name}">']
|
| 454 |
for k, v in args_json.items():
|
| 455 |
invoke_lines.append(f'<parameter name="{k}">{v}</parameter>')
|
| 456 |
invoke_lines.append('</invoke>')
|
| 457 |
invoke_parts.append('\n'.join(invoke_lines))
|
| 458 |
|
| 459 |
-
# Wrap in <function_calls> block
|
| 460 |
fc_content = '<function_calls>\n' + '\n'.join(invoke_parts) + '\n</function_calls>'
|
| 461 |
combined = regular_content + '\n\n' + fc_content if regular_content else fc_content
|
| 462 |
result.append({"role": "assistant", "content": combined})
|
|
@@ -469,13 +464,11 @@ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choi
|
|
| 469 |
system_injected = True
|
| 470 |
continue
|
| 471 |
|
| 472 |
-
# System messages with empty content get filtered out
|
| 473 |
if role == "system" and not content.strip():
|
| 474 |
continue
|
| 475 |
|
| 476 |
result.append({"role": role, "content": content})
|
| 477 |
|
| 478 |
-
# If no system message existed but tools need to be injected
|
| 479 |
if tool_system and not system_injected:
|
| 480 |
result.insert(0, {"role": "system", "content": tool_system})
|
| 481 |
|
|
@@ -499,31 +492,36 @@ def _headers() -> dict:
|
|
| 499 |
return h
|
| 500 |
|
| 501 |
|
| 502 |
-
# ββ Proxy-aware request with retry ββββββββββββββ
|
| 503 |
|
| 504 |
async def _proxy_post(url: str, **kwargs) -> httpx.Response:
|
| 505 |
-
"""POST with proxy retry logic
|
| 506 |
global http_client
|
| 507 |
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
try:
|
| 517 |
await http_client.aclose()
|
| 518 |
except:
|
| 519 |
pass
|
| 520 |
-
http_client = _make_client()
|
| 521 |
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 522 |
-
|
| 523 |
-
await asyncio.sleep(2)
|
| 524 |
-
continue
|
| 525 |
|
| 526 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
|
| 528 |
|
| 529 |
# ββ Raw call with retries βββββββββββββββββββββββββββββββββββββββ
|
|
@@ -536,12 +534,17 @@ async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
|
|
| 536 |
|
| 537 |
for attempt in range(2): # CSRF retry
|
| 538 |
for rate_attempt in range(3): # 429 retry
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 545 |
|
| 546 |
if resp.status_code == 419 and attempt == 0:
|
| 547 |
print("[Chat] 419 -> refreshing session...")
|
|
@@ -573,34 +576,39 @@ async def _stream_one_response(resp):
|
|
| 573 |
Yields (text, finish_reason) tuples. finish_reason is None for text chunks."""
|
| 574 |
finish_reason = None
|
| 575 |
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
payload_str = line[6:]
|
| 584 |
-
if payload_str.strip() == "[DONE]":
|
| 585 |
-
break
|
| 586 |
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
continue
|
| 591 |
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
yield c, None
|
| 597 |
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 604 |
|
| 605 |
yield "", finish_reason
|
| 606 |
|
|
@@ -619,12 +627,17 @@ async def _raw_call_streaming(messages: list[dict], model: str):
|
|
| 619 |
for rate_attempt in range(3): # 429 retry
|
| 620 |
yield ": thinking...\n\n"
|
| 621 |
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 628 |
|
| 629 |
if resp.status_code == 419 and attempt == 0:
|
| 630 |
print("[Chat] 419 -> refreshing session...")
|
|
@@ -684,7 +697,7 @@ def _emit_tool_call_chunks(chunk_id: str, created: int, model: str, tool_calls:
|
|
| 684 |
})
|
| 685 |
chunks.append(f"data: {sse_start}\n\n")
|
| 686 |
|
| 687 |
-
# Argument chunks
|
| 688 |
args = tc["function"]["arguments"]
|
| 689 |
chunk_size = max(1, len(args) // 3)
|
| 690 |
for offset in range(0, len(args), chunk_size):
|
|
@@ -709,7 +722,7 @@ def _emit_tool_call_chunks(chunk_id: str, created: int, model: str, tool_calls:
|
|
| 709 |
})
|
| 710 |
chunks.append(f"data: {sse_arg}\n\n")
|
| 711 |
|
| 712 |
-
#
|
| 713 |
if remaining_text.strip():
|
| 714 |
sse_text = json.dumps({
|
| 715 |
"id": chunk_id,
|
|
@@ -755,23 +768,43 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 755 |
total_content = ""
|
| 756 |
|
| 757 |
for cont_num in range(MAX_CONTINUATIONS):
|
| 758 |
-
# Send keep-alive while we buffer
|
| 759 |
yield ": thinking...\n\n"
|
| 760 |
|
| 761 |
resp = None
|
| 762 |
-
|
| 763 |
-
|
| 764 |
-
|
| 765 |
-
|
| 766 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
|
| 768 |
if resp is None:
|
| 769 |
-
|
|
|
|
|
|
|
|
|
|
| 770 |
|
| 771 |
finish_reason = "stop"
|
| 772 |
chunk_content = ""
|
| 773 |
|
| 774 |
-
# Buffer the full response
|
| 775 |
async for text, fr in _stream_one_response(resp):
|
| 776 |
if fr is not None:
|
| 777 |
finish_reason = fr
|
|
@@ -780,25 +813,21 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 780 |
if text:
|
| 781 |
chunk_content += text
|
| 782 |
total_content += text
|
| 783 |
-
|
| 784 |
-
# Send keep-alive pings while buffering
|
| 785 |
yield ": streaming...\n\n"
|
| 786 |
|
| 787 |
print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
|
| 788 |
|
| 789 |
-
# ALWAYS check for tool calls
|
| 790 |
tool_calls, remaining_text = _parse_tool_calls(total_content)
|
| 791 |
|
| 792 |
if tool_calls:
|
| 793 |
print(f"[Chat] Detected {len(tool_calls)} tool call(s)")
|
| 794 |
-
# Emit tool calls as proper OpenAI streaming chunks
|
| 795 |
for sse_chunk in _emit_tool_call_chunks(chunk_id, created, model, tool_calls, remaining_text):
|
| 796 |
yield sse_chunk
|
| 797 |
return
|
| 798 |
|
| 799 |
-
# No tool calls
|
| 800 |
if finish_reason == "stop":
|
| 801 |
-
# Stream the buffered text content as regular content chunks
|
| 802 |
chunk_sz = 50
|
| 803 |
for offset in range(0, len(total_content), chunk_sz):
|
| 804 |
piece = total_content[offset:offset + chunk_sz]
|
|
@@ -815,7 +844,6 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 815 |
})
|
| 816 |
yield f"data: {sse_data}\n\n"
|
| 817 |
|
| 818 |
-
# Final stop chunk
|
| 819 |
sse_data = json.dumps({
|
| 820 |
"id": chunk_id,
|
| 821 |
"object": "chat.completion.chunk",
|
|
@@ -831,7 +859,7 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 831 |
yield "data: [DONE]\n\n"
|
| 832 |
return
|
| 833 |
|
| 834 |
-
# Auto-continue
|
| 835 |
yield ": continuing...\n\n"
|
| 836 |
|
| 837 |
if _has_incomplete_tool_call(chunk_content):
|
|
@@ -862,8 +890,7 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
|
|
| 862 |
# ββ Non-streaming with auto-continue ββββββββββββββββββββββββββββ
|
| 863 |
|
| 864 |
async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
|
| 865 |
-
"""Collect the full response, auto-continuing if cut off.
|
| 866 |
-
Always checks for tool calls. Returns dict with 'content' and/or 'tool_calls'."""
|
| 867 |
conversation = list(messages)
|
| 868 |
full_content = ""
|
| 869 |
|
|
@@ -886,9 +913,8 @@ async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
|
|
| 886 |
tool_calls, remaining_text = _parse_tool_calls(full_content)
|
| 887 |
|
| 888 |
if tool_calls:
|
| 889 |
-
# If there are incomplete tool calls and we got cut off, continue
|
| 890 |
if _has_incomplete_tool_call(full_content) and finish_reason == "length":
|
| 891 |
-
pass
|
| 892 |
else:
|
| 893 |
return {
|
| 894 |
"tool_calls": tool_calls,
|
|
@@ -926,16 +952,14 @@ async def chat_completions(request: Request):
|
|
| 926 |
messages_raw = body.get("messages", [])
|
| 927 |
stream = body.get("stream", False)
|
| 928 |
|
| 929 |
-
# Extract tools
|
| 930 |
tools = body.get("tools") or body.get("functions") or None
|
| 931 |
tool_choice = body.get("tool_choice", "auto")
|
| 932 |
|
| 933 |
-
# Convert old 'functions' format
|
| 934 |
if tools and "function" not in tools[0] and "name" in tools[0]:
|
| 935 |
-
# Old format: [{"name": "X", "parameters": {...}}]
|
| 936 |
tools = [{"type": "function", "function": f} for f in tools]
|
| 937 |
|
| 938 |
-
# Log request for debugging
|
| 939 |
print(f"[Request] model={model} stream={stream} tools={bool(tools)} tool_choice={tool_choice} msgs={len(messages_raw)}")
|
| 940 |
|
| 941 |
if not messages_raw or not isinstance(messages_raw, list):
|
|
@@ -946,52 +970,59 @@ async def chat_completions(request: Request):
|
|
| 946 |
if not messages:
|
| 947 |
raise HTTPException(400, "No valid messages after normalization")
|
| 948 |
|
| 949 |
-
|
| 950 |
-
|
| 951 |
-
|
| 952 |
-
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
|
| 956 |
-
|
| 957 |
-
|
| 958 |
-
|
| 959 |
-
|
| 960 |
-
result = await _collect_with_auto_continue(messages, model)
|
| 961 |
-
|
| 962 |
-
tool_calls = result.get("tool_calls")
|
| 963 |
-
content = result.get("content")
|
| 964 |
-
|
| 965 |
-
if tool_calls:
|
| 966 |
-
return JSONResponse({
|
| 967 |
-
"id": f"chatcmpl-{int(time.time())}",
|
| 968 |
-
"object": "chat.completion",
|
| 969 |
-
"created": int(time.time()),
|
| 970 |
-
"model": model,
|
| 971 |
-
"choices": [{
|
| 972 |
-
"index": 0,
|
| 973 |
-
"message": {
|
| 974 |
-
"role": "assistant",
|
| 975 |
-
"content": content,
|
| 976 |
-
"tool_calls": tool_calls,
|
| 977 |
-
},
|
| 978 |
-
"finish_reason": "tool_calls",
|
| 979 |
-
}],
|
| 980 |
-
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
| 981 |
-
})
|
| 982 |
else:
|
| 983 |
-
|
| 984 |
-
|
| 985 |
-
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
|
| 989 |
-
|
| 990 |
-
"
|
| 991 |
-
"
|
| 992 |
-
|
| 993 |
-
|
| 994 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
|
| 996 |
|
| 997 |
# ββ Models / Health βββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -1011,7 +1042,7 @@ async def list_models():
|
|
| 1011 |
async def root():
|
| 1012 |
return {
|
| 1013 |
"status": "ok",
|
| 1014 |
-
"version": "
|
| 1015 |
"proxy": bool(PROXY_URL),
|
| 1016 |
"tool_calling": True,
|
| 1017 |
"endpoints": ["/v1/chat/completions", "/v1/models"],
|
|
@@ -1030,7 +1061,10 @@ async def health():
|
|
| 1030 |
@app.get("/debug/refresh")
|
| 1031 |
async def force_refresh():
|
| 1032 |
session.last_refresh = 0
|
| 1033 |
-
await session.refresh(http_client)
|
|
|
|
|
|
|
|
|
|
| 1034 |
return {
|
| 1035 |
"refreshed": True,
|
| 1036 |
"has_cookies": bool(session.cookies),
|
|
|
|
| 5 |
Features:
|
| 6 |
- Tool/function calling support (always detects tool call tags in output)
|
| 7 |
- Auto-continues when upstream hits the ~1K token output limit
|
| 8 |
+
- Rotating proxy with direct-connection fallback
|
| 9 |
- SSE keep-alive comments during continuation gaps
|
| 10 |
- Message normalization for Orchids.app compatibility
|
| 11 |
+
- Robust error handling with proper JSON error responses
|
| 12 |
"""
|
| 13 |
|
| 14 |
import asyncio
|
|
|
|
| 17 |
import re
|
| 18 |
import time
|
| 19 |
import uuid
|
| 20 |
+
import traceback
|
| 21 |
from typing import Optional
|
| 22 |
from urllib.parse import unquote
|
| 23 |
|
|
|
|
| 26 |
from fastapi.middleware.cors import CORSMiddleware
|
| 27 |
from fastapi.responses import StreamingResponse, JSONResponse
|
| 28 |
|
| 29 |
+
app = FastAPI(title="Haiku API", version="8.0.0")
|
| 30 |
|
| 31 |
# ββ CORS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 32 |
app.add_middleware(
|
|
|
|
| 40 |
# ββ Proxy Config βββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 41 |
PROXY_URL = os.environ.get("PROXY_URL", "")
|
| 42 |
|
| 43 |
+
PROXY_MAX_RETRIES = 4 # rotating proxy: try a few IPs
|
| 44 |
PROXY_RETRY_DELAY = 1 # seconds between proxy retries
|
| 45 |
+
CONNECT_TIMEOUT = 10.0 # short connect timeout
|
| 46 |
+
READ_TIMEOUT = 120.0 # long read timeout (for streaming responses)
|
| 47 |
|
| 48 |
|
| 49 |
+
def _make_client(use_proxy: bool = True) -> httpx.AsyncClient:
|
| 50 |
"""Create an httpx client, with or without proxy."""
|
| 51 |
kwargs = dict(
|
| 52 |
verify=False,
|
| 53 |
+
timeout=httpx.Timeout(READ_TIMEOUT, connect=CONNECT_TIMEOUT),
|
| 54 |
)
|
| 55 |
+
if use_proxy and PROXY_URL:
|
| 56 |
kwargs["proxy"] = PROXY_URL
|
|
|
|
|
|
|
|
|
|
| 57 |
return httpx.AsyncClient(**kwargs)
|
| 58 |
|
| 59 |
|
|
|
|
| 73 |
if self.cookies and (now - self.last_refresh) < self.refresh_interval:
|
| 74 |
return
|
| 75 |
|
| 76 |
+
# Try with proxy first, then fallback to direct
|
| 77 |
+
for use_proxy in [True, False]:
|
| 78 |
+
if use_proxy and not PROXY_URL:
|
| 79 |
+
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
+
working_client = client
|
| 82 |
+
for attempt in range(PROXY_MAX_RETRIES if use_proxy else 2):
|
| 83 |
+
try:
|
| 84 |
+
if attempt > 0:
|
| 85 |
+
try:
|
| 86 |
+
await working_client.aclose()
|
| 87 |
+
except:
|
| 88 |
+
pass
|
| 89 |
+
working_client = _make_client(use_proxy=use_proxy)
|
| 90 |
+
|
| 91 |
+
resp = await working_client.get(
|
| 92 |
+
"https://chatgpt.org/claude/chat",
|
| 93 |
+
follow_redirects=True,
|
| 94 |
+
headers={
|
| 95 |
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36",
|
| 96 |
+
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
| 97 |
+
},
|
| 98 |
+
timeout=20.0,
|
| 99 |
+
)
|
| 100 |
+
|
| 101 |
+
if resp.status_code != 200:
|
| 102 |
+
print(f"[Session] GET returned {resp.status_code} (proxy={use_proxy}, attempt {attempt+1})")
|
| 103 |
+
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 104 |
+
continue
|
| 105 |
+
|
| 106 |
+
new_cookies = httpx.Cookies()
|
| 107 |
+
for name, value in resp.cookies.items():
|
| 108 |
+
new_cookies.set(name, value, domain="chatgpt.org")
|
| 109 |
+
for header in resp.headers.get_list("set-cookie"):
|
| 110 |
+
parts = header.split(";")[0]
|
| 111 |
+
if "=" in parts:
|
| 112 |
+
k, v = parts.split("=", 1)
|
| 113 |
+
new_cookies.set(k.strip(), v.strip(), domain="chatgpt.org")
|
| 114 |
+
|
| 115 |
+
xsrf = new_cookies.get("XSRF-TOKEN", domain="chatgpt.org")
|
| 116 |
+
if xsrf:
|
| 117 |
+
xsrf = unquote(xsrf)
|
| 118 |
+
|
| 119 |
+
csrf = None
|
| 120 |
+
m = re.search(r'<meta\s+name="csrf-token"\s+content="([^"]+)"', resp.text)
|
| 121 |
+
if m:
|
| 122 |
+
csrf = m.group(1)
|
| 123 |
+
|
| 124 |
+
self.cookies = new_cookies
|
| 125 |
+
self.xsrf_token = xsrf
|
| 126 |
+
self.csrf_token = csrf
|
| 127 |
+
self.last_refresh = now
|
| 128 |
+
mode = "proxy" if use_proxy else "direct"
|
| 129 |
+
print(f"[Session] OK ({mode}) β CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())}")
|
| 130 |
+
return working_client
|
| 131 |
+
|
| 132 |
+
except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
|
| 133 |
+
print(f"[Session] Connection error (proxy={use_proxy}, attempt {attempt+1}): {type(e).__name__}")
|
| 134 |
+
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 135 |
+
continue
|
| 136 |
+
except Exception as e:
|
| 137 |
+
print(f"[Session] Error (proxy={use_proxy}, attempt {attempt+1}): {type(e).__name__}: {e}")
|
| 138 |
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 139 |
continue
|
| 140 |
|
| 141 |
+
print("[Session] WARNING: All refresh attempts failed (both proxy and direct)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
|
| 144 |
session = SessionState()
|
|
|
|
| 149 |
@app.on_event("startup")
|
| 150 |
async def startup():
|
| 151 |
global http_client
|
| 152 |
+
http_client = _make_client(use_proxy=bool(PROXY_URL))
|
| 153 |
+
result = await session.refresh(http_client)
|
| 154 |
+
if result is not None:
|
| 155 |
+
http_client = result
|
| 156 |
|
| 157 |
@app.on_event("shutdown")
|
| 158 |
async def shutdown():
|
|
|
|
| 182 |
)
|
| 183 |
|
| 184 |
# Regex for Format 2: Anthropic XML function_calls blocks
|
|
|
|
| 185 |
_ANTHROPIC_FC_BLOCK_RE = re.compile(
|
| 186 |
r'<function_calls>\s*(.*?)\s*</function_calls>',
|
| 187 |
re.DOTALL
|
|
|
|
| 233 |
If no tool calls found, returns ([], original_text).
|
| 234 |
"""
|
| 235 |
tool_calls = []
|
| 236 |
+
consumed_spans = []
|
| 237 |
|
| 238 |
# --- Format 1: Inline JSON tool calls ---
|
| 239 |
for match in _TOOL_CALL_INLINE_RE.finditer(text):
|
|
|
|
| 260 |
func_name = invoke_match.group(1)
|
| 261 |
invoke_body = invoke_match.group(2)
|
| 262 |
|
|
|
|
| 263 |
params = {}
|
| 264 |
for param_match in _ANTHROPIC_PARAM_RE.finditer(invoke_body):
|
| 265 |
param_name = param_match.group(1)
|
| 266 |
param_value = param_match.group(2)
|
|
|
|
| 267 |
try:
|
| 268 |
params[param_name] = json.loads(param_value)
|
| 269 |
except (json.JSONDecodeError, ValueError):
|
|
|
|
| 301 |
|
| 302 |
|
| 303 |
def _has_incomplete_tool_call(text: str) -> bool:
|
| 304 |
+
"""Check if text has an opening tool call tag without a matching close."""
|
|
|
|
| 305 |
# Inline format
|
| 306 |
inline_opens = len(re.findall(r'<(?:function_call|tool_call)\s+name="[^"]+">', text))
|
| 307 |
inline_closes = len(re.findall(r'</(?:function_call|tool_call)_?>', text))
|
|
|
|
| 309 |
return True
|
| 310 |
|
| 311 |
# Anthropic XML format
|
| 312 |
+
if text.count('<function_calls>') > text.count('</function_calls>'):
|
|
|
|
|
|
|
| 313 |
return True
|
|
|
|
|
|
|
| 314 |
invoke_opens = len(re.findall(r'<invoke\s+name="[^"]+">', text))
|
| 315 |
+
if invoke_opens > text.count('</invoke>'):
|
|
|
|
| 316 |
return True
|
| 317 |
|
| 318 |
return False
|
|
|
|
| 328 |
tool_names = []
|
| 329 |
|
| 330 |
for tool in tools:
|
|
|
|
| 331 |
if "function" in tool:
|
| 332 |
func = tool["function"]
|
| 333 |
else:
|
| 334 |
+
func = tool
|
| 335 |
|
| 336 |
name = func.get("name", "unknown")
|
| 337 |
desc = func.get("description", "No description")
|
|
|
|
| 356 |
|
| 357 |
tools_xml = '\n\n'.join(invoke_blocks)
|
| 358 |
|
|
|
|
| 359 |
choice_instruction = ""
|
| 360 |
if tool_choice == "required":
|
| 361 |
choice_instruction = "\nIMPORTANT: You MUST call at least one tool."
|
| 362 |
elif tool_choice == "none":
|
| 363 |
+
return ""
|
| 364 |
elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
|
| 365 |
fname = tool_choice.get("function", {}).get("name", "")
|
| 366 |
choice_instruction = f"\nIMPORTANT: You MUST call the {fname} function."
|
|
|
|
| 402 |
and inject tool definitions into system prompt if tools are provided."""
|
| 403 |
result = []
|
| 404 |
|
|
|
|
| 405 |
tool_system = None
|
| 406 |
if tools and tool_choice != "none":
|
| 407 |
tool_system = _build_tool_system_prompt(tools, tool_choice)
|
|
|
|
| 412 |
role = msg.get("role", "user")
|
| 413 |
content = msg.get("content", "")
|
| 414 |
|
|
|
|
| 415 |
if isinstance(content, list):
|
| 416 |
content = _flatten_content_array(content)
|
| 417 |
|
|
|
|
| 419 |
content = ""
|
| 420 |
content = str(content)
|
| 421 |
|
| 422 |
+
# Handle tool role messages
|
| 423 |
if role == "tool":
|
| 424 |
tool_name = msg.get("name", "unknown_tool")
|
| 425 |
tool_call_id = msg.get("tool_call_id", "")
|
|
|
|
| 429 |
})
|
| 430 |
continue
|
| 431 |
|
| 432 |
+
# Handle assistant messages with tool_calls
|
| 433 |
if role == "assistant" and msg.get("tool_calls"):
|
| 434 |
parts = []
|
| 435 |
regular_content = content if content and content.strip() else ""
|
|
|
|
| 446 |
args_json = json.loads(args)
|
| 447 |
except (json.JSONDecodeError, TypeError):
|
| 448 |
args_json = {}
|
|
|
|
| 449 |
invoke_lines = [f'<invoke name="{name}">']
|
| 450 |
for k, v in args_json.items():
|
| 451 |
invoke_lines.append(f'<parameter name="{k}">{v}</parameter>')
|
| 452 |
invoke_lines.append('</invoke>')
|
| 453 |
invoke_parts.append('\n'.join(invoke_lines))
|
| 454 |
|
|
|
|
| 455 |
fc_content = '<function_calls>\n' + '\n'.join(invoke_parts) + '\n</function_calls>'
|
| 456 |
combined = regular_content + '\n\n' + fc_content if regular_content else fc_content
|
| 457 |
result.append({"role": "assistant", "content": combined})
|
|
|
|
| 464 |
system_injected = True
|
| 465 |
continue
|
| 466 |
|
|
|
|
| 467 |
if role == "system" and not content.strip():
|
| 468 |
continue
|
| 469 |
|
| 470 |
result.append({"role": role, "content": content})
|
| 471 |
|
|
|
|
| 472 |
if tool_system and not system_injected:
|
| 473 |
result.insert(0, {"role": "system", "content": tool_system})
|
| 474 |
|
|
|
|
| 492 |
return h
|
| 493 |
|
| 494 |
|
| 495 |
+
# ββ Proxy-aware request with retry + direct fallback ββββββββββββββ
|
| 496 |
|
| 497 |
async def _proxy_post(url: str, **kwargs) -> httpx.Response:
|
| 498 |
+
"""POST with proxy retry logic, falling back to direct connection."""
|
| 499 |
global http_client
|
| 500 |
|
| 501 |
+
# Try with proxy first
|
| 502 |
+
if PROXY_URL:
|
| 503 |
+
for attempt in range(PROXY_MAX_RETRIES):
|
| 504 |
+
try:
|
| 505 |
+
resp = await http_client.post(url, **kwargs)
|
| 506 |
+
return resp
|
| 507 |
+
except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
|
| 508 |
+
print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
|
| 509 |
try:
|
| 510 |
await http_client.aclose()
|
| 511 |
except:
|
| 512 |
pass
|
| 513 |
+
http_client = _make_client(use_proxy=True)
|
| 514 |
await asyncio.sleep(PROXY_RETRY_DELAY)
|
| 515 |
+
continue
|
|
|
|
|
|
|
| 516 |
|
| 517 |
+
# Fallback: try direct connection
|
| 518 |
+
print("[Proxy] Falling back to direct connection")
|
| 519 |
+
direct_client = _make_client(use_proxy=False)
|
| 520 |
+
try:
|
| 521 |
+
resp = await direct_client.post(url, **kwargs)
|
| 522 |
+
return resp
|
| 523 |
+
finally:
|
| 524 |
+
await direct_client.aclose()
|
| 525 |
|
| 526 |
|
| 527 |
# ββ Raw call with retries βββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 534 |
|
| 535 |
for attempt in range(2): # CSRF retry
|
| 536 |
for rate_attempt in range(3): # 429 retry
|
| 537 |
+
try:
|
| 538 |
+
resp = await _proxy_post(
|
| 539 |
+
"https://chatgpt.org/api/chat",
|
| 540 |
+
json=payload,
|
| 541 |
+
headers=_headers(),
|
| 542 |
+
cookies=session.cookies,
|
| 543 |
+
)
|
| 544 |
+
except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
|
| 545 |
+
print(f"[Chat] Connection failed: {type(e).__name__}")
|
| 546 |
+
session.last_refresh = 0
|
| 547 |
+
raise HTTPException(502, f"Cannot reach upstream: {type(e).__name__}")
|
| 548 |
|
| 549 |
if resp.status_code == 419 and attempt == 0:
|
| 550 |
print("[Chat] 419 -> refreshing session...")
|
|
|
|
| 576 |
Yields (text, finish_reason) tuples. finish_reason is None for text chunks."""
|
| 577 |
finish_reason = None
|
| 578 |
|
| 579 |
+
try:
|
| 580 |
+
async for raw_line in resp.aiter_lines():
|
| 581 |
+
line = raw_line.strip()
|
| 582 |
+
if not line or line.startswith(":"):
|
| 583 |
+
continue
|
| 584 |
+
if not line.startswith("data: "):
|
| 585 |
+
continue
|
|
|
|
|
|
|
|
|
|
| 586 |
|
| 587 |
+
payload_str = line[6:]
|
| 588 |
+
if payload_str.strip() == "[DONE]":
|
| 589 |
+
break
|
|
|
|
| 590 |
|
| 591 |
+
try:
|
| 592 |
+
chunk = json.loads(payload_str)
|
| 593 |
+
except json.JSONDecodeError:
|
| 594 |
+
continue
|
|
|
|
| 595 |
|
| 596 |
+
for choice in chunk.get("choices", []):
|
| 597 |
+
delta = choice.get("delta", {})
|
| 598 |
+
c = delta.get("content", "")
|
| 599 |
+
if c:
|
| 600 |
+
yield c, None
|
| 601 |
+
|
| 602 |
+
fr = choice.get("finish_reason")
|
| 603 |
+
if fr:
|
| 604 |
+
if fr in ("stop", "end_turn"):
|
| 605 |
+
finish_reason = "stop"
|
| 606 |
+
elif fr in ("length", "max_tokens"):
|
| 607 |
+
finish_reason = "length"
|
| 608 |
+
except (httpx.ReadError, httpx.RemoteProtocolError) as e:
|
| 609 |
+
print(f"[Stream] Connection lost during streaming: {type(e).__name__}")
|
| 610 |
+
except Exception as e:
|
| 611 |
+
print(f"[Stream] Error during streaming: {type(e).__name__}: {e}")
|
| 612 |
|
| 613 |
yield "", finish_reason
|
| 614 |
|
|
|
|
| 627 |
for rate_attempt in range(3): # 429 retry
|
| 628 |
yield ": thinking...\n\n"
|
| 629 |
|
| 630 |
+
try:
|
| 631 |
+
resp = await _proxy_post(
|
| 632 |
+
"https://chatgpt.org/api/chat",
|
| 633 |
+
json=payload,
|
| 634 |
+
headers=_headers(),
|
| 635 |
+
cookies=session.cookies,
|
| 636 |
+
)
|
| 637 |
+
except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
|
| 638 |
+
print(f"[Chat] Connection failed: {type(e).__name__}")
|
| 639 |
+
session.last_refresh = 0
|
| 640 |
+
raise HTTPException(502, f"Cannot reach upstream: {type(e).__name__}")
|
| 641 |
|
| 642 |
if resp.status_code == 419 and attempt == 0:
|
| 643 |
print("[Chat] 419 -> refreshing session...")
|
|
|
|
| 697 |
})
|
| 698 |
chunks.append(f"data: {sse_start}\n\n")
|
| 699 |
|
| 700 |
+
# Argument chunks
|
| 701 |
args = tc["function"]["arguments"]
|
| 702 |
chunk_size = max(1, len(args) // 3)
|
| 703 |
for offset in range(0, len(args), chunk_size):
|
|
|
|
| 722 |
})
|
| 723 |
chunks.append(f"data: {sse_arg}\n\n")
|
| 724 |
|
| 725 |
+
# Remaining text alongside tool calls
|
| 726 |
if remaining_text.strip():
|
| 727 |
sse_text = json.dumps({
|
| 728 |
"id": chunk_id,
|
|
|
|
| 768 |
total_content = ""
|
| 769 |
|
| 770 |
for cont_num in range(MAX_CONTINUATIONS):
|
|
|
|
| 771 |
yield ": thinking...\n\n"
|
| 772 |
|
| 773 |
resp = None
|
| 774 |
+
try:
|
| 775 |
+
async for result in _raw_call_streaming(conversation, model):
|
| 776 |
+
if isinstance(result, str):
|
| 777 |
+
yield result
|
| 778 |
+
else:
|
| 779 |
+
resp = result
|
| 780 |
+
except HTTPException as e:
|
| 781 |
+
# Send error as SSE then stop
|
| 782 |
+
error_data = json.dumps({
|
| 783 |
+
"id": chunk_id,
|
| 784 |
+
"object": "chat.completion.chunk",
|
| 785 |
+
"created": created,
|
| 786 |
+
"model": model,
|
| 787 |
+
"choices": [{
|
| 788 |
+
"index": 0,
|
| 789 |
+
"delta": {"content": f"\n\n[Error: {e.detail}]"},
|
| 790 |
+
"finish_reason": None,
|
| 791 |
+
}],
|
| 792 |
+
})
|
| 793 |
+
yield f"data: {error_data}\n\n"
|
| 794 |
+
yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
|
| 795 |
+
yield "data: [DONE]\n\n"
|
| 796 |
+
return
|
| 797 |
|
| 798 |
if resp is None:
|
| 799 |
+
yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {'content': '[Error: No response from upstream]'}, 'finish_reason': None}]})}\n\n"
|
| 800 |
+
yield f"data: {json.dumps({'id': chunk_id, 'object': 'chat.completion.chunk', 'created': created, 'model': model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
|
| 801 |
+
yield "data: [DONE]\n\n"
|
| 802 |
+
return
|
| 803 |
|
| 804 |
finish_reason = "stop"
|
| 805 |
chunk_content = ""
|
| 806 |
|
| 807 |
+
# Buffer the full response
|
| 808 |
async for text, fr in _stream_one_response(resp):
|
| 809 |
if fr is not None:
|
| 810 |
finish_reason = fr
|
|
|
|
| 813 |
if text:
|
| 814 |
chunk_content += text
|
| 815 |
total_content += text
|
|
|
|
|
|
|
| 816 |
yield ": streaming...\n\n"
|
| 817 |
|
| 818 |
print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
|
| 819 |
|
| 820 |
+
# ALWAYS check for tool calls
|
| 821 |
tool_calls, remaining_text = _parse_tool_calls(total_content)
|
| 822 |
|
| 823 |
if tool_calls:
|
| 824 |
print(f"[Chat] Detected {len(tool_calls)} tool call(s)")
|
|
|
|
| 825 |
for sse_chunk in _emit_tool_call_chunks(chunk_id, created, model, tool_calls, remaining_text):
|
| 826 |
yield sse_chunk
|
| 827 |
return
|
| 828 |
|
| 829 |
+
# No tool calls
|
| 830 |
if finish_reason == "stop":
|
|
|
|
| 831 |
chunk_sz = 50
|
| 832 |
for offset in range(0, len(total_content), chunk_sz):
|
| 833 |
piece = total_content[offset:offset + chunk_sz]
|
|
|
|
| 844 |
})
|
| 845 |
yield f"data: {sse_data}\n\n"
|
| 846 |
|
|
|
|
| 847 |
sse_data = json.dumps({
|
| 848 |
"id": chunk_id,
|
| 849 |
"object": "chat.completion.chunk",
|
|
|
|
| 859 |
yield "data: [DONE]\n\n"
|
| 860 |
return
|
| 861 |
|
| 862 |
+
# Auto-continue
|
| 863 |
yield ": continuing...\n\n"
|
| 864 |
|
| 865 |
if _has_incomplete_tool_call(chunk_content):
|
|
|
|
| 890 |
# ββ Non-streaming with auto-continue ββββββββββββββββββββββββββββ
|
| 891 |
|
| 892 |
async def _collect_with_auto_continue(messages: list[dict], model: str) -> dict:
|
| 893 |
+
"""Collect the full response, auto-continuing if cut off."""
|
|
|
|
| 894 |
conversation = list(messages)
|
| 895 |
full_content = ""
|
| 896 |
|
|
|
|
| 913 |
tool_calls, remaining_text = _parse_tool_calls(full_content)
|
| 914 |
|
| 915 |
if tool_calls:
|
|
|
|
| 916 |
if _has_incomplete_tool_call(full_content) and finish_reason == "length":
|
| 917 |
+
pass
|
| 918 |
else:
|
| 919 |
return {
|
| 920 |
"tool_calls": tool_calls,
|
|
|
|
| 952 |
messages_raw = body.get("messages", [])
|
| 953 |
stream = body.get("stream", False)
|
| 954 |
|
| 955 |
+
# Extract tools
|
| 956 |
tools = body.get("tools") or body.get("functions") or None
|
| 957 |
tool_choice = body.get("tool_choice", "auto")
|
| 958 |
|
| 959 |
+
# Convert old 'functions' format
|
| 960 |
if tools and "function" not in tools[0] and "name" in tools[0]:
|
|
|
|
| 961 |
tools = [{"type": "function", "function": f} for f in tools]
|
| 962 |
|
|
|
|
| 963 |
print(f"[Request] model={model} stream={stream} tools={bool(tools)} tool_choice={tool_choice} msgs={len(messages_raw)}")
|
| 964 |
|
| 965 |
if not messages_raw or not isinstance(messages_raw, list):
|
|
|
|
| 970 |
if not messages:
|
| 971 |
raise HTTPException(400, "No valid messages after normalization")
|
| 972 |
|
| 973 |
+
try:
|
| 974 |
+
if stream:
|
| 975 |
+
return StreamingResponse(
|
| 976 |
+
_stream_with_auto_continue(messages, model),
|
| 977 |
+
media_type="text/event-stream",
|
| 978 |
+
headers={
|
| 979 |
+
"Cache-Control": "no-cache",
|
| 980 |
+
"Connection": "keep-alive",
|
| 981 |
+
"X-Accel-Buffering": "no",
|
| 982 |
+
},
|
| 983 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 984 |
else:
|
| 985 |
+
result = await _collect_with_auto_continue(messages, model)
|
| 986 |
+
|
| 987 |
+
tool_calls = result.get("tool_calls")
|
| 988 |
+
content = result.get("content")
|
| 989 |
+
|
| 990 |
+
if tool_calls:
|
| 991 |
+
return JSONResponse({
|
| 992 |
+
"id": f"chatcmpl-{int(time.time())}",
|
| 993 |
+
"object": "chat.completion",
|
| 994 |
+
"created": int(time.time()),
|
| 995 |
+
"model": model,
|
| 996 |
+
"choices": [{
|
| 997 |
+
"index": 0,
|
| 998 |
+
"message": {
|
| 999 |
+
"role": "assistant",
|
| 1000 |
+
"content": content,
|
| 1001 |
+
"tool_calls": tool_calls,
|
| 1002 |
+
},
|
| 1003 |
+
"finish_reason": "tool_calls",
|
| 1004 |
+
}],
|
| 1005 |
+
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
| 1006 |
+
})
|
| 1007 |
+
else:
|
| 1008 |
+
return JSONResponse({
|
| 1009 |
+
"id": f"chatcmpl-{int(time.time())}",
|
| 1010 |
+
"object": "chat.completion",
|
| 1011 |
+
"created": int(time.time()),
|
| 1012 |
+
"model": model,
|
| 1013 |
+
"choices": [{
|
| 1014 |
+
"index": 0,
|
| 1015 |
+
"message": {"role": "assistant", "content": content or ""},
|
| 1016 |
+
"finish_reason": "stop",
|
| 1017 |
+
}],
|
| 1018 |
+
"usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
|
| 1019 |
+
})
|
| 1020 |
+
except HTTPException:
|
| 1021 |
+
raise
|
| 1022 |
+
except Exception as e:
|
| 1023 |
+
print(f"[Request] Unhandled error: {type(e).__name__}: {e}")
|
| 1024 |
+
print(traceback.format_exc())
|
| 1025 |
+
raise HTTPException(500, f"Internal error: {type(e).__name__}")
|
| 1026 |
|
| 1027 |
|
| 1028 |
# ββ Models / Health βββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 1042 |
async def root():
|
| 1043 |
return {
|
| 1044 |
"status": "ok",
|
| 1045 |
+
"version": "8.0.0",
|
| 1046 |
"proxy": bool(PROXY_URL),
|
| 1047 |
"tool_calling": True,
|
| 1048 |
"endpoints": ["/v1/chat/completions", "/v1/models"],
|
|
|
|
| 1061 |
@app.get("/debug/refresh")
|
| 1062 |
async def force_refresh():
|
| 1063 |
session.last_refresh = 0
|
| 1064 |
+
result = await session.refresh(http_client)
|
| 1065 |
+
if result is not None:
|
| 1066 |
+
global http_client
|
| 1067 |
+
http_client = result
|
| 1068 |
return {
|
| 1069 |
"refreshed": True,
|
| 1070 |
"has_cookies": bool(session.cookies),
|