overwrite69 commited on
Commit
3d49d68
Β·
verified Β·
1 Parent(s): 7cd613b

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +47 -1
  2. app.py +501 -106
README.md CHANGED
@@ -11,6 +11,24 @@ app_port: 7860
11
 
12
  OpenAI-compatible API proxy for Claude Haiku 4.5 via chatgpt.org.
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ## Usage
15
 
16
  ### Chat Completions (non-streaming)
@@ -36,6 +54,32 @@ curl https://YOUR_SPACE.hf.space/v1/chat/completions \
36
  }'
37
  ```
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  ### With OpenAI Python SDK
40
 
41
  ```python
@@ -64,7 +108,9 @@ curl https://YOUR_SPACE.hf.space/v1/models
64
 
65
  | Endpoint | Description |
66
  |---|---|
67
- | `POST /v1/chat/completions` | OpenAI-compatible chat completions |
 
68
  | `GET /v1/models` | List available models |
69
  | `GET /health` | Health check |
70
  | `GET /debug/session` | Session debug info |
 
 
11
 
12
  OpenAI-compatible API proxy for Claude Haiku 4.5 via chatgpt.org.
13
 
14
+ Supports **tool/function calling**, auto-continue for the 1K token limit, rotating proxy, and SSE keep-alive.
15
+
16
+ ## Features
17
+
18
+ - **Tool/Function Calling**: Full OpenAI-compatible tool calling support. Converts `tools` definitions to system prompts, parses Claude's output for `<tool_call_>` blocks, and returns properly formatted `tool_calls` responses.
19
+ - **Auto-Continue**: When the upstream 1K token limit is hit, automatically continues the response with "Continue" messages.
20
+ - **SSE Keep-Alive**: Sends keep-alive comments during continuation gaps to prevent socket timeouts.
21
+ - **Rotating Proxy**: Supports unstable rotating proxies with automatic retries on connection failures.
22
+ - **Message Normalization**: Handles Orchids.app's content array format and converts it to plain text.
23
+
24
+ ## Environment Variables
25
+
26
+ | Variable | Description | Default |
27
+ |---|---|---|
28
+ | `PROXY_URL` | Rotating proxy URL (e.g. `http://user:pass@proxy.op.wtf:32424`) | `""` (direct) |
29
+
30
+ Set these in HF Spaces > Settings > Variables and Secrets.
31
+
32
  ## Usage
33
 
34
  ### Chat Completions (non-streaming)
 
54
  }'
55
  ```
56
 
57
+ ### With Tool Calling
58
+
59
+ ```bash
60
+ curl https://YOUR_SPACE.hf.space/v1/chat/completions \
61
+ -H "Content-Type: application/json" \
62
+ -d '{
63
+ "model": "anthropic/claude-haiku-4-5",
64
+ "messages": [{"role": "user", "content": "Create a file called hello.txt with hello world"}],
65
+ "tools": [{
66
+ "type": "function",
67
+ "function": {
68
+ "name": "Write",
69
+ "description": "Write content to a file",
70
+ "parameters": {
71
+ "type": "object",
72
+ "properties": {
73
+ "file_path": {"type": "string", "description": "Path to the file"},
74
+ "content": {"type": "string", "description": "Content to write"}
75
+ },
76
+ "required": ["file_path", "content"]
77
+ }
78
+ }
79
+ }]
80
+ }'
81
+ ```
82
+
83
  ### With OpenAI Python SDK
84
 
85
  ```python
 
108
 
109
  | Endpoint | Description |
110
  |---|---|
111
+ | `POST /v1/chat/completions` | OpenAI-compatible chat completions (with tool calling) |
112
+ | `POST /chat/completions` | Same, without /v1 prefix |
113
  | `GET /v1/models` | List available models |
114
  | `GET /health` | Health check |
115
  | `GET /debug/session` | Session debug info |
116
+ | `GET /debug/refresh` | Force session refresh |
app.py CHANGED
@@ -2,9 +2,12 @@
2
  Haiku API - OpenAI-compatible proxy for chatgpt.org/claude/chat
3
  Deploy to Hugging Face Spaces (Docker SDK)
4
 
5
- Auto-continues when upstream hits the ~1K token output limit.
6
- Uses rotating proxy with aggressive retries for unstable IPs.
7
- Sends SSE keep-alive comments during continuation gaps.
 
 
 
8
  """
9
 
10
  import asyncio
@@ -21,7 +24,7 @@ from fastapi import FastAPI, HTTPException, Request
21
  from fastapi.middleware.cors import CORSMiddleware
22
  from fastapi.responses import StreamingResponse, JSONResponse
23
 
24
- app = FastAPI(title="Haiku API", version="4.0.0")
25
 
26
  # ── CORS ─────────────────────────────────────────────────────────
27
  app.add_middleware(
@@ -70,10 +73,8 @@ class SessionState:
70
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
71
  return
72
 
73
- # Try multiple times with proxy rotation (new IP each request)
74
  for attempt in range(PROXY_MAX_RETRIES):
75
  try:
76
- # Create fresh client for each attempt (gets new proxy IP)
77
  if PROXY_URL and attempt > 0:
78
  try:
79
  await client.aclose()
@@ -119,7 +120,7 @@ class SessionState:
119
  self.csrf_token = csrf
120
  self.last_refresh = now
121
  print(f"[Session] OK β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
122
- return # Success!
123
 
124
  except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
125
  print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
@@ -150,37 +151,263 @@ async def shutdown():
150
  await http_client.aclose()
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  # ── Message normalization ────────────────────────────────────────
154
- def normalize_messages(messages: list[dict]) -> list[dict]:
155
- """Normalize messages: content arrays β†’ plain text, strip extra fields."""
 
 
156
  result = []
157
- for msg in messages:
158
- role = msg.get("role", "user")
159
- content = msg.get("content", "")
160
 
161
- if isinstance(content, list):
162
- text_parts = []
163
- for part in content:
164
- if isinstance(part, str):
165
- text_parts.append(part)
166
- elif isinstance(part, dict):
167
- if part.get("type") == "text":
168
- text_parts.append(part.get("text", ""))
169
- content = "\n".join(text_parts)
170
 
171
- if content is None:
172
- content = ""
 
 
173
 
174
- content = str(content)
175
 
176
- if role == "system" and not content.strip():
 
 
 
 
 
 
 
 
 
 
 
177
  continue
178
 
179
- result.append({"role": role, "content": content})
 
 
 
 
 
 
 
180
 
181
  return result
182
 
183
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  def _headers() -> dict:
185
  h = {
186
  "Accept": "*/*",
@@ -197,6 +424,7 @@ def _headers() -> dict:
197
 
198
 
199
  # ── Proxy-aware request with retry ──────────────────────────────
 
200
  async def _proxy_post(url: str, **kwargs) -> httpx.Response:
201
  """POST with proxy retry logic. Creates new client on each retry to get fresh IP."""
202
  global http_client
@@ -204,54 +432,26 @@ async def _proxy_post(url: str, **kwargs) -> httpx.Response:
204
  for attempt in range(PROXY_MAX_RETRIES):
205
  try:
206
  resp = await http_client.post(url, **kwargs)
207
-
208
- # Proxy returned a non-connection error β€” return it
209
  return resp
210
 
211
  except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
212
  print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
213
- # Recreate client with new proxy IP
214
  if PROXY_URL:
215
  try:
216
  await http_client.aclose()
217
  except:
218
  pass
219
  http_client = _make_client()
220
- # Re-apply session cookies
221
  await asyncio.sleep(PROXY_RETRY_DELAY)
222
  else:
223
  await asyncio.sleep(2)
224
  continue
225
 
226
- # All retries exhausted β€” return last attempt anyway
227
  return await http_client.post(url, **kwargs)
228
 
229
 
230
- async def _proxy_get(url: str, **kwargs) -> httpx.Response:
231
- """GET with proxy retry logic."""
232
- global http_client
233
-
234
- for attempt in range(PROXY_MAX_RETRIES):
235
- try:
236
- resp = await http_client.get(url, **kwargs)
237
- return resp
238
- except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
239
- print(f"[Proxy] GET error #{attempt+1}: {type(e).__name__}")
240
- if PROXY_URL:
241
- try:
242
- await http_client.aclose()
243
- except:
244
- pass
245
- http_client = _make_client()
246
- await asyncio.sleep(PROXY_RETRY_DELAY)
247
- else:
248
- await asyncio.sleep(2)
249
- continue
250
-
251
- return await http_client.get(url, **kwargs)
252
-
253
-
254
  # ── Raw call with retries ───────────────────────────────────────
 
255
  async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
256
  """Make a single POST to chatgpt.org/api/chat with full retry logic."""
257
  await session.refresh(http_client)
@@ -268,7 +468,7 @@ async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
268
  )
269
 
270
  if resp.status_code == 419 and attempt == 0:
271
- print("[Chat] 419 β†’ refreshing session...")
272
  session.last_refresh = 0
273
  await session.refresh(http_client)
274
  break
@@ -293,7 +493,8 @@ async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
293
 
294
 
295
  async def _stream_one_response(resp):
296
- """Stream a single upstream SSE response in real-time."""
 
297
  finish_reason = None
298
 
299
  async for raw_line in resp.aiter_lines():
@@ -331,14 +532,15 @@ async def _stream_one_response(resp):
331
  # ── Streaming with auto-continue ────────────────────────────────
332
  MAX_CONTINUATIONS = 20
333
 
 
334
  async def _raw_call_streaming(messages: list[dict], model: str):
335
- """Like _raw_call but yields SSE keep-alive comments during retries."""
 
336
  await session.refresh(http_client)
337
  payload = {"model": model, "messages": messages}
338
 
339
  for attempt in range(2): # CSRF retry
340
  for rate_attempt in range(3): # 429 retry
341
- # Keep-alive before request
342
  yield ": thinking...\n\n"
343
 
344
  resp = await _proxy_post(
@@ -349,7 +551,7 @@ async def _raw_call_streaming(messages: list[dict], model: str):
349
  )
350
 
351
  if resp.status_code == 419 and attempt == 0:
352
- print("[Chat] 419 β†’ refreshing session...")
353
  session.last_refresh = 0
354
  await session.refresh(http_client)
355
  break
@@ -376,8 +578,13 @@ async def _raw_call_streaming(messages: list[dict], model: str):
376
  raise HTTPException(500, "Failed after retry")
377
 
378
 
379
- async def _stream_with_auto_continue(messages: list[dict], model: str):
380
- """Stream with real-time output, auto-continue, and keep-alive pings."""
 
 
 
 
 
381
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
382
  created = int(time.time())
383
  conversation = list(messages)
@@ -407,6 +614,132 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
407
  if text:
408
  chunk_content += text
409
  total_content += text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  sse_data = json.dumps({
411
  "id": chunk_id,
412
  "object": "chat.completion.chunk",
@@ -414,38 +747,46 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
414
  "model": model,
415
  "choices": [{
416
  "index": 0,
417
- "delta": {"content": text},
418
- "finish_reason": None,
419
  }],
420
  })
421
  yield f"data: {sse_data}\n\n"
 
 
422
 
423
- print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
424
-
425
- if finish_reason == "stop":
426
- sse_data = json.dumps({
427
- "id": chunk_id,
428
- "object": "chat.completion.chunk",
429
- "created": created,
430
- "model": model,
431
- "choices": [{
432
- "index": 0,
433
- "delta": {},
434
- "finish_reason": "stop",
435
- }],
436
- })
437
- yield f"data: {sse_data}\n\n"
438
- yield "data: [DONE]\n\n"
439
- return
440
 
 
441
  yield ": continuing...\n\n"
442
 
443
- conversation.append({"role": "assistant", "content": chunk_content})
444
- conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
 
 
 
 
 
445
 
446
  print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
447
 
448
- # Safety
449
  sse_data = json.dumps({
450
  "id": chunk_id,
451
  "object": "chat.completion.chunk",
@@ -462,8 +803,10 @@ async def _stream_with_auto_continue(messages: list[dict], model: str):
462
 
463
 
464
  # ── Non-streaming with auto-continue ────────────────────────────
465
- async def _collect_with_auto_continue(messages: list[dict], model: str) -> str:
466
- """Collect the full response, auto-continuing if cut off."""
 
 
467
  conversation = list(messages)
468
  full_content = ""
469
 
@@ -482,16 +825,40 @@ async def _collect_with_auto_continue(messages: list[dict], model: str) -> str:
482
  full_content += content
483
  print(f"[Chat] Collect #{cont_num+1}: {len(content)} chars, finish={finish_reason}")
484
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  if finish_reason == "stop":
486
- return full_content
 
 
 
487
 
488
- conversation.append({"role": "assistant", "content": content})
489
- conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
 
 
 
 
 
490
 
491
- return full_content
492
 
493
 
494
  # ── OpenAI-compatible endpoint ──────────────────────────────────
 
495
  @app.post("/v1/chat/completions")
496
  @app.post("/chat/completions")
497
  async def chat_completions(request: Request):
@@ -506,18 +873,22 @@ async def chat_completions(request: Request):
506
  model = body.get("model", "anthropic/claude-haiku-4-5")
507
  messages_raw = body.get("messages", [])
508
  stream = body.get("stream", False)
 
 
509
 
510
  if not messages_raw or not isinstance(messages_raw, list):
511
  raise HTTPException(400, "messages must be a non-empty array")
512
 
513
- messages = normalize_messages(messages_raw)
 
 
514
 
515
  if not messages:
516
  raise HTTPException(400, "No valid messages after normalization")
517
 
518
  if stream:
519
  return StreamingResponse(
520
- _stream_with_auto_continue(messages, model),
521
  media_type="text/event-stream",
522
  headers={
523
  "Cache-Control": "no-cache",
@@ -526,22 +897,45 @@ async def chat_completions(request: Request):
526
  },
527
  )
528
  else:
529
- full_text = await _collect_with_auto_continue(messages, model)
530
- return JSONResponse({
531
- "id": f"chatcmpl-{int(time.time())}",
532
- "object": "chat.completion",
533
- "created": int(time.time()),
534
- "model": model,
535
- "choices": [{
536
- "index": 0,
537
- "message": {"role": "assistant", "content": full_text},
538
- "finish_reason": "stop",
539
- }],
540
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
541
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
 
543
 
544
  # ── Models / Health ─────────────────────────────────────────────
 
545
  @app.get("/v1/models")
546
  @app.get("/models")
547
  async def list_models():
@@ -557,8 +951,9 @@ async def list_models():
557
  async def root():
558
  return {
559
  "status": "ok",
560
- "version": "4.0.0",
561
  "proxy": bool(PROXY_URL),
 
562
  "endpoints": ["/v1/chat/completions", "/v1/models"],
563
  }
564
 
 
2
  Haiku API - OpenAI-compatible proxy for chatgpt.org/claude/chat
3
  Deploy to Hugging Face Spaces (Docker SDK)
4
 
5
+ Features:
6
+ - Tool/function calling support (converts OpenAI tools β†’ system prompt, parses output)
7
+ - Auto-continues when upstream hits the ~1K token output limit
8
+ - Rotating proxy with aggressive retries for unstable IPs
9
+ - SSE keep-alive comments during continuation gaps
10
+ - Message normalization for Orchids.app compatibility
11
  """
12
 
13
  import asyncio
 
24
  from fastapi.middleware.cors import CORSMiddleware
25
  from fastapi.responses import StreamingResponse, JSONResponse
26
 
27
+ app = FastAPI(title="Haiku API", version="5.0.0")
28
 
29
  # ── CORS ─────────────────────────────────────────────────────────
30
  app.add_middleware(
 
73
  if self.cookies and (now - self.last_refresh) < self.refresh_interval:
74
  return
75
 
 
76
  for attempt in range(PROXY_MAX_RETRIES):
77
  try:
 
78
  if PROXY_URL and attempt > 0:
79
  try:
80
  await client.aclose()
 
120
  self.csrf_token = csrf
121
  self.last_refresh = now
122
  print(f"[Session] OK β€” CSRF:{bool(csrf)} XSRF:{bool(xsrf)} Cookies:{list(new_cookies.keys())} (attempt {attempt+1})")
123
+ return
124
 
125
  except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
126
  print(f"[Session] Proxy error attempt #{attempt+1}: {type(e).__name__}: {e}")
 
151
  await http_client.aclose()
152
 
153
 
154
+ # ── Tool Calling Support ─────────────────────────────────────────
155
+
156
+ def _build_tool_system_prompt(tools: list[dict], tool_choice=None) -> str:
157
+ """Convert OpenAI tools format to a system prompt that instructs Claude
158
+ to output tool calls in a parseable format."""
159
+
160
+ tools_desc = []
161
+ for tool in tools:
162
+ func = tool.get("function", {})
163
+ name = func.get("name", "unknown")
164
+ desc = func.get("description", "No description")
165
+ params = func.get("parameters", {})
166
+
167
+ # Format parameters nicely
168
+ props = params.get("properties", {})
169
+ required = params.get("required", [])
170
+ param_lines = []
171
+ for pname, pdef in props.items():
172
+ ptype = pdef.get("type", "any")
173
+ pdesc = pdef.get("description", "")
174
+ req_flag = " (required)" if pname in required else " (optional)"
175
+ param_lines.append(f" - {pname}: {ptype}{req_flag} β€” {pdesc}")
176
+
177
+ params_text = "\n".join(param_lines) if param_lines else " (no parameters)"
178
+ tools_desc.append(f"### {name}\n{desc}\nParameters:\n{params_text}")
179
+
180
+ tools_text = "\n\n".join(tools_desc)
181
+
182
+ # Handle tool_choice
183
+ choice_instruction = ""
184
+ if tool_choice == "required":
185
+ choice_instruction = "\nIMPORTANT: You MUST call at least one tool. Do not respond with just text."
186
+ elif tool_choice == "none":
187
+ # Shouldn't reach here since we skip tool injection for "none"
188
+ choice_instruction = "\nDo NOT call any tools. Respond with text only."
189
+ elif isinstance(tool_choice, dict) and tool_choice.get("type") == "function":
190
+ fname = tool_choice.get("function", {}).get("name", "")
191
+ choice_instruction = f"\nIMPORTANT: You MUST call the {fname} function."
192
+
193
+ return f"""# Available Tools
194
+
195
+ You have access to the following tools that you can call:
196
+
197
+ {tools_text}
198
+
199
+ ## Tool Call Format
200
+
201
+ When you want to call a tool, you MUST use EXACTLY this XML format β€” one block per tool call:
202
+
203
+ <tool_call name="FUNCTION_NAME">
204
+ {"{"}"param1": "value1", "param2": "value2"{"}"}
205
+ </tool_call_>
206
+
207
+ Example β€” calling the Write tool:
208
+ <tool_call name="Write">
209
+ {"{"}"file_path": "hello.txt", "content": "hello world"{"}"}
210
+ </tool_call_>
211
+
212
+ ## Rules
213
+ - You may call multiple tools by using multiple <tool_call_> blocks in sequence
214
+ - The arguments inside the block MUST be valid JSON matching the tool's parameter schema
215
+ - If you need to call a tool, output ONLY <tool_call_> blocks β€” no explanatory text before or after
216
+ - If you don't need to call any tools, just respond normally with text (no <tool_call_> blocks)
217
+ - Do NOT wrap <tool_call_> blocks in markdown code blocks or any other formatting
218
+ {choice_instruction}"""
219
+
220
+
221
+ # Regex to parse <tool_call name="...">...</tool_call_> blocks
222
+ _TOOL_CALL_RE = re.compile(
223
+ r'<tool_call\s+name="([^"]+)">\s*(.*?)\s*</tool_call_>',
224
+ re.DOTALL
225
+ )
226
+
227
+ # Also try matching incomplete tool calls (for auto-continue detection)
228
+ _INCOMPLETE_TOOL_CALL_RE = re.compile(
229
+ r'<tool_call\s+name="([^"]+)">\s*(.*?)$',
230
+ re.DOTALL
231
+ )
232
+
233
+
234
+ def _parse_tool_calls(text: str) -> tuple[list[dict], str]:
235
+ """Parse tool calls from model text output.
236
+
237
+ Returns (tool_calls, remaining_text) where tool_calls is in OpenAI format.
238
+ If no tool calls found, returns ([], original_text).
239
+ """
240
+ matches = list(_TOOL_CALL_RE.finditer(text))
241
+
242
+ if not matches:
243
+ return [], text
244
+
245
+ tool_calls = []
246
+ # Collect text outside of tool call blocks
247
+ remaining_parts = []
248
+
249
+ last_end = 0
250
+ for match in matches:
251
+ # Text before this tool call
252
+ if match.start() > last_end:
253
+ before = text[last_end:match.start()].strip()
254
+ if before:
255
+ remaining_parts.append(before)
256
+ last_end = match.end()
257
+
258
+ func_name = match.group(1)
259
+ args_str = match.group(2).strip()
260
+
261
+ # Try to parse arguments as JSON
262
+ try:
263
+ args_json = json.loads(args_str)
264
+ args_final = json.dumps(args_json)
265
+ except json.JSONDecodeError:
266
+ # Try to fix common issues
267
+ # Sometimes Claude wraps args in markdown code block
268
+ args_cleaned = args_str.strip('`').strip()
269
+ if args_cleaned.startswith('json'):
270
+ args_cleaned = args_cleaned[4:].strip()
271
+ try:
272
+ args_json = json.loads(args_cleaned)
273
+ args_final = json.dumps(args_json)
274
+ except json.JSONDecodeError:
275
+ # Last resort: wrap the raw text as an argument
276
+ args_final = json.dumps({"raw_input": args_str})
277
+
278
+ tool_calls.append({
279
+ "id": f"call_{uuid.uuid4().hex[:24]}",
280
+ "type": "function",
281
+ "function": {
282
+ "name": func_name,
283
+ "arguments": args_final,
284
+ }
285
+ })
286
+
287
+ # Text after the last tool call
288
+ if last_end < len(text):
289
+ after = text[last_end:].strip()
290
+ if after:
291
+ remaining_parts.append(after)
292
+
293
+ remaining_text = "\n".join(remaining_parts)
294
+ return tool_calls, remaining_text
295
+
296
+
297
+ def _has_incomplete_tool_call(text: str) -> bool:
298
+ """Check if text has an opening <tool_call_>> tag without a matching close."""
299
+ opens = len(re.findall(r'<tool_call\s+name="[^"]+">', text))
300
+ closes = len(re.findall(r'</tool_call_>', text))
301
+ return opens > closes
302
+
303
+
304
  # ── Message normalization ────────────────────────────────────────
305
+
306
+ def normalize_messages(messages: list[dict], tools: list[dict] = None, tool_choice=None) -> list[dict]:
307
+ """Normalize messages: handle content arrays, tool roles, tool_calls,
308
+ and inject tool definitions into system prompt if tools are provided."""
309
  result = []
 
 
 
310
 
311
+ # If tools provided and tool_choice != "none", inject tool system prompt
312
+ inject_tools = tools and tool_choice != "none"
 
 
 
 
 
 
 
313
 
314
+ if inject_tools:
315
+ tool_system = _build_tool_system_prompt(tools, tool_choice)
316
+ else:
317
+ tool_system = None
318
 
319
+ system_injected = False
320
 
321
+ for msg in messages:
322
+ role = msg.get("role", "user")
323
+
324
+ # Inject tool system prompt before or as the first system message
325
+ if role == "system" and not system_injected and tool_system:
326
+ content = msg.get("content", "")
327
+ if isinstance(content, list):
328
+ content = _flatten_content_array(content)
329
+ content = str(content) if content else ""
330
+ combined = content + "\n\n" + tool_system if content.strip() else tool_system
331
+ result.append({"role": "system", "content": combined})
332
+ system_injected = True
333
  continue
334
 
335
+ result.append(_normalize_one_message(msg))
336
+
337
+ # If no system message existed, add tool system prompt as first message
338
+ if tool_system and not system_injected:
339
+ result.insert(0, {"role": "system", "content": tool_system})
340
+
341
+ # Filter out empty system messages
342
+ result = [m for m in result if not (m.get("role") == "system" and not m.get("content", "").strip())]
343
 
344
  return result
345
 
346
 
347
+ def _flatten_content_array(content: list) -> str:
348
+ """Convert a content array to plain text."""
349
+ text_parts = []
350
+ for part in content:
351
+ if isinstance(part, str):
352
+ text_parts.append(part)
353
+ elif isinstance(part, dict):
354
+ if part.get("type") == "text":
355
+ text_parts.append(part.get("text", ""))
356
+ return "\n".join(text_parts)
357
+
358
+
359
+ def _normalize_one_message(msg: dict) -> dict:
360
+ """Normalize a single message for chatgpt.org API."""
361
+ role = msg.get("role", "user")
362
+ content = msg.get("content", "")
363
+
364
+ # Handle content arrays β†’ plain text
365
+ if isinstance(content, list):
366
+ content = _flatten_content_array(content)
367
+
368
+ if content is None:
369
+ content = ""
370
+ content = str(content)
371
+
372
+ # Handle tool role messages β†’ convert to user message with tool result
373
+ if role == "tool":
374
+ tool_name = msg.get("name", "unknown_tool")
375
+ tool_call_id = msg.get("tool_call_id", "")
376
+ return {
377
+ "role": "user",
378
+ "content": f"[Tool Result for {tool_name} (id: {tool_call_id})]:\n{content}"
379
+ }
380
+
381
+ # Handle assistant messages with tool_calls β†’ text with <tool_call_> blocks
382
+ if role == "assistant" and msg.get("tool_calls"):
383
+ parts = []
384
+ regular_content = content if content and content.strip() else ""
385
+
386
+ if regular_content:
387
+ parts.append(regular_content)
388
+
389
+ for tc in msg["tool_calls"]:
390
+ func = tc.get("function", {})
391
+ name = func.get("name", "unknown")
392
+ args = func.get("arguments", "{}")
393
+ # Validate args is valid JSON
394
+ try:
395
+ json.loads(args)
396
+ except (json.JSONDecodeError, TypeError):
397
+ args = "{}"
398
+ parts.append(f'<tool_call name="{name}">\n{args}\n</tool_call_>')
399
+
400
+ return {"role": "assistant", "content": "\n\n".join(parts)}
401
+
402
+ # System messages with empty content get filtered out later
403
+ if role == "system" and not content.strip():
404
+ return {"role": "system", "content": ""}
405
+
406
+ return {"role": role, "content": content}
407
+
408
+
409
+ # ── Headers ──────────────────────────────────────────────────────
410
+
411
  def _headers() -> dict:
412
  h = {
413
  "Accept": "*/*",
 
424
 
425
 
426
  # ── Proxy-aware request with retry ──────────────────────────────
427
+
428
  async def _proxy_post(url: str, **kwargs) -> httpx.Response:
429
  """POST with proxy retry logic. Creates new client on each retry to get fresh IP."""
430
  global http_client
 
432
  for attempt in range(PROXY_MAX_RETRIES):
433
  try:
434
  resp = await http_client.post(url, **kwargs)
 
 
435
  return resp
436
 
437
  except (httpx.ConnectError, httpx.ProxyError, httpx.TimeoutException) as e:
438
  print(f"[Proxy] Connection error #{attempt+1}: {type(e).__name__}")
 
439
  if PROXY_URL:
440
  try:
441
  await http_client.aclose()
442
  except:
443
  pass
444
  http_client = _make_client()
 
445
  await asyncio.sleep(PROXY_RETRY_DELAY)
446
  else:
447
  await asyncio.sleep(2)
448
  continue
449
 
 
450
  return await http_client.post(url, **kwargs)
451
 
452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  # ── Raw call with retries ───────────────────────────────────────
454
+
455
  async def _raw_call(messages: list[dict], model: str) -> httpx.Response:
456
  """Make a single POST to chatgpt.org/api/chat with full retry logic."""
457
  await session.refresh(http_client)
 
468
  )
469
 
470
  if resp.status_code == 419 and attempt == 0:
471
+ print("[Chat] 419 -> refreshing session...")
472
  session.last_refresh = 0
473
  await session.refresh(http_client)
474
  break
 
493
 
494
 
495
  async def _stream_one_response(resp):
496
+ """Stream a single upstream SSE response in real-time.
497
+ Yields (text, finish_reason) tuples. finish_reason is None for text chunks."""
498
  finish_reason = None
499
 
500
  async for raw_line in resp.aiter_lines():
 
532
  # ── Streaming with auto-continue ────────────────────────────────
533
  MAX_CONTINUATIONS = 20
534
 
535
+
536
  async def _raw_call_streaming(messages: list[dict], model: str):
537
+ """Like _raw_call but yields SSE keep-alive comments during retries,
538
+ then yields the httpx.Response object."""
539
  await session.refresh(http_client)
540
  payload = {"model": model, "messages": messages}
541
 
542
  for attempt in range(2): # CSRF retry
543
  for rate_attempt in range(3): # 429 retry
 
544
  yield ": thinking...\n\n"
545
 
546
  resp = await _proxy_post(
 
551
  )
552
 
553
  if resp.status_code == 419 and attempt == 0:
554
+ print("[Chat] 419 -> refreshing session...")
555
  session.last_refresh = 0
556
  await session.refresh(http_client)
557
  break
 
578
  raise HTTPException(500, "Failed after retry")
579
 
580
 
581
+ async def _stream_with_auto_continue(messages: list[dict], model: str, has_tools: bool = False):
582
+ """Stream with real-time output, auto-continue, and keep-alive pings.
583
+
584
+ When has_tools is True, we buffer the full response to properly detect
585
+ and format tool calls, sending keep-alive pings while buffering.
586
+ When has_tools is False, we stream text in real-time.
587
+ """
588
  chunk_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
589
  created = int(time.time())
590
  conversation = list(messages)
 
614
  if text:
615
  chunk_content += text
616
  total_content += text
617
+
618
+ # If no tools, stream text in real-time
619
+ if not has_tools:
620
+ sse_data = json.dumps({
621
+ "id": chunk_id,
622
+ "object": "chat.completion.chunk",
623
+ "created": created,
624
+ "model": model,
625
+ "choices": [{
626
+ "index": 0,
627
+ "delta": {"content": text},
628
+ "finish_reason": None,
629
+ }],
630
+ })
631
+ yield f"data: {sse_data}\n\n"
632
+
633
+ print(f"[Chat] Chunk #{cont_num+1}: {len(chunk_content)} chars, finish={finish_reason}")
634
+
635
+ # Check for tool calls
636
+ if has_tools:
637
+ tool_calls, remaining_text = _parse_tool_calls(total_content)
638
+
639
+ if tool_calls:
640
+ # Emit tool calls as OpenAI streaming chunks
641
+ for i, tc in enumerate(tool_calls):
642
+ # First chunk: role + tool_call with id, name, and start of arguments
643
+ sse_start = json.dumps({
644
+ "id": chunk_id,
645
+ "object": "chat.completion.chunk",
646
+ "created": created,
647
+ "model": model,
648
+ "choices": [{
649
+ "index": 0,
650
+ "delta": {
651
+ "role": "assistant",
652
+ "tool_calls": [{
653
+ "index": i,
654
+ "id": tc["id"],
655
+ "type": "function",
656
+ "function": {
657
+ "name": tc["function"]["name"],
658
+ "arguments": "",
659
+ }
660
+ }]
661
+ },
662
+ "finish_reason": None,
663
+ }],
664
+ })
665
+ yield f"data: {sse_start}\n\n"
666
+
667
+ # Argument chunks β€” split into small pieces for streaming feel
668
+ args = tc["function"]["arguments"]
669
+ chunk_size = max(1, len(args) // 3)
670
+ for offset in range(0, len(args), chunk_size):
671
+ arg_piece = args[offset:offset + chunk_size]
672
+ sse_arg = json.dumps({
673
+ "id": chunk_id,
674
+ "object": "chat.completion.chunk",
675
+ "created": created,
676
+ "model": model,
677
+ "choices": [{
678
+ "index": 0,
679
+ "delta": {
680
+ "tool_calls": [{
681
+ "index": i,
682
+ "function": {
683
+ "arguments": arg_piece,
684
+ }
685
+ }]
686
+ },
687
+ "finish_reason": None,
688
+ }],
689
+ })
690
+ yield f"data: {sse_arg}\n\n"
691
+
692
+ # If there's remaining text alongside tool calls, emit it too
693
+ if remaining_text.strip():
694
+ sse_text = json.dumps({
695
+ "id": chunk_id,
696
+ "object": "chat.completion.chunk",
697
+ "created": created,
698
+ "model": model,
699
+ "choices": [{
700
+ "index": 0,
701
+ "delta": {"content": remaining_text},
702
+ "finish_reason": None,
703
+ }],
704
+ })
705
+ yield f"data: {sse_text}\n\n"
706
+
707
+ # Final chunk with finish_reason
708
+ sse_done = json.dumps({
709
+ "id": chunk_id,
710
+ "object": "chat.completion.chunk",
711
+ "created": created,
712
+ "model": model,
713
+ "choices": [{
714
+ "index": 0,
715
+ "delta": {},
716
+ "finish_reason": "tool_calls",
717
+ }],
718
+ })
719
+ yield f"data: {sse_done}\n\n"
720
+ yield "data: [DONE]\n\n"
721
+ return
722
+
723
+ # No tool calls found β€” if text is complete, stream it as content
724
+ if finish_reason == "stop":
725
+ # Stream the buffered text content as chunks
726
+ text_to_stream = total_content
727
+ chunk_sz = 50 # characters per streaming chunk
728
+ for offset in range(0, len(text_to_stream), chunk_sz):
729
+ piece = text_to_stream[offset:offset + chunk_sz]
730
+ sse_data = json.dumps({
731
+ "id": chunk_id,
732
+ "object": "chat.completion.chunk",
733
+ "created": created,
734
+ "model": model,
735
+ "choices": [{
736
+ "index": 0,
737
+ "delta": {"content": piece},
738
+ "finish_reason": None,
739
+ }],
740
+ })
741
+ yield f"data: {sse_data}\n\n"
742
+
743
  sse_data = json.dumps({
744
  "id": chunk_id,
745
  "object": "chat.completion.chunk",
 
747
  "model": model,
748
  "choices": [{
749
  "index": 0,
750
+ "delta": {},
751
+ "finish_reason": "stop",
752
  }],
753
  })
754
  yield f"data: {sse_data}\n\n"
755
+ yield "data: [DONE]\n\n"
756
+ return
757
 
758
+ else:
759
+ # No tools β€” original behavior
760
+ if finish_reason == "stop":
761
+ sse_data = json.dumps({
762
+ "id": chunk_id,
763
+ "object": "chat.completion.chunk",
764
+ "created": created,
765
+ "model": model,
766
+ "choices": [{
767
+ "index": 0,
768
+ "delta": {},
769
+ "finish_reason": "stop",
770
+ }],
771
+ })
772
+ yield f"data: {sse_data}\n\n"
773
+ yield "data: [DONE]\n\n"
774
+ return
775
 
776
+ # Auto-continue for length-limited responses
777
  yield ": continuing...\n\n"
778
 
779
+ # Check if we're in the middle of a tool call
780
+ if _has_incomplete_tool_call(chunk_content):
781
+ conversation.append({"role": "assistant", "content": chunk_content})
782
+ conversation.append({"role": "user", "content": "Continue the tool call exactly from where you left off. Do not repeat the opening tag or any arguments you already wrote."})
783
+ else:
784
+ conversation.append({"role": "assistant", "content": chunk_content})
785
+ conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
786
 
787
  print(f"[Chat] Auto-continue #{cont_num+1}, total so far: {len(total_content)} chars")
788
 
789
+ # Safety: max continuations reached
790
  sse_data = json.dumps({
791
  "id": chunk_id,
792
  "object": "chat.completion.chunk",
 
803
 
804
 
805
  # ── Non-streaming with auto-continue ────────────────────────────
806
+
807
+ async def _collect_with_auto_continue(messages: list[dict], model: str, has_tools: bool = False) -> dict:
808
+ """Collect the full response, auto-continuing if cut off.
809
+ Returns a dict with either 'content' or 'tool_calls' key."""
810
  conversation = list(messages)
811
  full_content = ""
812
 
 
825
  full_content += content
826
  print(f"[Chat] Collect #{cont_num+1}: {len(content)} chars, finish={finish_reason}")
827
 
828
+ # Check for tool calls if tools were provided
829
+ if has_tools:
830
+ tool_calls, remaining_text = _parse_tool_calls(full_content)
831
+
832
+ if tool_calls:
833
+ result = {
834
+ "tool_calls": tool_calls,
835
+ "content": remaining_text if remaining_text.strip() else None,
836
+ }
837
+ # If there are incomplete tool calls, continue
838
+ if _has_incomplete_tool_call(full_content) and finish_reason == "length":
839
+ pass # fall through to auto-continue
840
+ else:
841
+ return result
842
+
843
  if finish_reason == "stop":
844
+ if has_tools:
845
+ # No tool calls found, return as text
846
+ return {"content": full_content, "tool_calls": None}
847
+ return {"content": full_content, "tool_calls": None}
848
 
849
+ # Auto-continue
850
+ if _has_incomplete_tool_call(content):
851
+ conversation.append({"role": "assistant", "content": content})
852
+ conversation.append({"role": "user", "content": "Continue the tool call exactly from where you left off. Do not repeat the opening tag or any arguments you already wrote."})
853
+ else:
854
+ conversation.append({"role": "assistant", "content": content})
855
+ conversation.append({"role": "user", "content": "Continue exactly from where you left off. Do not repeat any text you already wrote."})
856
 
857
+ return {"content": full_content, "tool_calls": None}
858
 
859
 
860
  # ── OpenAI-compatible endpoint ──────────────────────────────────
861
+
862
  @app.post("/v1/chat/completions")
863
  @app.post("/chat/completions")
864
  async def chat_completions(request: Request):
 
873
  model = body.get("model", "anthropic/claude-haiku-4-5")
874
  messages_raw = body.get("messages", [])
875
  stream = body.get("stream", False)
876
+ tools = body.get("tools") or None
877
+ tool_choice = body.get("tool_choice", "auto")
878
 
879
  if not messages_raw or not isinstance(messages_raw, list):
880
  raise HTTPException(400, "messages must be a non-empty array")
881
 
882
+ has_tools = bool(tools) and tool_choice != "none"
883
+
884
+ messages = normalize_messages(messages_raw, tools=tools, tool_choice=tool_choice)
885
 
886
  if not messages:
887
  raise HTTPException(400, "No valid messages after normalization")
888
 
889
  if stream:
890
  return StreamingResponse(
891
+ _stream_with_auto_continue(messages, model, has_tools=has_tools),
892
  media_type="text/event-stream",
893
  headers={
894
  "Cache-Control": "no-cache",
 
897
  },
898
  )
899
  else:
900
+ result = await _collect_with_auto_continue(messages, model, has_tools=has_tools)
901
+
902
+ tool_calls = result.get("tool_calls")
903
+ content = result.get("content")
904
+
905
+ if tool_calls:
906
+ return JSONResponse({
907
+ "id": f"chatcmpl-{int(time.time())}",
908
+ "object": "chat.completion",
909
+ "created": int(time.time()),
910
+ "model": model,
911
+ "choices": [{
912
+ "index": 0,
913
+ "message": {
914
+ "role": "assistant",
915
+ "content": content,
916
+ "tool_calls": tool_calls,
917
+ },
918
+ "finish_reason": "tool_calls",
919
+ }],
920
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
921
+ })
922
+ else:
923
+ return JSONResponse({
924
+ "id": f"chatcmpl-{int(time.time())}",
925
+ "object": "chat.completion",
926
+ "created": int(time.time()),
927
+ "model": model,
928
+ "choices": [{
929
+ "index": 0,
930
+ "message": {"role": "assistant", "content": content or ""},
931
+ "finish_reason": "stop",
932
+ }],
933
+ "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
934
+ })
935
 
936
 
937
  # ── Models / Health ─────────────────────────────────────────────
938
+
939
  @app.get("/v1/models")
940
  @app.get("/models")
941
  async def list_models():
 
951
  async def root():
952
  return {
953
  "status": "ok",
954
+ "version": "5.0.0",
955
  "proxy": bool(PROXY_URL),
956
+ "tool_calling": True,
957
  "endpoints": ["/v1/chat/completions", "/v1/models"],
958
  }
959