bluewinliang committed on
Commit
1636cdb
·
verified ·
1 Parent(s): 96877a8

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +164 -354
proxy_handler.py CHANGED
@@ -1,11 +1,7 @@
1
  """
2
  Proxy handler for Z.AI API requests
3
  """
4
- import json
5
- import logging
6
- import re
7
- import time
8
- import uuid
9
  from typing import AsyncGenerator, Dict, Any, Tuple
10
 
11
  import httpx
@@ -14,10 +10,7 @@ from fastapi.responses import StreamingResponse
14
 
15
  from config import settings
16
  from cookie_manager import cookie_manager
17
- from models import (
18
- ChatCompletionRequest,
19
- ChatCompletionResponse,
20
- )
21
 
22
  logger = logging.getLogger(__name__)
23
 
@@ -30,116 +23,70 @@ class ProxyHandler:
30
  http2=True,
31
  )
32
 
33
- async def __aenter__(self):
34
- return self
35
-
36
- async def __aexit__(self, exc_type, exc_val, exc_tb):
37
- await self.client.aclose()
38
-
39
- # ------------------------------------------------------------------
40
- # 內容處理相關工具
41
- # ------------------------------------------------------------------
42
- def _balance_think_tag(self, text: str) -> str:
43
- """確保 <think> 與 </think> 標籤數量相符"""
44
- open_cnt = len(re.findall(r"<think>", text))
45
- close_cnt = len(re.findall(r"</think>", text))
46
- if open_cnt > close_cnt:
47
- text += "</think>" * (open_cnt - close_cnt)
48
- elif close_cnt > open_cnt:
49
- extra_closes = close_cnt - open_cnt
50
- for _ in range(extra_closes):
51
- text = re.sub(r"</think>(?!</think>)(?![^<]*</think>)$", "", text, count=1)
52
- return text
53
-
54
- def _clean_thinking_content(self, content: str) -> str:
55
- """移除 HTML 標籤但保留思考文字"""
56
- if not content:
57
- return content
58
- content = re.sub(r'<details[^>]*>', '', content)
59
- content = re.sub(r'</details>', '', content)
60
- content = re.sub(r'<summary[^>]*>.*?</summary>', '', content, flags=re.DOTALL)
61
- content = re.sub(r'<[^>]+>', '', content)
62
- content = re.sub(r'^>\s*', '', content)
63
- content = re.sub(r'\n>\s*', '\n', content)
64
- return content.strip()
65
-
66
- def _clean_answer_content(self, content: str) -> str:
67
- """僅移除 <details> 區塊,保留其餘 Markdown"""
68
- if not content:
69
- return content
70
- return re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
71
-
72
- def transform_content(self, content: str) -> str:
73
- """將上游 HTML 轉成 <think> 標籤"""
74
- if not content:
75
- return content
76
- if settings.SHOW_THINK_TAGS:
77
- content = re.sub(r"<details[^>]*>", "<think>", content)
78
- content = re.sub(r"</details>", "</think>", content)
79
- content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
80
- content = self._balance_think_tag(content)
81
- else:
82
- content = re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
83
- return content.strip()
84
-
85
- def _serialize_messages(self, messages) -> list:
86
- """將 ChatMessage 物件序列化成 dict 方便 JSON 傳輸"""
87
- serialized_messages = []
88
- for message in messages:
89
- if hasattr(message, "dict"):
90
- serialized_messages.append(message.dict())
91
- elif hasattr(message, "model_dump"):
92
- serialized_messages.append(message.model_dump())
93
- elif isinstance(message, dict):
94
- serialized_messages.append(message)
95
- else:
96
- serialized_messages.append(
97
- {
98
- "role": getattr(message, "role", "user"),
99
- "content": getattr(message, "content", str(message)),
100
- }
101
- )
102
- return serialized_messages
103
-
104
- # ------------------------------------------------------------------
105
- # 與上游 API 溝通
106
- # ------------------------------------------------------------------
107
- async def _prepare_upstream(self, request: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
108
- """產生送往上游的 body、headers 與 cookie"""
109
- cookie = await cookie_manager.get_next_cookie()
110
- if not cookie:
111
- raise HTTPException(status_code=503, detail="No available cookies")
112
-
113
- target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
114
-
115
- req_body = {
116
  "stream": True,
117
- "model": target_model,
118
- "messages": self._serialize_messages(request.messages),
119
  "background_tasks": {"title_generation": True, "tags_generation": True},
120
  "chat_id": str(uuid.uuid4()),
121
  "features": {
122
- "image_generation": False,
123
- "code_interpreter": False,
124
- "web_search": False,
125
- "auto_web_search": False,
126
- "enable_thinking": True,
127
  },
128
  "id": str(uuid.uuid4()),
129
  "mcp_servers": ["deep-web-search"],
130
- "model_item": {"id": target_model, "name": "GLM-4.5", "owned_by": "openai"},
131
- "params": {},
132
- "tool_servers": [],
133
  "variables": {
134
- "{{USER_NAME}}": "User",
135
- "{{USER_LOCATION}}": "Unknown",
136
  "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
137
  },
138
  }
139
-
140
  headers = {
141
  "Content-Type": "application/json",
142
- "Authorization": f"Bearer {cookie}",
143
  "User-Agent": (
144
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
145
  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
@@ -153,14 +100,10 @@ class ProxyHandler:
153
  "Origin": "https://chat.z.ai",
154
  "Referer": "https://chat.z.ai/",
155
  }
 
156
 
157
- return req_body, headers, cookie
158
-
159
- # ------------------------------------------------------------------
160
- # 串流模式
161
- # ------------------------------------------------------------------
162
- async def stream_proxy_response(self, request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
163
- """處理串流回傳"""
164
  client = None
165
  try:
166
  client = httpx.AsyncClient(
@@ -168,204 +111,111 @@ class ProxyHandler:
168
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
169
  http2=True,
170
  )
171
- req_body, headers, cookie = await self._prepare_upstream(request)
172
- completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
173
-
174
- think_open = False # 是否已開啟 <think>
175
- current_phase = None # 追蹤 phase 切換
176
-
177
- async with client.stream("POST", settings.UPSTREAM_URL, json=req_body, headers=headers) as response:
178
- if response.status_code != 200:
179
- await cookie_manager.mark_cookie_invalid(cookie)
180
- error_chunk = {
181
- "id": completion_id,
182
  "object": "chat.completion.chunk",
183
  "created": int(time.time()),
184
- "model": request.model,
185
- "choices": [
186
- {
187
- "index": 0,
188
- "delta": {"content": f"Error: Upstream API returned {response.status_code}"},
189
- "finish_reason": "stop",
190
- }
191
- ],
192
  }
193
- yield f"data: {json.dumps(error_chunk)}\n\n"
194
- yield "data: [DONE]\n\n"
195
- return
196
 
197
- async for raw_chunk in response.aiter_text():
198
- if not raw_chunk or raw_chunk.isspace():
199
- continue
200
-
201
- for line in raw_chunk.split("\n"):
202
  line = line.strip()
203
- if not line or line.startswith(":"):
204
- continue
205
- if not line.startswith("data: "):
206
- continue
207
-
208
  payload = line[6:]
209
- if payload == "[DONE]":
210
- # 結尾時若 <think> 未關閉,自動補上
211
  if think_open and settings.SHOW_THINK_TAGS:
212
- close_chunk = {
213
- "id": completion_id,
214
- "object": "chat.completion.chunk",
215
- "created": int(time.time()),
216
- "model": request.model,
217
- "choices": [
218
- {
219
- "index": 0,
220
- "delta": {"content": "</think>\n"},
221
- "finish_reason": None,
222
- }
223
- ],
224
  }
225
- yield f"data: {json.dumps(close_chunk)}\n\n"
226
-
227
- final_chunk = {
228
- "id": completion_id,
229
- "object": "chat.completion.chunk",
230
- "created": int(time.time()),
231
- "model": request.model,
232
- "choices": [
233
- {"index": 0, "delta": {}, "finish_reason": "stop"}
234
- ],
235
  }
236
- yield f"data: {json.dumps(final_chunk)}\n\n"
237
- yield "data: [DONE]\n\n"
238
- return
239
 
240
  try:
241
  parsed = json.loads(payload)
242
  except json.JSONDecodeError:
243
  continue
 
 
244
 
245
- data = parsed.get("data", {})
246
- delta_content = data.get("delta_content", "")
247
- phase = data.get("phase")
248
-
249
- # --------------------------------------------------
250
- # phase 變化處理
251
- # --------------------------------------------------
252
- if phase != current_phase:
253
- current_phase = phase
254
- # 由 thinking → answer 時,先補 </think> 並帶換行
255
- if (
256
- phase == "answer"
257
- and think_open
258
- and settings.SHOW_THINK_TAGS
259
- ):
260
  auto_close = {
261
- "id": completion_id,
262
- "object": "chat.completion.chunk",
263
- "created": int(time.time()),
264
- "model": request.model,
265
- "choices": [
266
- {
267
- "index": 0,
268
- "delta": {"content": "</think>\n"},
269
- "finish_reason": None,
270
- }
271
- ],
272
  }
273
  yield f"data: {json.dumps(auto_close)}\n\n"
274
- think_open = False
275
-
276
- # --------------------------------------------------
277
- # 真正內容輸出
278
- # --------------------------------------------------
279
- if phase == "thinking" and not settings.SHOW_THINK_TAGS:
280
- # 不顯示思考內容就跳過
281
- continue
282
-
283
- if delta_content:
284
- if phase == "thinking" and settings.SHOW_THINK_TAGS:
285
- # 若第一次進入思考階段,先輸出 <think>
286
- if not think_open:
287
- think_chunk = {
288
- "id": completion_id,
289
- "object": "chat.completion.chunk",
290
- "created": int(time.time()),
291
- "model": request.model,
292
- "choices": [
293
- {
294
- "index": 0,
295
- "delta": {"content": "<think>"},
296
- "finish_reason": None,
297
- }
298
- ],
299
- }
300
- yield f"data: {json.dumps(think_chunk)}\n\n"
301
- think_open = True
302
 
303
- transformed_content = self._clean_thinking_content(delta_content)
304
- else:
305
- # answer 階段
306
- transformed_content = self._clean_answer_content(delta_content)
307
 
308
- if transformed_content:
309
- chunk = {
310
- "id": completion_id,
311
- "object": "chat.completion.chunk",
312
- "created": int(time.time()),
313
- "model": request.model,
314
- "choices": [
315
- {
316
- "index": 0,
317
- "delta": {"content": transformed_content},
318
- "finish_reason": None,
319
- }
320
- ],
321
  }
322
- yield f"data: {json.dumps(chunk)}\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
 
324
  except httpx.RequestError as e:
325
  logger.error(f"Request error: {e}")
326
- error_chunk = {
327
  "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
328
  "object": "chat.completion.chunk",
329
  "created": int(time.time()),
330
- "model": request.model,
331
- "choices": [
332
- {
333
- "index": 0,
334
- "delta": {"content": f"Connection error: {str(e)}"},
335
- "finish_reason": "stop",
336
- }
337
- ],
338
  }
339
- yield f"data: {json.dumps(error_chunk)}\n\n"
340
- yield "data: [DONE]\n\n"
341
-
342
  except Exception as e:
343
- logger.error(f"Unexpected error in streaming: {e}")
344
- error_chunk = {
345
  "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
346
  "object": "chat.completion.chunk",
347
  "created": int(time.time()),
348
- "model": request.model,
349
- "choices": [
350
- {
351
- "index": 0,
352
- "delta": {"content": f"Internal error: {str(e)}"},
353
- "finish_reason": "stop",
354
- }
355
- ],
356
  }
357
- yield f"data: {json.dumps(error_chunk)}\n\n"
358
- yield "data: [DONE]\n\n"
359
-
360
  finally:
361
- if client:
362
- await client.aclose()
363
 
364
- # ------------------------------------------------------------------
365
- # 非串流模式
366
- # ------------------------------------------------------------------
367
- async def non_stream_proxy_response(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
368
- """處理非串流回傳"""
369
  client = None
370
  try:
371
  client = httpx.AsyncClient(
@@ -373,98 +223,58 @@ class ProxyHandler:
373
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
374
  http2=True,
375
  )
376
- req_body, headers, cookie = await self._prepare_upstream(request)
377
-
378
- thinking_buf, answer_buf = [], []
379
- current_phase = None
380
-
381
- async with client.stream("POST", settings.UPSTREAM_URL, json=req_body, headers=headers) as response:
382
- if response.status_code != 200:
383
- await cookie_manager.mark_cookie_invalid(cookie)
384
- raise HTTPException(status_code=response.status_code, detail="Upstream API error")
385
-
386
- async for raw_chunk in response.aiter_text():
387
- if not raw_chunk or raw_chunk.isspace():
388
- continue
389
-
390
- for line in raw_chunk.split("\n"):
391
  line = line.strip()
392
- if not line or line.startswith(":") or not line.startswith("data: "):
393
- continue
394
-
395
  payload = line[6:]
396
- if payload == "[DONE]":
397
- break
398
-
399
- try:
400
- parsed = json.loads(payload)
401
- except json.JSONDecodeError:
402
- continue
403
-
404
- data = parsed.get("data", {})
405
- delta_content = data.get("delta_content", "")
406
- phase = data.get("phase")
407
-
408
- if phase != current_phase:
409
- current_phase = phase
410
-
411
- if delta_content:
412
- if phase == "thinking":
413
- thinking_buf.append(delta_content)
414
- elif phase == "answer":
415
- answer_buf.append(delta_content)
416
-
417
- # 組裝最終文字
418
- final_text = ""
419
- if settings.SHOW_THINK_TAGS and thinking_buf:
420
- thinking_text = self._clean_thinking_content("".join(thinking_buf))
421
- answer_text = self._clean_answer_content("".join(answer_buf)) if answer_buf else ""
422
- if thinking_text:
423
- # 在 </think> 與正文間補換行,避免首字被吃掉
424
- final_text = f"<think>{thinking_text}</think>\n{answer_text}"
425
- else:
426
- final_text = answer_text
427
  else:
428
- final_text = self._clean_answer_content("".join(answer_buf))
429
 
430
  return ChatCompletionResponse(
431
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
432
  created=int(time.time()),
433
- model=request.model,
434
- choices=[
435
- {
436
- "index": 0,
437
- "message": {"role": "assistant", "content": final_text},
438
- "finish_reason": "stop",
439
- }
440
- ],
441
  )
442
-
443
  except httpx.RequestError as e:
444
- logger.error(f"Request error in non-stream: {e}")
445
- if "cookie" in locals():
446
- await cookie_manager.mark_cookie_invalid(cookie)
447
- raise HTTPException(status_code=502, detail="Upstream connection error")
448
-
449
  except Exception as e:
450
- logger.error(f"Unexpected error in non-stream: {e}")
451
- raise HTTPException(status_code=500, detail="Internal server error")
452
-
453
  finally:
454
- if client:
455
- await client.aclose()
456
 
457
- # ------------------------------------------------------------------
458
- # FastAPI 入口
459
- # ------------------------------------------------------------------
460
- async def handle_chat_completion(self, request: ChatCompletionRequest):
461
- """聊天補全主入口"""
462
- stream = bool(request.stream) if request.stream is not None else settings.DEFAULT_STREAM
463
  if stream:
464
  return StreamingResponse(
465
- self.stream_proxy_response(request),
466
  media_type="text/event-stream",
467
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
468
  )
469
- else:
470
- return await self.non_stream_proxy_response(request)
 
1
  """
2
  Proxy handler for Z.AI API requests
3
  """
4
+ import json, logging, re, time, uuid
 
 
 
 
5
  from typing import AsyncGenerator, Dict, Any, Tuple
6
 
7
  import httpx
 
10
 
11
  from config import settings
12
  from cookie_manager import cookie_manager
13
+ from models import ChatCompletionRequest, ChatCompletionResponse
 
 
 
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
23
  http2=True,
24
  )
25
 
26
+ async def __aenter__(self): return self
27
+ async def __aexit__(self, exc_type, exc_val, exc_tb): await self.client.aclose()
28
+
29
+ # ---------- text utilities ----------
30
+ def _balance_think_tag(self, txt: str) -> str:
31
+ opens, closes = len(re.findall(r"<think>", txt)), len(re.findall(r"</think>", txt))
38
+ if opens > closes: txt += "</think>" * (opens - closes)
39
+ elif closes > opens:
40
+ for _ in range(closes - opens):
41
+ txt = re.sub(r"</think>(?!</think>)(?![^<]*</think>)$", "", txt, 1)
42
+ return txt
43
+
44
+ def _clean_thinking(self, s: str) -> str:
45
+ if not s: return s
46
+ s = re.sub(r'<details[^>]*>|</details>|<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
47
+ s = re.sub(r'<[^>]+>', '', s)
48
+ s = re.sub(r'^>\s*|\n>\s*', '\n', s)
49
+ return s.strip()
50
+
51
+ def _clean_answer(self, s: str) -> str:
52
+ return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL) if s else s
53
+
54
+ def _serialize_msgs(self, msgs) -> list:
55
+ out = []
56
+ for m in msgs:
57
+ if hasattr(m, "dict"): out.append(m.dict())
58
+ elif hasattr(m, "model_dump"): out.append(m.model_dump())
59
+ elif isinstance(m, dict): out.append(m)
60
+ else: out.append({"role": getattr(m, "role", "user"), "content": getattr(m, "content", str(m))})
61
+ return out
62
+
63
+ # ---------- upstream ----------
64
+ async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
65
+ ck = await cookie_manager.get_next_cookie()
66
+ if not ck: raise HTTPException(503, "No available cookies")
67
+ model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
68
+ body = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  "stream": True,
70
+ "model": model,
71
+ "messages": self._serialize_msgs(req.messages),
72
  "background_tasks": {"title_generation": True, "tags_generation": True},
73
  "chat_id": str(uuid.uuid4()),
74
  "features": {
75
+ "image_generation": False, "code_interpreter": False, "web_search": False,
76
+ "auto_web_search": False, "enable_thinking": True,
 
 
 
77
  },
78
  "id": str(uuid.uuid4()),
79
  "mcp_servers": ["deep-web-search"],
80
+ "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"},
81
+ "params": {}, "tool_servers": [],
 
82
  "variables": {
83
+ "{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown",
 
84
  "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
85
  },
86
  }
 
87
  headers = {
88
  "Content-Type": "application/json",
89
+ "Authorization": f"Bearer {ck}",
90
  "User-Agent": (
91
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
92
  "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
 
100
  "Origin": "https://chat.z.ai",
101
  "Referer": "https://chat.z.ai/",
102
  }
103
+ return body, headers, ck
104
 
105
+ # ---------- stream ----------
106
+ async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
 
 
 
 
 
107
  client = None
108
  try:
109
  client = httpx.AsyncClient(
 
111
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
112
  http2=True,
113
  )
114
+ body, headers, ck = await self._prep_upstream(req)
115
+ comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
116
+ think_open, phase_cur, need_nl = False, None, False
117
+
118
+ async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
119
+ if resp.status_code != 200:
120
+ await cookie_manager.mark_cookie_invalid(ck)
121
+ err = {
122
+ "id": comp_id,
 
 
123
  "object": "chat.completion.chunk",
124
  "created": int(time.time()),
125
+ "model": req.model,
126
+ "choices": [{"index": 0, "delta": {"content": f"Error: {resp.status_code}"}, "finish_reason": "stop"}],
 
 
 
 
 
 
127
  }
128
+ yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
 
 
129
 
130
+ async for raw in resp.aiter_text():
131
+ if not raw or raw.isspace(): continue
132
+ for line in raw.split('\n'):
 
 
133
  line = line.strip()
134
+ if not line or line.startswith(':') or not line.startswith('data: '): continue
 
 
 
 
135
  payload = line[6:]
136
+ if payload == '[DONE]':
 
137
  if think_open and settings.SHOW_THINK_TAGS:
138
+ close_c = {
139
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
140
+ "model": req.model,
141
+ "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
 
 
 
 
 
 
 
 
142
  }
143
+ yield f"data: {json.dumps(close_c)}\n\n"
144
+ final_c = {
145
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
146
+ "model": req.model, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
 
 
 
 
 
 
147
  }
148
+ yield f"data: {json.dumps(final_c)}\n\n"; yield "data: [DONE]\n\n"; return
 
 
149
 
150
  try:
151
  parsed = json.loads(payload)
152
  except json.JSONDecodeError:
153
  continue
154
+ dat = parsed.get("data", {})
155
+ delta, phase = dat.get("delta_content", ""), dat.get("phase")
156
 
157
+ if phase != phase_cur:
158
+ phase_cur = phase
159
+ if phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
 
 
 
 
 
 
 
 
 
 
 
 
160
  auto_close = {
161
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
162
+ "model": req.model,
163
+ "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
 
 
 
 
 
 
 
 
164
  }
165
  yield f"data: {json.dumps(auto_close)}\n\n"
166
+ think_open, need_nl = False, True # add \n before first answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
+ if phase == "thinking" and not settings.SHOW_THINK_TAGS: continue
169
+ if not delta: continue
 
 
170
 
171
+ if phase == "thinking" and settings.SHOW_THINK_TAGS:
172
+ if not think_open:
173
+ tk = {
174
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
175
+ "model": req.model,
176
+ "choices": [{"index": 0, "delta": {"content": "<think>"}, "finish_reason": None}],
 
 
 
 
 
 
 
177
  }
178
+ yield f"data: {json.dumps(tk)}\n\n"; think_open = True
179
+ text = self._clean_thinking(delta)
180
+ else:
181
+ if need_nl:
182
+ delta = "\n" + delta
183
+ need_nl = False
184
+ text = self._clean_answer(delta)
185
+
186
+ if text:
187
+ out = {
188
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
189
+ "model": req.model,
190
+ "choices": [{"index": 0, "delta": {"content": text}, "finish_reason": None}],
191
+ }
192
+ yield f"data: {json.dumps(out)}\n\n"
193
 
194
  except httpx.RequestError as e:
195
  logger.error(f"Request error: {e}")
196
+ err = {
197
  "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
198
  "object": "chat.completion.chunk",
199
  "created": int(time.time()),
200
+ "model": req.model,
201
+ "choices": [{"index": 0, "delta": {"content": f"Connection error: {e}"}, "finish_reason": "stop"}],
 
 
 
 
 
 
202
  }
203
+ yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
 
 
204
  except Exception as e:
205
+ logger.error(f"Unexpected error: {e}")
206
+ err = {
207
  "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
208
  "object": "chat.completion.chunk",
209
  "created": int(time.time()),
210
+ "model": req.model,
211
+ "choices": [{"index": 0, "delta": {"content": f"Internal error: {e}"}, "finish_reason": "stop"}],
 
 
 
 
 
 
212
  }
213
+ yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
 
 
214
  finally:
215
+ if client: await client.aclose()
 
216
 
217
+ # ---------- non-stream ----------
218
+ async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
 
 
 
219
  client = None
220
  try:
221
  client = httpx.AsyncClient(
 
223
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
224
  http2=True,
225
  )
226
+ body, headers, ck = await self._prep_upstream(req)
227
+ think_buf, answer_buf, phase_cur = [], [], None
228
+
229
+ async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
230
+ if resp.status_code != 200:
231
+ await cookie_manager.mark_cookie_invalid(ck)
232
+ raise HTTPException(resp.status_code, "Upstream error")
233
+ async for raw in resp.aiter_text():
234
+ if not raw or raw.isspace(): continue
235
+ for line in raw.split('\n'):
 
 
 
 
 
236
  line = line.strip()
237
+ if not line or line.startswith(':') or not line.startswith('data: '): continue
 
 
238
  payload = line[6:]
239
+ if payload == '[DONE]': break
240
+ try: parsed = json.loads(payload)
241
+ except json.JSONDecodeError: continue
242
+ dat = parsed.get("data", {})
243
+ delta, phase = dat.get("delta_content", ""), dat.get("phase")
244
+ if not delta: continue
245
+ if phase == "thinking": think_buf.append(delta)
246
+ elif phase == "answer": answer_buf.append(delta)
247
+
248
+ ans_text = self._clean_answer(''.join(answer_buf))
249
+ if settings.SHOW_THINK_TAGS and think_buf:
250
+ think_text = self._clean_thinking(''.join(think_buf))
251
+ final = f"<think>{think_text}</think>\n{ans_text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  else:
253
+ final = ans_text
254
 
255
  return ChatCompletionResponse(
256
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
257
  created=int(time.time()),
258
+ model=req.model,
259
+ choices=[{"index": 0, "message": {"role": "assistant", "content": final}, "finish_reason": "stop"}],
 
 
 
 
 
 
260
  )
 
261
  except httpx.RequestError as e:
262
+ logger.error(f"Non-stream request error: {e}")
263
+ if "ck" in locals(): await cookie_manager.mark_cookie_invalid(ck)
264
+ raise HTTPException(502, "Connection error")
 
 
265
  except Exception as e:
266
+ logger.error(f"Non-stream unexpected error: {e}")
267
+ raise HTTPException(500, "Internal error")
 
268
  finally:
269
+ if client: await client.aclose()
 
270
 
271
+ # ---------- FastAPI entry ----------
272
+ async def handle_chat_completion(self, req: ChatCompletionRequest):
273
+ stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
 
 
 
274
  if stream:
275
  return StreamingResponse(
276
+ self.stream_proxy_response(req),
277
  media_type="text/event-stream",
278
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
279
  )
280
+ return await self.non_stream_proxy_response(req)