bluewinliang committed on
Commit
96877a8
·
verified ·
1 Parent(s): e462360

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +239 -171
proxy_handler.py CHANGED
@@ -1,7 +1,6 @@
1
  """
2
- Proxy handler for Z.AI API requests (patched version)
3
  """
4
-
5
  import json
6
  import logging
7
  import re
@@ -25,8 +24,6 @@ logger = logging.getLogger(__name__)
25
 
26
  class ProxyHandler:
27
  def __init__(self):
28
- # Consider moving shared client to FastAPI startup
29
- # But keeping old logic for convenience
30
  self.client = httpx.AsyncClient(
31
  timeout=httpx.Timeout(60.0, read=300.0),
32
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
@@ -39,83 +36,86 @@ class ProxyHandler:
39
  async def __aexit__(self, exc_type, exc_val, exc_tb):
40
  await self.client.aclose()
41
 
42
- # Content processing utilities
43
-
 
44
  def _balance_think_tag(self, text: str) -> str:
45
- """Ensure <think> and </think> tags are balanced"""
46
  open_cnt = len(re.findall(r"<think>", text))
47
  close_cnt = len(re.findall(r"</think>", text))
48
  if open_cnt > close_cnt:
49
  text += "</think>" * (open_cnt - close_cnt)
50
  elif close_cnt > open_cnt:
51
- # Remove extra </think> tags
52
- extra = close_cnt - open_cnt
53
- for _ in range(extra):
54
- text = re.sub(r"</think>(?!.*</think>)", "", text, count=1)
55
  return text
56
 
57
  def _clean_thinking_content(self, content: str) -> str:
58
- """Remove HTML tags, keep only thinking text"""
59
  if not content:
60
  return content
61
- content = re.sub(r"<details[^>]*>", "", content)
62
- content = re.sub(r"</details>", "", content)
63
- content = re.sub(r"<summary[^>]*>.*?</summary>", "", content, flags=re.DOTALL)
64
- content = re.sub(r"<[^>]+>", "", content)
65
- content = re.sub(r"^\s*>\s*", "", content)
66
- content = re.sub(r"\n\s*>\s*", "\n", content)
67
- return content
68
 
69
  def _clean_answer_content(self, content: str) -> str:
70
- """Remove <details>...</details>, keep the rest"""
71
  if not content:
72
  return content
73
  return re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
74
 
75
  def transform_content(self, content: str) -> str:
76
- """Transform <details> to <think> tags if showing thinking"""
 
 
77
  if settings.SHOW_THINK_TAGS:
78
  content = re.sub(r"<details[^>]*>", "<think>", content)
79
  content = re.sub(r"</details>", "</think>", content)
80
- content = re.sub(r"<summary[^>]*>.*?</summary>", "", content, flags=re.DOTALL)
81
  content = self._balance_think_tag(content)
82
  else:
83
  content = re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
84
- return content
85
 
86
  def _serialize_messages(self, messages) -> list:
87
- """Convert pydantic/dataclass/dict to pure dict"""
88
- result = []
89
- for m in messages:
90
- if hasattr(m, "dict"):
91
- result.append(m.dict())
92
- elif hasattr(m, "model_dump"):
93
- result.append(m.model_dump())
94
- elif isinstance(m, dict):
95
- result.append(m)
96
  else:
97
- result.append({
98
- "role": getattr(m, "role", "user"),
99
- "content": getattr(m, "content", str(m))
100
- })
101
- return result
102
-
103
- # Upstream communication
104
-
105
- async def _prepare_upstream(
106
- self, request: ChatCompletionRequest
107
- ) -> Tuple[Dict[str, Any], Dict[str, str], str]:
 
 
108
  cookie = await cookie_manager.get_next_cookie()
109
  if not cookie:
110
  raise HTTPException(status_code=503, detail="No available cookies")
111
 
112
  target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
113
- serialized_messages = self._serialize_messages(request.messages)
114
 
115
- body = {
116
  "stream": True,
117
  "model": target_model,
118
- "messages": serialized_messages,
119
  "background_tasks": {"title_generation": True, "tags_generation": True},
120
  "chat_id": str(uuid.uuid4()),
121
  "features": {
@@ -153,103 +153,87 @@ class ProxyHandler:
153
  "Origin": "https://chat.z.ai",
154
  "Referer": "https://chat.z.ai/",
155
  }
156
- return body, headers, cookie
157
-
158
- # Utilities
159
-
160
- def _mk_chunk(self, cid: str, model: str, content: str) -> str:
161
- """Create a chunk and return with data: prefix"""
162
- chunk = {
163
- "id": cid,
164
- "object": "chat.completion.chunk",
165
- "created": int(time.time()),
166
- "model": model,
167
- "choices": [{
168
- "index": 0,
169
- "delta": {"content": content},
170
- "finish_reason": None
171
- }]
172
- }
173
- return f"data: {json.dumps(chunk)}\n\n"
174
-
175
- async def _error_chunks_async(self, model: str, msg: str):
176
- """Generate error output (async generator)"""
177
- cid = f"chatcmpl-{uuid.uuid4().hex[:29]}"
178
- err = {
179
- "id": cid,
180
- "object": "chat.completion.chunk",
181
- "created": int(time.time()),
182
- "model": model,
183
- "choices": [{
184
- "index": 0,
185
- "delta": {"content": msg},
186
- "finish_reason": "stop"
187
- }]
188
- }
189
- yield f"data: {json.dumps(err)}\n\n"
190
- yield "data: [DONE]\n\n"
191
 
192
- # Streaming response
193
 
 
 
 
194
  async def stream_proxy_response(self, request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
 
195
  client = None
196
- cookie = None
197
  try:
198
  client = httpx.AsyncClient(
199
  timeout=httpx.Timeout(60.0, read=300.0),
200
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
201
  http2=True,
202
  )
203
- body, headers, cookie = await self._prepare_upstream(request)
204
-
205
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
206
- think_open = False
207
- current_phase = None
208
 
209
- async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
210
- if resp.status_code != 200:
 
 
 
211
  await cookie_manager.mark_cookie_invalid(cookie)
212
- err = {
213
  "id": completion_id,
214
  "object": "chat.completion.chunk",
215
  "created": int(time.time()),
216
  "model": request.model,
217
- "choices": [{
218
- "index": 0,
219
- "delta": {"content": f"Error: Upstream returned {resp.status_code}"},
220
- "finish_reason": "stop"
221
- }]
 
 
222
  }
223
- yield f"data: {json.dumps(err)}\n\n"
224
  yield "data: [DONE]\n\n"
225
  return
226
 
227
- async for raw in resp.aiter_text():
228
- if not raw or raw.isspace():
229
  continue
230
- for line in raw.split("\n"):
 
231
  line = line.strip()
232
- if not line or line.startswith(":") or not line.startswith("data: "):
233
  continue
 
 
 
234
  payload = line[6:]
235
  if payload == "[DONE]":
236
- # If still in thinking mode, close the tag
237
  if think_open and settings.SHOW_THINK_TAGS:
238
- yield self._mk_chunk(completion_id, request.model, "</think>")
239
- yield self._mk_chunk(completion_id, request.model, "\n")
240
- # Final stop chunk
241
- final = {
 
 
 
 
 
 
 
 
 
 
 
 
242
  "id": completion_id,
243
  "object": "chat.completion.chunk",
244
  "created": int(time.time()),
245
  "model": request.model,
246
- "choices": [{
247
- "index": 0,
248
- "delta": {},
249
- "finish_reason": "stop"
250
- }]
251
  }
252
- yield f"data: {json.dumps(final)}\n\n"
253
  yield "data: [DONE]\n\n"
254
  return
255
 
@@ -262,79 +246,152 @@ class ProxyHandler:
262
  delta_content = data.get("delta_content", "")
263
  phase = data.get("phase")
264
 
265
- # Phase change: thinking -> answer
 
 
266
  if phase != current_phase:
267
  current_phase = phase
268
- if phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
269
- # Close </think> first
270
- yield self._mk_chunk(completion_id, request.model, "</think>")
271
- # Then send a separate newline
272
- yield self._mk_chunk(completion_id, request.model, "\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  think_open = False
274
 
 
 
 
275
  if phase == "thinking" and not settings.SHOW_THINK_TAGS:
 
276
  continue
277
 
278
- if not delta_content:
279
- continue
280
-
281
- # Content cleaning
282
- if phase == "thinking":
283
- if settings.SHOW_THINK_TAGS:
284
  if not think_open:
285
- # Open <think> tag
286
- yield self._mk_chunk(completion_id, request.model, "<think>\n")
 
 
 
 
 
 
 
 
 
 
 
 
287
  think_open = True
288
- processed_content = self._clean_thinking_content(delta_content)
289
- else:
290
- continue
291
- else:
292
- processed_content = self._clean_answer_content(delta_content)
293
 
294
- if processed_content:
295
- yield self._mk_chunk(completion_id, request.model, processed_content)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  except httpx.RequestError as e:
298
  logger.error(f"Request error: {e}")
299
- if cookie:
300
- await cookie_manager.mark_cookie_invalid(cookie)
301
- async for chunk in self._error_chunks_async(request.model, "Upstream connection error"):
302
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
303
  except Exception as e:
304
- logger.error(f"Unexpected error: {e}")
305
- async for chunk in self._error_chunks_async(request.model, "Internal server error"):
306
- yield chunk
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  finally:
308
  if client:
309
  await client.aclose()
310
 
311
- async def non_stream_proxy_response(self, request: ChatCompletionRequest):
 
 
 
 
312
  client = None
313
- cookie = None
314
  try:
315
  client = httpx.AsyncClient(
316
  timeout=httpx.Timeout(60.0, read=300.0),
317
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
318
  http2=True,
319
  )
320
- body, headers, cookie = await self._prepare_upstream(request)
321
 
322
- thinking_text = ""
323
- answer_text = ""
324
  current_phase = None
325
 
326
- async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
327
- if resp.status_code != 200:
328
  await cookie_manager.mark_cookie_invalid(cookie)
329
- raise HTTPException(status_code=502, detail=f"Upstream returned {resp.status_code}")
330
 
331
- async for raw in resp.aiter_text():
332
- if not raw or raw.isspace():
333
  continue
334
- for line in raw.split("\n"):
 
335
  line = line.strip()
336
  if not line or line.startswith(":") or not line.startswith("data: "):
337
  continue
 
338
  payload = line[6:]
339
  if payload == "[DONE]":
340
  break
@@ -351,46 +408,57 @@ class ProxyHandler:
351
  if phase != current_phase:
352
  current_phase = phase
353
 
354
- if phase == "thinking" and settings.SHOW_THINK_TAGS:
355
- thinking_text += self._clean_thinking_content(delta_content)
356
- elif phase == "answer":
357
- answer_text += self._clean_answer_content(delta_content)
358
-
359
- # Combine thinking and answer
360
- if thinking_text and settings.SHOW_THINK_TAGS:
361
- if answer_text:
362
- final_text = f"<think>\n{thinking_text}</think>\n{answer_text}"
 
 
 
 
 
363
  else:
364
- final_text = f"<think>\n{thinking_text}</think>"
365
  else:
366
- final_text = answer_text
367
 
368
  return ChatCompletionResponse(
369
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
370
  created=int(time.time()),
371
  model=request.model,
372
- choices=[{
373
- "index": 0,
374
- "message": {"role": "assistant", "content": final_text},
375
- "finish_reason": "stop"
376
- }],
 
 
377
  )
378
 
379
  except httpx.RequestError as e:
380
- logger.error(f"Request error: {e}")
381
- if cookie:
382
  await cookie_manager.mark_cookie_invalid(cookie)
383
  raise HTTPException(status_code=502, detail="Upstream connection error")
 
384
  except Exception as e:
385
- logger.error(f"Unexpected error: {e}")
386
  raise HTTPException(status_code=500, detail="Internal server error")
 
387
  finally:
388
  if client:
389
  await client.aclose()
390
 
391
- # FastAPI entry point
392
-
 
393
  async def handle_chat_completion(self, request: ChatCompletionRequest):
 
394
  stream = bool(request.stream) if request.stream is not None else settings.DEFAULT_STREAM
395
  if stream:
396
  return StreamingResponse(
 
1
  """
2
+ Proxy handler for Z.AI API requests
3
  """
 
4
  import json
5
  import logging
6
  import re
 
24
 
25
  class ProxyHandler:
26
  def __init__(self):
 
 
27
  self.client = httpx.AsyncClient(
28
  timeout=httpx.Timeout(60.0, read=300.0),
29
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
 
36
  async def __aexit__(self, exc_type, exc_val, exc_tb):
37
  await self.client.aclose()
38
 
39
+ # ------------------------------------------------------------------
40
+ # 內容處理相關工具
41
+ # ------------------------------------------------------------------
42
  def _balance_think_tag(self, text: str) -> str:
43
+ """確保 <think> </think> 標籤數量相符"""
44
  open_cnt = len(re.findall(r"<think>", text))
45
  close_cnt = len(re.findall(r"</think>", text))
46
  if open_cnt > close_cnt:
47
  text += "</think>" * (open_cnt - close_cnt)
48
  elif close_cnt > open_cnt:
49
+ extra_closes = close_cnt - open_cnt
50
+ for _ in range(extra_closes):
51
+ text = re.sub(r"</think>(?!</think>)(?![^<]*</think>)$", "", text, count=1)
 
52
  return text
53
 
54
  def _clean_thinking_content(self, content: str) -> str:
55
+ """移除 HTML 標籤但保留思考文字"""
56
  if not content:
57
  return content
58
+ content = re.sub(r'<details[^>]*>', '', content)
59
+ content = re.sub(r'</details>', '', content)
60
+ content = re.sub(r'<summary[^>]*>.*?</summary>', '', content, flags=re.DOTALL)
61
+ content = re.sub(r'<[^>]+>', '', content)
62
+ content = re.sub(r'^>\s*', '', content)
63
+ content = re.sub(r'\n>\s*', '\n', content)
64
+ return content.strip()
65
 
66
  def _clean_answer_content(self, content: str) -> str:
67
+ """僅移除 <details> 區塊,保留其餘 Markdown"""
68
  if not content:
69
  return content
70
  return re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
71
 
72
  def transform_content(self, content: str) -> str:
73
+ """將上游 HTML 轉成 <think> 標籤"""
74
+ if not content:
75
+ return content
76
  if settings.SHOW_THINK_TAGS:
77
  content = re.sub(r"<details[^>]*>", "<think>", content)
78
  content = re.sub(r"</details>", "</think>", content)
79
+ content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
80
  content = self._balance_think_tag(content)
81
  else:
82
  content = re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
83
+ return content.strip()
84
 
85
  def _serialize_messages(self, messages) -> list:
86
+ """ ChatMessage 物件序列化成 dict 方便 JSON 傳輸"""
87
+ serialized_messages = []
88
+ for message in messages:
89
+ if hasattr(message, "dict"):
90
+ serialized_messages.append(message.dict())
91
+ elif hasattr(message, "model_dump"):
92
+ serialized_messages.append(message.model_dump())
93
+ elif isinstance(message, dict):
94
+ serialized_messages.append(message)
95
  else:
96
+ serialized_messages.append(
97
+ {
98
+ "role": getattr(message, "role", "user"),
99
+ "content": getattr(message, "content", str(message)),
100
+ }
101
+ )
102
+ return serialized_messages
103
+
104
+ # ------------------------------------------------------------------
105
+ # 與上游 API 溝通
106
+ # ------------------------------------------------------------------
107
+ async def _prepare_upstream(self, request: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
108
+ """產生送往上游的 body、headers 與 cookie"""
109
  cookie = await cookie_manager.get_next_cookie()
110
  if not cookie:
111
  raise HTTPException(status_code=503, detail="No available cookies")
112
 
113
  target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
 
114
 
115
+ req_body = {
116
  "stream": True,
117
  "model": target_model,
118
+ "messages": self._serialize_messages(request.messages),
119
  "background_tasks": {"title_generation": True, "tags_generation": True},
120
  "chat_id": str(uuid.uuid4()),
121
  "features": {
 
153
  "Origin": "https://chat.z.ai",
154
  "Referer": "https://chat.z.ai/",
155
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ return req_body, headers, cookie
158
 
159
+ # ------------------------------------------------------------------
160
+ # 串流模式
161
+ # ------------------------------------------------------------------
162
  async def stream_proxy_response(self, request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
163
+ """處理串流回傳"""
164
  client = None
 
165
  try:
166
  client = httpx.AsyncClient(
167
  timeout=httpx.Timeout(60.0, read=300.0),
168
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
169
  http2=True,
170
  )
171
+ req_body, headers, cookie = await self._prepare_upstream(request)
 
172
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
 
 
173
 
174
+ think_open = False # 是否已開啟 <think>
175
+ current_phase = None # 追蹤 phase 切換
176
+
177
+ async with client.stream("POST", settings.UPSTREAM_URL, json=req_body, headers=headers) as response:
178
+ if response.status_code != 200:
179
  await cookie_manager.mark_cookie_invalid(cookie)
180
+ error_chunk = {
181
  "id": completion_id,
182
  "object": "chat.completion.chunk",
183
  "created": int(time.time()),
184
  "model": request.model,
185
+ "choices": [
186
+ {
187
+ "index": 0,
188
+ "delta": {"content": f"Error: Upstream API returned {response.status_code}"},
189
+ "finish_reason": "stop",
190
+ }
191
+ ],
192
  }
193
+ yield f"data: {json.dumps(error_chunk)}\n\n"
194
  yield "data: [DONE]\n\n"
195
  return
196
 
197
+ async for raw_chunk in response.aiter_text():
198
+ if not raw_chunk or raw_chunk.isspace():
199
  continue
200
+
201
+ for line in raw_chunk.split("\n"):
202
  line = line.strip()
203
+ if not line or line.startswith(":"):
204
  continue
205
+ if not line.startswith("data: "):
206
+ continue
207
+
208
  payload = line[6:]
209
  if payload == "[DONE]":
210
+ # 結尾時若 <think> 未關閉,自動補上
211
  if think_open and settings.SHOW_THINK_TAGS:
212
+ close_chunk = {
213
+ "id": completion_id,
214
+ "object": "chat.completion.chunk",
215
+ "created": int(time.time()),
216
+ "model": request.model,
217
+ "choices": [
218
+ {
219
+ "index": 0,
220
+ "delta": {"content": "</think>\n"},
221
+ "finish_reason": None,
222
+ }
223
+ ],
224
+ }
225
+ yield f"data: {json.dumps(close_chunk)}\n\n"
226
+
227
+ final_chunk = {
228
  "id": completion_id,
229
  "object": "chat.completion.chunk",
230
  "created": int(time.time()),
231
  "model": request.model,
232
+ "choices": [
233
+ {"index": 0, "delta": {}, "finish_reason": "stop"}
234
+ ],
 
 
235
  }
236
+ yield f"data: {json.dumps(final_chunk)}\n\n"
237
  yield "data: [DONE]\n\n"
238
  return
239
 
 
246
  delta_content = data.get("delta_content", "")
247
  phase = data.get("phase")
248
 
249
+ # --------------------------------------------------
250
+ # phase 變化處理
251
+ # --------------------------------------------------
252
  if phase != current_phase:
253
  current_phase = phase
254
+ # thinking answer 時,先補 </think> 並帶換行
255
+ if (
256
+ phase == "answer"
257
+ and think_open
258
+ and settings.SHOW_THINK_TAGS
259
+ ):
260
+ auto_close = {
261
+ "id": completion_id,
262
+ "object": "chat.completion.chunk",
263
+ "created": int(time.time()),
264
+ "model": request.model,
265
+ "choices": [
266
+ {
267
+ "index": 0,
268
+ "delta": {"content": "</think>\n"},
269
+ "finish_reason": None,
270
+ }
271
+ ],
272
+ }
273
+ yield f"data: {json.dumps(auto_close)}\n\n"
274
  think_open = False
275
 
276
+ # --------------------------------------------------
277
+ # 真正內容輸出
278
+ # --------------------------------------------------
279
  if phase == "thinking" and not settings.SHOW_THINK_TAGS:
280
+ # 不顯示思考內容就跳過
281
  continue
282
 
283
+ if delta_content:
284
+ if phase == "thinking" and settings.SHOW_THINK_TAGS:
285
+ # 若第一次進入思考階段,先輸出 <think>
 
 
 
286
  if not think_open:
287
+ think_chunk = {
288
+ "id": completion_id,
289
+ "object": "chat.completion.chunk",
290
+ "created": int(time.time()),
291
+ "model": request.model,
292
+ "choices": [
293
+ {
294
+ "index": 0,
295
+ "delta": {"content": "<think>"},
296
+ "finish_reason": None,
297
+ }
298
+ ],
299
+ }
300
+ yield f"data: {json.dumps(think_chunk)}\n\n"
301
  think_open = True
 
 
 
 
 
302
 
303
+ transformed_content = self._clean_thinking_content(delta_content)
304
+ else:
305
+ # answer 階段
306
+ transformed_content = self._clean_answer_content(delta_content)
307
+
308
+ if transformed_content:
309
+ chunk = {
310
+ "id": completion_id,
311
+ "object": "chat.completion.chunk",
312
+ "created": int(time.time()),
313
+ "model": request.model,
314
+ "choices": [
315
+ {
316
+ "index": 0,
317
+ "delta": {"content": transformed_content},
318
+ "finish_reason": None,
319
+ }
320
+ ],
321
+ }
322
+ yield f"data: {json.dumps(chunk)}\n\n"
323
 
324
  except httpx.RequestError as e:
325
  logger.error(f"Request error: {e}")
326
+ error_chunk = {
327
+ "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
328
+ "object": "chat.completion.chunk",
329
+ "created": int(time.time()),
330
+ "model": request.model,
331
+ "choices": [
332
+ {
333
+ "index": 0,
334
+ "delta": {"content": f"Connection error: {str(e)}"},
335
+ "finish_reason": "stop",
336
+ }
337
+ ],
338
+ }
339
+ yield f"data: {json.dumps(error_chunk)}\n\n"
340
+ yield "data: [DONE]\n\n"
341
+
342
  except Exception as e:
343
+ logger.error(f"Unexpected error in streaming: {e}")
344
+ error_chunk = {
345
+ "id": f"chatcmpl-{uuid.uuid4().hex[:29]}",
346
+ "object": "chat.completion.chunk",
347
+ "created": int(time.time()),
348
+ "model": request.model,
349
+ "choices": [
350
+ {
351
+ "index": 0,
352
+ "delta": {"content": f"Internal error: {str(e)}"},
353
+ "finish_reason": "stop",
354
+ }
355
+ ],
356
+ }
357
+ yield f"data: {json.dumps(error_chunk)}\n\n"
358
+ yield "data: [DONE]\n\n"
359
+
360
  finally:
361
  if client:
362
  await client.aclose()
363
 
364
+ # ------------------------------------------------------------------
365
+ # 非串流模式
366
+ # ------------------------------------------------------------------
367
+ async def non_stream_proxy_response(self, request: ChatCompletionRequest) -> ChatCompletionResponse:
368
+ """處理非串流回傳"""
369
  client = None
 
370
  try:
371
  client = httpx.AsyncClient(
372
  timeout=httpx.Timeout(60.0, read=300.0),
373
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
374
  http2=True,
375
  )
376
+ req_body, headers, cookie = await self._prepare_upstream(request)
377
 
378
+ thinking_buf, answer_buf = [], []
 
379
  current_phase = None
380
 
381
+ async with client.stream("POST", settings.UPSTREAM_URL, json=req_body, headers=headers) as response:
382
+ if response.status_code != 200:
383
  await cookie_manager.mark_cookie_invalid(cookie)
384
+ raise HTTPException(status_code=response.status_code, detail="Upstream API error")
385
 
386
+ async for raw_chunk in response.aiter_text():
387
+ if not raw_chunk or raw_chunk.isspace():
388
  continue
389
+
390
+ for line in raw_chunk.split("\n"):
391
  line = line.strip()
392
  if not line or line.startswith(":") or not line.startswith("data: "):
393
  continue
394
+
395
  payload = line[6:]
396
  if payload == "[DONE]":
397
  break
 
408
  if phase != current_phase:
409
  current_phase = phase
410
 
411
+ if delta_content:
412
+ if phase == "thinking":
413
+ thinking_buf.append(delta_content)
414
+ elif phase == "answer":
415
+ answer_buf.append(delta_content)
416
+
417
+ # 組裝最終文字
418
+ final_text = ""
419
+ if settings.SHOW_THINK_TAGS and thinking_buf:
420
+ thinking_text = self._clean_thinking_content("".join(thinking_buf))
421
+ answer_text = self._clean_answer_content("".join(answer_buf)) if answer_buf else ""
422
+ if thinking_text:
423
+ # 在 </think> 與正文間補換行,避免首字被吃掉
424
+ final_text = f"<think>{thinking_text}</think>\n{answer_text}"
425
  else:
426
+ final_text = answer_text
427
  else:
428
+ final_text = self._clean_answer_content("".join(answer_buf))
429
 
430
  return ChatCompletionResponse(
431
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
432
  created=int(time.time()),
433
  model=request.model,
434
+ choices=[
435
+ {
436
+ "index": 0,
437
+ "message": {"role": "assistant", "content": final_text},
438
+ "finish_reason": "stop",
439
+ }
440
+ ],
441
  )
442
 
443
  except httpx.RequestError as e:
444
+ logger.error(f"Request error in non-stream: {e}")
445
+ if "cookie" in locals():
446
  await cookie_manager.mark_cookie_invalid(cookie)
447
  raise HTTPException(status_code=502, detail="Upstream connection error")
448
+
449
  except Exception as e:
450
+ logger.error(f"Unexpected error in non-stream: {e}")
451
  raise HTTPException(status_code=500, detail="Internal server error")
452
+
453
  finally:
454
  if client:
455
  await client.aclose()
456
 
457
+ # ------------------------------------------------------------------
458
+ # FastAPI 入口
459
+ # ------------------------------------------------------------------
460
  async def handle_chat_completion(self, request: ChatCompletionRequest):
461
+ """聊天補全主入口"""
462
  stream = bool(request.stream) if request.stream is not None else settings.DEFAULT_STREAM
463
  if stream:
464
  return StreamingResponse(