bluewinliang commited on
Commit
b075772
·
verified ·
1 Parent(s): 8061838

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +243 -124
proxy_handler.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Proxy handler for Z.AI API requests fixed first-char loss after </think>
3
  """
4
 
5
  import json
@@ -15,14 +15,18 @@ from fastapi.responses import StreamingResponse
15
 
16
  from config import settings
17
  from cookie_manager import cookie_manager
18
- from models import ChatCompletionRequest, ChatCompletionResponse
 
 
 
19
 
20
  logger = logging.getLogger(__name__)
21
 
22
 
23
  class ProxyHandler:
24
- # -------------------- 初始化 --------------------
25
  def __init__(self):
 
 
26
  self.client = httpx.AsyncClient(
27
  timeout=httpx.Timeout(60.0, read=300.0),
28
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
@@ -35,73 +39,125 @@ class ProxyHandler:
35
  async def __aexit__(self, exc_type, exc_val, exc_tb):
36
  await self.client.aclose()
37
 
38
- # -------------------- 文字處理 --------------------
39
- def _balance_think_tag(self, s: str) -> str:
40
- o, c = len(re.findall(r"<think>", s)), len(re.findall(r"</think>", s))
41
- if o > c:
42
- s += "</think>" * (o - c)
43
- elif c > o:
44
- for _ in range(c - o):
45
- s = re.sub(r"</think>(?!.*</think>)", "", s, count=1)
46
- return s
47
-
48
- def _clean_thinking_content(self, s: str) -> str:
49
- if not s:
50
- return s
51
- s = re.sub(r'<details[^>]*>', '', s)
52
- s = re.sub(r'</details>', '', s)
53
- s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
54
- s = re.sub(r'<[^>]+>', '', s)
55
- s = re.sub(r'^>\s*', '', s)
56
- s = re.sub(r'\n>\s*', '\n', s)
57
- return s
58
-
59
- def _clean_answer_content(self, s: str) -> str:
60
- if not s:
61
- return s
62
- return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
63
-
64
- def _serialize_messages(self, msgs) -> list:
65
- out = []
66
- for m in msgs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  if hasattr(m, "dict"):
68
- out.append(m.dict())
69
  elif hasattr(m, "model_dump"):
70
- out.append(m.model_dump())
71
  elif isinstance(m, dict):
72
- out.append(m)
73
  else:
74
- out.append({"role": getattr(m, "role", "user"), "content": getattr(m, "content", str(m))})
75
- return out
 
 
 
76
 
77
- # -------------------- 上游準備 --------------------
78
- async def _prepare_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
 
 
 
79
  cookie = await cookie_manager.get_next_cookie()
80
  if not cookie:
81
- raise HTTPException(503, "No available cookies")
 
 
 
82
 
83
- target_model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
84
  body = {
85
  "stream": True,
86
  "model": target_model,
87
- "messages": self._serialize_messages(req.messages),
88
  "background_tasks": {"title_generation": True, "tags_generation": True},
89
  "chat_id": str(uuid.uuid4()),
90
- "features": {"enable_thinking": True},
 
 
 
 
 
 
91
  "id": str(uuid.uuid4()),
 
 
 
 
 
 
 
 
 
92
  }
 
93
  headers = {
94
  "Content-Type": "application/json",
95
  "Authorization": f"Bearer {cookie}",
96
- "User-Agent": "Mozilla/5.0",
 
 
 
97
  "Accept": "application/json, text/event-stream",
 
 
 
 
 
98
  "Origin": "https://chat.z.ai",
99
  "Referer": "https://chat.z.ai/",
100
  }
101
  return body, headers, cookie
102
 
103
- # -------------------- 串流處理 --------------------
104
- async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
 
105
  client = None
106
  try:
107
  client = httpx.AsyncClient(
@@ -109,7 +165,8 @@ class ProxyHandler:
109
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
110
  http2=True,
111
  )
112
- body, headers, cookie = await self._prepare_upstream(req)
 
113
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
114
  think_open = False
115
  current_phase = None
@@ -117,19 +174,48 @@ class ProxyHandler:
117
  async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
118
  if resp.status_code != 200:
119
  await cookie_manager.mark_cookie_invalid(cookie)
120
- yield self._error_sse(completion_id, req.model, f"Upstream {resp.status_code}")
 
 
 
 
 
 
 
 
 
 
 
 
121
  return
122
 
123
  async for raw in resp.aiter_text():
124
- for line in filter(None, (x.strip() for x in raw.split("\n"))):
125
- if not line.startswith("data: "):
 
 
 
126
  continue
127
  payload = line[6:]
128
  if payload == "[DONE]":
 
129
  if think_open and settings.SHOW_THINK_TAGS:
130
- # 若仍開啟,補閉標籤
131
- yield self._sse_chunk(completion_id, req.model, "</think>")
132
- yield self._done_sse(completion_id, req.model)
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  return
134
 
135
  try:
@@ -141,38 +227,51 @@ class ProxyHandler:
141
  delta_content = data.get("delta_content", "")
142
  phase = data.get("phase")
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  if phase == "thinking":
145
- if not settings.SHOW_THINK_TAGS:
 
 
 
 
 
 
146
  continue
147
- if not think_open:
148
- think_open = True
149
- yield self._sse_chunk(completion_id, req.model, "<think>")
150
- cleaned = self._clean_thinking_content(delta_content)
151
- if cleaned:
152
- yield self._sse_chunk(completion_id, req.model, cleaned)
153
 
154
- elif phase == "answer":
155
- # 如果剛從 thinking 轉入 answer,先把 </think> 前置到第一段答案
156
- prefix = ""
157
- if think_open:
158
- prefix = "</think>"
159
- think_open = False
160
- cleaned = self._clean_answer_content(delta_content)
161
- if cleaned or prefix:
162
- yield self._sse_chunk(completion_id, req.model, f"{prefix}{cleaned}")
163
-
164
- # 若意外結束
165
- yield self._done_sse(completion_id, req.model)
166
 
167
  except httpx.RequestError as e:
168
- logger.error(e)
169
- yield self._error_sse(f"chatcmpl-{uuid.uuid4().hex[:29]}", req.model, f"Connection error: {e}")
 
 
 
 
 
170
  finally:
171
  if client:
172
  await client.aclose()
173
 
174
- # -------------------- 非串流處理 --------------------
175
- async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
176
  client = None
177
  try:
178
  client = httpx.AsyncClient(
@@ -180,45 +279,58 @@ class ProxyHandler:
180
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
181
  http2=True,
182
  )
183
- body, headers, cookie = await self._prepare_upstream(req)
 
 
 
 
184
 
185
- think_buf, answer_buf = [], []
186
  async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
187
  if resp.status_code != 200:
188
  await cookie_manager.mark_cookie_invalid(cookie)
189
- raise HTTPException(resp.status_code, "Upstream error")
190
 
191
  async for raw in resp.aiter_text():
192
- for line in filter(None, (x.strip() for x in raw.split("\n"))):
193
- if not line.startswith("data: "):
 
 
 
194
  continue
195
  payload = line[6:]
196
  if payload == "[DONE]":
197
  break
 
198
  try:
199
  parsed = json.loads(payload)
200
  except json.JSONDecodeError:
201
  continue
 
202
  data = parsed.get("data", {})
203
- phase = data.get("phase")
204
  delta_content = data.get("delta_content", "")
205
- if phase == "thinking":
206
- think_buf.append(delta_content)
207
- elif phase == "answer":
208
- answer_buf.append(delta_content)
209
 
210
- # 組裝最終文字
211
- answer_text = self._clean_answer_content("".join(answer_buf))
212
- if settings.SHOW_THINK_TAGS and think_buf:
213
- think_text = self._clean_thinking_content("".join(think_buf))
214
- final_text = f"<think>{think_text}</think>{answer_text}"
 
 
 
 
 
 
215
  else:
216
  final_text = answer_text
217
 
218
  return ChatCompletionResponse(
219
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
220
  created=int(time.time()),
221
- model=req.model,
222
  choices=[{
223
  "index": 0,
224
  "message": {"role": "assistant", "content": final_text},
@@ -226,54 +338,61 @@ class ProxyHandler:
226
  }],
227
  )
228
 
 
 
 
 
 
229
  except Exception as e:
230
- logger.error(e)
231
- raise HTTPException(500, "Internal server error")
232
  finally:
233
  if client:
234
  await client.aclose()
235
 
236
- # -------------------- FastAPI 入口 --------------------
237
- async def handle_chat_completion(self, request: ChatCompletionRequest):
238
- stream = bool(request.stream) if request.stream is not None else settings.DEFAULT_STREAM
239
- if stream:
240
- return StreamingResponse(
241
- self.stream_proxy_response(request),
242
- media_type="text/event-stream",
243
- headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
244
- )
245
- return await self.non_stream_proxy_response(request)
246
 
247
- # -------------------- 輔助 --------------------
248
- @staticmethod
249
- def _sse_chunk(cid: str, model: str, content: str) -> str:
250
  chunk = {
251
  "id": cid,
252
  "object": "chat.completion.chunk",
253
  "created": int(time.time()),
254
  "model": model,
255
- "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
 
 
 
 
256
  }
257
- return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
258
 
259
- @staticmethod
260
- def _done_sse(cid: str, model: str) -> str:
261
- done = {
262
- "id": cid,
263
- "object": "chat.completion.chunk",
264
- "created": int(time.time()),
265
- "model": model,
266
- "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
267
- }
268
- return f"data: {json.dumps(done)}\n\ndata: [DONE]\n\n"
269
-
270
- @staticmethod
271
- def _error_sse(cid: str, model: str, msg: str) -> str:
272
  err = {
273
  "id": cid,
274
  "object": "chat.completion.chunk",
275
  "created": int(time.time()),
276
  "model": model,
277
- "choices": [{"index": 0, "delta": {"content": msg}, "finish_reason": "stop"}],
 
 
 
 
278
  }
279
- return f"data: {json.dumps(err, ensure_ascii=False)}\n\ndata: [DONE]\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Proxy handler for Z.AI API requests (patched version)
3
  """
4
 
5
  import json
 
15
 
16
  from config import settings
17
  from cookie_manager import cookie_manager
18
+ from models import (
19
+ ChatCompletionRequest,
20
+ ChatCompletionResponse,
21
+ )
22
 
23
  logger = logging.getLogger(__name__)
24
 
25
 
26
  class ProxyHandler:
 
27
  def __init__(self):
28
+ # Consider moving shared client to FastAPI startup
29
+ # But keeping old logic for convenience
30
  self.client = httpx.AsyncClient(
31
  timeout=httpx.Timeout(60.0, read=300.0),
32
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
 
39
  async def __aexit__(self, exc_type, exc_val, exc_tb):
40
  await self.client.aclose()
41
 
42
+ # Content processing utilities
43
+
44
+ def _balance_think_tag(self, text: str) -> str:
45
+ """Ensure <think> and </think> tags are balanced"""
46
+ open_cnt = len(re.findall(r"<think>", text))
47
+ close_cnt = len(re.findall(r"</think>", text))
48
+ if open_cnt > close_cnt:
49
+ text += "</think>" * (open_cnt - close_cnt)
50
+ elif close_cnt > open_cnt:
51
+ # Remove extra </think> tags
52
+ extra = close_cnt - open_cnt
53
+ for _ in range(extra):
54
+ text = re.sub(r"</think>(?!.*</think>)", "", text, count=1)
55
+ return text
56
+
57
+ def _clean_thinking_content(self, content: str) -> str:
58
+ """Remove HTML tags, keep only thinking text"""
59
+ if not content:
60
+ return content
61
+ content = re.sub(r"<details[^>]*>", "", content)
62
+ content = re.sub(r"</details>", "", content)
63
+ content = re.sub(r"<summary[^>]*>.*?</summary>", "", content, flags=re.DOTALL)
64
+ content = re.sub(r"<[^>]+>", "", content)
65
+ content = re.sub(r"^\s*>\s*", "", content)
66
+ content = re.sub(r"\n\s*>\s*", "\n", content)
67
+ return content
68
+
69
+ def _clean_answer_content(self, content: str) -> str:
70
+ """Remove <details>...</details>, keep the rest"""
71
+ if not content:
72
+ return content
73
+ return re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
74
+
75
+ def transform_content(self, content: str) -> str:
76
+ """Transform <details> to <think> tags if showing thinking"""
77
+ if settings.SHOW_THINK_TAGS:
78
+ content = re.sub(r"<details[^>]*>", "<think>", content)
79
+ content = re.sub(r"</details>", "</think>", content)
80
+ content = re.sub(r"<summary[^>]*>.*?</summary>", "", content, flags=re.DOTALL)
81
+ content = self._balance_think_tag(content)
82
+ else:
83
+ content = re.sub(r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL)
84
+ return content
85
+
86
+ def _serialize_messages(self, messages) -> list:
87
+ """Convert pydantic/dataclass/dict to pure dict"""
88
+ result = []
89
+ for m in messages:
90
  if hasattr(m, "dict"):
91
+ result.append(m.dict())
92
  elif hasattr(m, "model_dump"):
93
+ result.append(m.model_dump())
94
  elif isinstance(m, dict):
95
+ result.append(m)
96
  else:
97
+ result.append({
98
+ "role": getattr(m, "role", "user"),
99
+ "content": getattr(m, "content", str(m))
100
+ })
101
+ return result
102
 
103
+ # Upstream communication
104
+
105
+ async def _prepare_upstream(
106
+ self, request: ChatCompletionRequest
107
+ ) -> Tuple[Dict[str, Any], Dict[str, str], str]:
108
  cookie = await cookie_manager.get_next_cookie()
109
  if not cookie:
110
+ raise HTTPException(status_code=503, detail="No available cookies")
111
+
112
+ target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
113
+ serialized_messages = self._serialize_messages(request.messages)
114
 
 
115
  body = {
116
  "stream": True,
117
  "model": target_model,
118
+ "messages": serialized_messages,
119
  "background_tasks": {"title_generation": True, "tags_generation": True},
120
  "chat_id": str(uuid.uuid4()),
121
+ "features": {
122
+ "image_generation": False,
123
+ "code_interpreter": False,
124
+ "web_search": False,
125
+ "auto_web_search": False,
126
+ "enable_thinking": True,
127
+ },
128
  "id": str(uuid.uuid4()),
129
+ "mcp_servers": ["deep-web-search"],
130
+ "model_item": {"id": target_model, "name": "GLM-4.5", "owned_by": "openai"},
131
+ "params": {},
132
+ "tool_servers": [],
133
+ "variables": {
134
+ "{{USER_NAME}}": "User",
135
+ "{{USER_LOCATION}}": "Unknown",
136
+ "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
137
+ },
138
  }
139
+
140
  headers = {
141
  "Content-Type": "application/json",
142
  "Authorization": f"Bearer {cookie}",
143
+ "User-Agent": (
144
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
145
+ "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
146
+ ),
147
  "Accept": "application/json, text/event-stream",
148
+ "Accept-Language": "zh-CN",
149
+ "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
150
+ "sec-ch-ua-mobile": "?0",
151
+ "sec-ch-ua-platform": '"macOS"',
152
+ "x-fe-version": "prod-fe-1.0.53",
153
  "Origin": "https://chat.z.ai",
154
  "Referer": "https://chat.z.ai/",
155
  }
156
  return body, headers, cookie
157
 
158
+ # Streaming response
159
+
160
+ async def stream_proxy_response(self, request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
161
  client = None
162
  try:
163
  client = httpx.AsyncClient(
 
165
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
166
  http2=True,
167
  )
168
+ body, headers, cookie = await self._prepare_upstream(request)
169
+
170
  completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
171
  think_open = False
172
  current_phase = None
 
174
  async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
175
  if resp.status_code != 200:
176
  await cookie_manager.mark_cookie_invalid(cookie)
177
+ err = {
178
+ "id": completion_id,
179
+ "object": "chat.completion.chunk",
180
+ "created": int(time.time()),
181
+ "model": request.model,
182
+ "choices": [{
183
+ "index": 0,
184
+ "delta": {"content": f"Error: Upstream returned {resp.status_code}"},
185
+ "finish_reason": "stop"
186
+ }]
187
+ }
188
+ yield f"data: {json.dumps(err)}\n\n"
189
+ yield "data: [DONE]\n\n"
190
  return
191
 
192
  async for raw in resp.aiter_text():
193
+ if not raw or raw.isspace():
194
+ continue
195
+ for line in raw.split("\n"):
196
+ line = line.strip()
197
+ if not line or line.startswith(":") or not line.startswith("data: "):
198
  continue
199
  payload = line[6:]
200
  if payload == "[DONE]":
201
+ # If still in thinking mode, close the tag
202
  if think_open and settings.SHOW_THINK_TAGS:
203
+ yield self._mk_chunk(completion_id, request.model, "</think>")
204
+ yield self._mk_chunk(completion_id, request.model, "\n")
205
+ # Final stop chunk
206
+ final = {
207
+ "id": completion_id,
208
+ "object": "chat.completion.chunk",
209
+ "created": int(time.time()),
210
+ "model": request.model,
211
+ "choices": [{
212
+ "index": 0,
213
+ "delta": {},
214
+ "finish_reason": "stop"
215
+ }]
216
+ }
217
+ yield f"data: {json.dumps(final)}\n\n"
218
+ yield "data: [DONE]\n\n"
219
  return
220
 
221
  try:
 
227
  delta_content = data.get("delta_content", "")
228
  phase = data.get("phase")
229
 
230
+ # Phase change: thinking -> answer
231
+ if phase != current_phase:
232
+ current_phase = phase
233
+ if phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
234
+ # Close </think> first
235
+ yield self._mk_chunk(completion_id, request.model, "</think>")
236
+ # Then send a separate newline
237
+ yield self._mk_chunk(completion_id, request.model, "\n")
238
+ think_open = False
239
+
240
+ if phase == "thinking" and not settings.SHOW_THINK_TAGS:
241
+ continue
242
+
243
+ if not delta_content:
244
+ continue
245
+
246
+ # Content cleaning
247
  if phase == "thinking":
248
+ if settings.SHOW_THINK_TAGS:
249
+ if not think_open:
250
+ # Open <think> tag
251
+ yield self._mk_chunk(completion_id, request.model, "<think>\n")
252
+ think_open = True
253
+ processed_content = self._clean_thinking_content(delta_content)
254
+ else:
255
  continue
256
+ else:
257
+ processed_content = self._clean_answer_content(delta_content)
 
 
 
 
258
 
259
+ if processed_content:
260
+ yield self._mk_chunk(completion_id, request.model, processed_content)
 
 
 
 
 
 
 
 
 
 
261
 
262
  except httpx.RequestError as e:
263
+ logger.error(f"Request error: {e}")
264
+ if 'cookie' in locals():
265
+ await cookie_manager.mark_cookie_invalid(cookie)
266
+ yield from self._error_chunks(request.model, "Upstream connection error")
267
+ except Exception as e:
268
+ logger.error(f"Unexpected error: {e}")
269
+ yield from self._error_chunks(request.model, "Internal server error")
270
  finally:
271
  if client:
272
  await client.aclose()
273
 
274
+ async def non_stream_proxy_response(self, request: ChatCompletionRequest):
 
275
  client = None
276
  try:
277
  client = httpx.AsyncClient(
 
279
  limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
280
  http2=True,
281
  )
282
+ body, headers, cookie = await self._prepare_upstream(request)
283
+
284
+ thinking_text = ""
285
+ answer_text = ""
286
+ current_phase = None
287
 
 
288
  async with client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
289
  if resp.status_code != 200:
290
  await cookie_manager.mark_cookie_invalid(cookie)
291
+ raise HTTPException(status_code=502, detail=f"Upstream returned {resp.status_code}")
292
 
293
  async for raw in resp.aiter_text():
294
+ if not raw or raw.isspace():
295
+ continue
296
+ for line in raw.split("\n"):
297
+ line = line.strip()
298
+ if not line or line.startswith(":") or not line.startswith("data: "):
299
  continue
300
  payload = line[6:]
301
  if payload == "[DONE]":
302
  break
303
+
304
  try:
305
  parsed = json.loads(payload)
306
  except json.JSONDecodeError:
307
  continue
308
+
309
  data = parsed.get("data", {})
 
310
  delta_content = data.get("delta_content", "")
311
+ phase = data.get("phase")
312
+
313
+ if phase != current_phase:
314
+ current_phase = phase
315
 
316
+ if phase == "thinking" and settings.SHOW_THINK_TAGS:
317
+ thinking_text += self._clean_thinking_content(delta_content)
318
+ elif phase == "answer":
319
+ answer_text += self._clean_answer_content(delta_content)
320
+
321
+ # Combine thinking and answer
322
+ if thinking_text and settings.SHOW_THINK_TAGS:
323
+ if answer_text:
324
+ final_text = f"<think>\n{thinking_text}</think>\n{answer_text}"
325
+ else:
326
+ final_text = f"<think>\n{thinking_text}</think>"
327
  else:
328
  final_text = answer_text
329
 
330
  return ChatCompletionResponse(
331
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
332
  created=int(time.time()),
333
+ model=request.model,
334
  choices=[{
335
  "index": 0,
336
  "message": {"role": "assistant", "content": final_text},
 
338
  }],
339
  )
340
 
341
+ except httpx.RequestError as e:
342
+ logger.error(f"Request error: {e}")
343
+ if 'cookie' in locals():
344
+ await cookie_manager.mark_cookie_invalid(cookie)
345
+ raise HTTPException(status_code=502, detail="Upstream connection error")
346
  except Exception as e:
347
+ logger.error(f"Unexpected error: {e}")
348
+ raise HTTPException(status_code=500, detail="Internal server error")
349
  finally:
350
  if client:
351
  await client.aclose()
352
 
353
+ # Utilities
 
 
 
 
 
 
 
 
 
354
 
355
+ def _mk_chunk(self, cid: str, model: str, content: str) -> str:
356
+ """Create a chunk and return with data: prefix"""
 
357
  chunk = {
358
  "id": cid,
359
  "object": "chat.completion.chunk",
360
  "created": int(time.time()),
361
  "model": model,
362
+ "choices": [{
363
+ "index": 0,
364
+ "delta": {"content": content},
365
+ "finish_reason": None
366
+ }]
367
  }
368
+ return f"data: {json.dumps(chunk)}\n\n"
369
 
370
+ def _error_chunks(self, model: str, msg: str):
371
+ """Generate error output (generator)"""
372
+ cid = f"chatcmpl-{uuid.uuid4().hex[:29]}"
 
 
 
 
 
 
 
 
 
 
373
  err = {
374
  "id": cid,
375
  "object": "chat.completion.chunk",
376
  "created": int(time.time()),
377
  "model": model,
378
+ "choices": [{
379
+ "index": 0,
380
+ "delta": {"content": msg},
381
+ "finish_reason": "stop"
382
+ }]
383
  }
384
+ yield f"data: {json.dumps(err)}\n\n"
385
+ yield "data: [DONE]\n\n"
386
+
387
+ # FastAPI entry point
388
+
389
+ async def handle_chat_completion(self, request: ChatCompletionRequest):
390
+ stream = bool(request.stream) if request.stream is not None else settings.DEFAULT_STREAM
391
+ if stream:
392
+ return StreamingResponse(
393
+ self.stream_proxy_response(request),
394
+ media_type="text/event-stream",
395
+ headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
396
+ )
397
+ else:
398
+ return await self.non_stream_proxy_response(request)