zhaoxiaozhao07 commited on
Commit
b98a223
·
1 Parent(s): fbb9044

feat(core): 增强 OpenAI API 处理逻辑和工具支持

Browse files

- 在 .env.example 中添加 HTTP 和 HTTPS 代理配置示例
- 更新 .gitignore,添加参考文档目录忽略
- 删除 pyproject.toml 文件
- 重构 openai.py,集成 ZAITransformer 和 SSEToolHandler,优化请求处理和流式响应逻辑
- 增强响应处理器,支持工具调用和思考内容处理

.env.example CHANGED
@@ -39,3 +39,9 @@ TOOL_SUPPORT=true
39
 
40
  # 工具调用扫描限制(字符数)
41
  SCAN_LIMIT=200000
 
 
 
 
 
 
 
39
 
40
  # 工具调用扫描限制(字符数)
41
  SCAN_LIMIT=200000
42
+
43
+ # HTTP代理地址(可选)
44
+ HTTP_PROXY="http://admin:sk-123456@192.168.10.100:8282"
45
+ # HTTPS代理地址(可选)
46
+ HTTPS_PROXY="http://admin:sk-123456@192.168.10.100:8282"
47
+ # 示例: http://username:password@host:port
.gitignore CHANGED
@@ -1,4 +1,5 @@
1
  # Custom
 
2
  .vs/
3
  .vscode/
4
  .idea/
 
1
  # Custom
2
+ 参考文档/
3
  .vs/
4
  .vscode/
5
  .idea/
app/core/openai.py CHANGED
@@ -3,22 +3,25 @@ OpenAI API endpoints
3
  """
4
 
5
  import time
 
 
6
  from datetime import datetime
7
- from typing import List
8
  from fastapi import APIRouter, Header, HTTPException
9
  from fastapi.responses import StreamingResponse
 
10
 
11
  from app.core.config import settings
12
- from app.models.schemas import (
13
- OpenAIRequest, Message, UpstreamRequest, ModelItem,
14
- ModelsResponse, Model
15
- )
16
- from app.utils.helpers import debug_log, generate_request_ids, get_auth_token
17
- from app.utils.tools import process_messages_with_tools, content_to_string
18
- from app.core.response_handlers import StreamResponseHandler, NonStreamResponseHandler
19
 
20
  router = APIRouter()
21
 
 
 
 
22
 
23
  @router.get("/v1/models")
24
  async def list_models():
@@ -26,150 +29,453 @@ async def list_models():
26
  current_time = int(time.time())
27
  response = ModelsResponse(
28
  data=[
29
- Model(
30
- id=settings.PRIMARY_MODEL,
31
- created=current_time,
32
- owned_by="z.ai"
33
- ),
34
- Model(
35
- id=settings.THINKING_MODEL,
36
- created=current_time,
37
- owned_by="z.ai"
38
- ),
39
- Model(
40
- id=settings.SEARCH_MODEL,
41
- created=current_time,
42
- owned_by="z.ai"
43
- ),
44
- Model(
45
- id=settings.AIR_MODEL,
46
- created=current_time,
47
- owned_by="z.ai"
48
- ),
49
  ]
50
  )
51
  return response
52
 
53
 
54
  @router.post("/v1/chat/completions")
55
- async def chat_completions(
56
- request: OpenAIRequest,
57
- authorization: str = Header(...)
58
- ):
59
- """Handle chat completion requests"""
60
- debug_log("收到chat completions请求")
61
 
62
  try:
63
  # Validate API key (skip if SKIP_AUTH_TOKEN is enabled)
64
  if not settings.SKIP_AUTH_TOKEN:
65
  if not authorization.startswith("Bearer "):
66
- debug_log("缺少或无效的Authorization头")
67
  raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
68
 
69
  api_key = authorization[7:]
70
  if api_key != settings.AUTH_TOKEN:
71
- debug_log(f"无效的API key: {api_key}")
72
  raise HTTPException(status_code=401, detail="Invalid API key")
73
 
74
- debug_log(f"API key验证通过,AUTH_TOKEN={api_key[:8]}......")
75
- else:
76
- debug_log("SKIP_AUTH_TOKEN已启用,跳过API key验证")
77
- debug_log(f"请求解析成功 - 模型: {request.model}, 流式: {request.stream}, 消息数: {len(request.messages)}")
78
-
79
- # Generate IDs
80
- chat_id, msg_id = generate_request_ids()
81
-
82
- # Process messages with tools
83
- processed_messages = process_messages_with_tools(
84
- [m.model_dump() for m in request.messages],
85
- request.tools,
86
- request.tool_choice
87
- )
88
-
89
- # Convert back to Message objects
90
- upstream_messages: List[Message] = []
91
- for msg in processed_messages:
92
- content = content_to_string(msg.get("content"))
93
-
94
- upstream_messages.append(Message(
95
- role=msg["role"],
96
- content=content,
97
- reasoning_content=msg.get("reasoning_content")
98
- ))
99
 
100
- # Determine model features
101
- is_thinking = request.model == settings.THINKING_MODEL
102
- is_search = request.model == settings.SEARCH_MODEL
103
- is_air = request.model == settings.AIR_MODEL
104
- search_mcp = "deep-web-search" if is_search else ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
- # Determine upstream model ID based on requested model
107
- if is_air:
108
- upstream_model_id = "0727-106B-API" # AIR model upstream ID
109
- upstream_model_name = "GLM-4.5-Air"
110
- else:
111
- upstream_model_id = "0727-360B-API" # Default upstream model ID
112
- upstream_model_name = "GLM-4.5"
 
 
 
 
 
 
113
 
114
- # Build upstream request
115
- upstream_req = UpstreamRequest(
116
- stream=True, # Always use streaming from upstream
117
- chat_id=chat_id,
118
- id=msg_id,
119
- model=upstream_model_id, # Dynamic upstream model ID
120
- messages=upstream_messages,
121
- params={},
122
- features={
123
- "enable_thinking": is_thinking,
124
- "web_search": is_search,
125
- "auto_web_search": is_search,
126
  },
127
- background_tasks={
128
- "title_generation": False,
129
- "tags_generation": False,
130
- },
131
- mcp_servers=[search_mcp] if search_mcp else [],
132
- model_item=ModelItem(
133
- id=upstream_model_id,
134
- name=upstream_model_name,
135
- owned_by="openai"
136
- ),
137
- tool_servers=[],
138
- variables={
139
- "{{USER_NAME}}": "User",
140
- "{{USER_LOCATION}}": "Unknown",
141
- "{{CURRENT_DATETIME}}": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
142
- }
143
  )
144
-
145
- # Get authentication token
146
- auth_token = get_auth_token()
147
-
148
- # Check if tools are enabled and present
149
- has_tools = (settings.TOOL_SUPPORT and
150
- request.tools and
151
- len(request.tools) > 0 and
152
- request.tool_choice != "none")
153
-
154
- # Handle response based on stream flag
155
- if request.stream:
156
- handler = StreamResponseHandler(upstream_req, chat_id, auth_token, has_tools)
157
- return StreamingResponse(
158
- handler.handle(),
159
- media_type="text/event-stream",
160
- headers={
161
- "Cache-Control": "no-cache",
162
- "Connection": "keep-alive",
163
- }
164
- )
165
- else:
166
- handler = NonStreamResponseHandler(upstream_req, chat_id, auth_token, has_tools)
167
- return handler.handle()
168
-
169
  except HTTPException:
170
  raise
171
  except Exception as e:
172
- debug_log(f"处理请求时发生错误: {str(e)}")
173
  import traceback
174
- debug_log(f"错误堆栈: {traceback.format_exc()}")
 
175
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 
3
  """
4
 
5
  import time
6
+ import json
7
+ import asyncio
8
  from datetime import datetime
9
+ from typing import List, Dict, Any
10
  from fastapi import APIRouter, Header, HTTPException
11
  from fastapi.responses import StreamingResponse
12
+ import httpx
13
 
14
  from app.core.config import settings
15
+ from app.models.schemas import OpenAIRequest, Message, ModelsResponse, Model
16
+ from app.utils.helpers import debug_log
17
+ from app.core.zai_transformer import ZAITransformer, generate_uuid
18
+ from app.utils.sse_tool_handler import SSEToolHandler
 
 
 
19
 
20
  router = APIRouter()
21
 
22
+ # 全局转换器实例
23
+ transformer = ZAITransformer()
24
+
25
 
26
  @router.get("/v1/models")
27
  async def list_models():
 
29
  current_time = int(time.time())
30
  response = ModelsResponse(
31
  data=[
32
+ Model(id=settings.PRIMARY_MODEL, created=current_time, owned_by="z.ai"),
33
+ Model(id=settings.THINKING_MODEL, created=current_time, owned_by="z.ai"),
34
+ Model(id=settings.SEARCH_MODEL, created=current_time, owned_by="z.ai"),
35
+ Model(id=settings.AIR_MODEL, created=current_time, owned_by="z.ai"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ]
37
  )
38
  return response
39
 
40
 
41
  @router.post("/v1/chat/completions")
42
+ async def chat_completions(request: OpenAIRequest, authorization: str = Header(...)):
43
+ """Handle chat completion requests with ZAI transformer"""
44
+ role = request.messages[0].role if request.messages else "unknown"
45
+ debug_log(f"😶‍🌫️ 收到 客户端 请求 - 模型: {request.model}, 流式: {request.stream}, 消息数: {len(request.messages)}, 角色: {role}, 工具数: {len(request.tools) if request.tools else 0}")
 
 
46
 
47
  try:
48
  # Validate API key (skip if SKIP_AUTH_TOKEN is enabled)
49
  if not settings.SKIP_AUTH_TOKEN:
50
  if not authorization.startswith("Bearer "):
 
51
  raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
52
 
53
  api_key = authorization[7:]
54
  if api_key != settings.AUTH_TOKEN:
 
55
  raise HTTPException(status_code=401, detail="Invalid API key")
56
 
57
+ # 使用新的转换器转换请求
58
+ request_dict = request.model_dump()
59
+ debug_log("🔄 开始转换请求格式: OpenAI -> Z.AI")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ transformed = await transformer.transform_request_in(request_dict)
62
+
63
+ # 调用上游API
64
+ async def stream_response():
65
+ """流式响应生成器(包含重试机制)"""
66
+ retry_count = 0
67
+ last_error = None
68
+ current_token = transformed.get("token", "") # 获取当前使用的token
69
+
70
+ while retry_count <= settings.MAX_RETRIES:
71
+ try:
72
+ # 如果是重试,重新获取令牌并更新请求
73
+ if retry_count > 0:
74
+ delay = 2.0
75
+ debug_log(f"重试请求 ({retry_count}/{settings.MAX_RETRIES}) - 等待 {delay:.1f}s")
76
+ await asyncio.sleep(delay)
77
+
78
+ # 标记前一个token失败
79
+ if current_token:
80
+ transformer.mark_token_failure(current_token, Exception(f"Retry {retry_count}: {last_error}"))
81
+
82
+ # 重新获取令牌
83
+ debug_log("🔑 重新获取令牌用于重试...")
84
+ new_token = await transformer.get_token()
85
+ if not new_token:
86
+ debug_log("❌ 重试时无法获取有效的认证令牌")
87
+ raise Exception("重试时无法获取有效的认证令牌")
88
+ transformed["config"]["headers"]["Authorization"] = f"Bearer {new_token}"
89
+ current_token = new_token
90
+
91
+ async with httpx.AsyncClient(timeout=60.0) as client:
92
+ # 发送请求到上游
93
+ debug_log(f"🎯 发送请求到 Z.AI: {transformed['config']['url']}")
94
+ async with client.stream(
95
+ "POST",
96
+ transformed["config"]["url"],
97
+ json=transformed["body"],
98
+ headers=transformed["config"]["headers"],
99
+ ) as response:
100
+ # 检查响应状态码
101
+ if response.status_code == 400:
102
+ # 400 错误,触发重试
103
+ error_text = await response.aread()
104
+ error_msg = error_text.decode('utf-8', errors='ignore')
105
+ debug_log(f"❌ 上游返回 400 错误 (尝试 {retry_count + 1}/{settings.MAX_RETRIES + 1})")
106
+ debug_log(f"上游错误响应: {error_msg}")
107
+
108
+ retry_count += 1
109
+ last_error = f"400 Bad Request: {error_msg}"
110
+
111
+ # 如果还有重试机会,继续循环
112
+ if retry_count <= settings.MAX_RETRIES:
113
+ continue
114
+ else:
115
+ # 达到最大重试次数,抛出错误
116
+ debug_log(f"❌ 达到最大重试次数 ({settings.MAX_RETRIES}),请求失败")
117
+ error_response = {
118
+ "error": {
119
+ "message": f"Request failed after {settings.MAX_RETRIES} retries: {last_error}",
120
+ "type": "upstream_error",
121
+ "code": 400
122
+ }
123
+ }
124
+ yield f"data: {json.dumps(error_response)}\n\n"
125
+ yield "data: [DONE]\n\n"
126
+ return
127
+
128
+ elif response.status_code != 200:
129
+ # 其他错误,直接返回
130
+ debug_log(f"❌ 上游返回错误: {response.status_code}")
131
+ error_text = await response.aread()
132
+ error_msg = error_text.decode('utf-8', errors='ignore')
133
+ debug_log(f"❌ 错误详情: {error_msg}")
134
+
135
+ error_response = {
136
+ "error": {
137
+ "message": f"Upstream error: {response.status_code}",
138
+ "type": "upstream_error",
139
+ "code": response.status_code
140
+ }
141
+ }
142
+ yield f"data: {json.dumps(error_response)}\n\n"
143
+ yield "data: [DONE]\n\n"
144
+ return
145
+
146
+ # 200 成功,处理响应
147
+ debug_log(f"✅ Z.AI 响应成功,开始处理 SSE 流")
148
+ if retry_count > 0:
149
+ debug_log(f"✨ 第 {retry_count} 次重试成功")
150
+
151
+ # 标记token使用成功
152
+ if current_token:
153
+ transformer.mark_token_success(current_token)
154
+
155
+ # 初始化工具处理器(如果需要)
156
+ has_tools = transformed["body"].get("tools") is not None
157
+ has_mcp_servers = bool(transformed["body"].get("mcp_servers"))
158
+ tool_handler = None
159
+
160
+ # 如果有工具定义或MCP服务器,都需要工具处理器
161
+ if has_tools or has_mcp_servers:
162
+ chat_id = transformed["body"]["chat_id"]
163
+ model = request.model
164
+ tool_handler = SSEToolHandler(chat_id, model)
165
+
166
+ if has_tools and has_mcp_servers:
167
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('tools', []))} 个OpenAI工具 + {len(transformed['body'].get('mcp_servers', []))} 个MCP服务器")
168
+ elif has_tools:
169
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('tools', []))} 个OpenAI工具")
170
+ elif has_mcp_servers:
171
+ debug_log(f"🔧 初始化工具处理器: {len(transformed['body'].get('mcp_servers', []))} 个MCP服务器")
172
+
173
+ # 处理状态
174
+ has_thinking = False
175
+ thinking_signature = None
176
+
177
+ # 处理SSE流
178
+ buffer = ""
179
+ line_count = 0
180
+ debug_log("📡 开始接收 SSE 流数据...")
181
+
182
+ async for line in response.aiter_lines():
183
+ line_count += 1
184
+ if not line:
185
+ continue
186
+
187
+ # 累积到buffer处理完整的数据行
188
+ buffer += line + "\n"
189
+
190
+ # 检查是否有完整的data行
191
+ while "\n" in buffer:
192
+ current_line, buffer = buffer.split("\n", 1)
193
+ if not current_line.strip():
194
+ continue
195
+
196
+ if current_line.startswith("data:"):
197
+ chunk_str = current_line[5:].strip()
198
+ if not chunk_str or chunk_str == "[DONE]":
199
+ if chunk_str == "[DONE]":
200
+ yield "data: [DONE]\n\n"
201
+ continue
202
+
203
+ debug_log(f"📦 解析数据块: {chunk_str[:200]}..." if len(chunk_str) > 200 else f"📦 解析数据块: {chunk_str}")
204
+
205
+ try:
206
+ chunk = json.loads(chunk_str)
207
+
208
+ if chunk.get("type") == "chat:completion":
209
+ data = chunk.get("data", {})
210
+ phase = data.get("phase")
211
+
212
+ # 记录每个阶段(只在阶段变化时记录)
213
+ if phase and phase != getattr(stream_response, '_last_phase', None):
214
+ debug_log(f"📈 SSE 阶段: {phase}")
215
+ stream_response._last_phase = phase
216
+
217
+ # 处理工具调用
218
+ if phase == "tool_call" and tool_handler:
219
+ for output in tool_handler.process_tool_call_phase(data, True):
220
+ yield output
221
+
222
+ # 处理其他阶段(工具结束)
223
+ elif phase == "other" and tool_handler:
224
+ for output in tool_handler.process_other_phase(data, True):
225
+ yield output
226
+
227
+ # 处理思考内容
228
+ elif phase == "thinking":
229
+ if not has_thinking:
230
+ has_thinking = True
231
+ # 发送初始角色
232
+ role_chunk = {
233
+ "choices": [
234
+ {
235
+ "delta": {"role": "assistant"},
236
+ "finish_reason": None,
237
+ "index": 0,
238
+ "logprobs": None,
239
+ }
240
+ ],
241
+ "created": int(time.time()),
242
+ "id": transformed["body"]["chat_id"],
243
+ "model": request.model,
244
+ "object": "chat.completion.chunk",
245
+ "system_fingerprint": "fp_zai_001",
246
+ }
247
+ yield f"data: {json.dumps(role_chunk)}\n\n"
248
+
249
+ delta_content = data.get("delta_content", "")
250
+ if delta_content:
251
+ # 处理思考内容格式
252
+ if delta_content.startswith("<details"):
253
+ content = (
254
+ delta_content.split("</summary>\n>")[-1].strip()
255
+ if "</summary>\n>" in delta_content
256
+ else delta_content
257
+ )
258
+ else:
259
+ content = delta_content
260
+
261
+ thinking_chunk = {
262
+ "choices": [
263
+ {
264
+ "delta": {
265
+ "role": "assistant",
266
+ "thinking": {"content": content},
267
+ },
268
+ "finish_reason": None,
269
+ "index": 0,
270
+ "logprobs": None,
271
+ }
272
+ ],
273
+ "created": int(time.time()),
274
+ "id": transformed["body"]["chat_id"],
275
+ "model": request.model,
276
+ "object": "chat.completion.chunk",
277
+ "system_fingerprint": "fp_zai_001",
278
+ }
279
+ yield f"data: {json.dumps(thinking_chunk)}\n\n"
280
+
281
+ # 处理答案内容
282
+ elif phase == "answer":
283
+ edit_content = data.get("edit_content", "")
284
+ delta_content = data.get("delta_content", "")
285
+
286
+ # 处理思考结束和答案开始
287
+ if edit_content and "</details>\n" in edit_content:
288
+ if has_thinking:
289
+ # 发送思考签名
290
+ thinking_signature = str(int(time.time() * 1000))
291
+ sig_chunk = {
292
+ "choices": [
293
+ {
294
+ "delta": {
295
+ "role": "assistant",
296
+ "thinking": {
297
+ "content": "",
298
+ "signature": thinking_signature,
299
+ },
300
+ },
301
+ "finish_reason": None,
302
+ "index": 0,
303
+ "logprobs": None,
304
+ }
305
+ ],
306
+ "created": int(time.time()),
307
+ "id": transformed["body"]["chat_id"],
308
+ "model": request.model,
309
+ "object": "chat.completion.chunk",
310
+ "system_fingerprint": "fp_zai_001",
311
+ }
312
+ yield f"data: {json.dumps(sig_chunk)}\n\n"
313
+
314
+ # 提取答案内容
315
+ content_after = edit_content.split("</details>\n")[-1]
316
+ if content_after:
317
+ content_chunk = {
318
+ "choices": [
319
+ {
320
+ "delta": {
321
+ "role": "assistant",
322
+ "content": content_after,
323
+ },
324
+ "finish_reason": None,
325
+ "index": 0,
326
+ "logprobs": None,
327
+ }
328
+ ],
329
+ "created": int(time.time()),
330
+ "id": transformed["body"]["chat_id"],
331
+ "model": request.model,
332
+ "object": "chat.completion.chunk",
333
+ "system_fingerprint": "fp_zai_001",
334
+ }
335
+ yield f"data: {json.dumps(content_chunk)}\n\n"
336
+
337
+ # 处理增量内容
338
+ elif delta_content:
339
+ # 如果还没有发送角色
340
+ if not has_thinking:
341
+ role_chunk = {
342
+ "choices": [
343
+ {
344
+ "delta": {"role": "assistant"},
345
+ "finish_reason": None,
346
+ "index": 0,
347
+ "logprobs": None,
348
+ }
349
+ ],
350
+ "created": int(time.time()),
351
+ "id": transformed["body"]["chat_id"],
352
+ "model": request.model,
353
+ "object": "chat.completion.chunk",
354
+ "system_fingerprint": "fp_zai_001",
355
+ }
356
+ yield f"data: {json.dumps(role_chunk)}\n\n"
357
+
358
+ content_chunk = {
359
+ "choices": [
360
+ {
361
+ "delta": {
362
+ "role": "assistant",
363
+ "content": delta_content,
364
+ },
365
+ "finish_reason": None,
366
+ "index": 0,
367
+ "logprobs": None,
368
+ }
369
+ ],
370
+ "created": int(time.time()),
371
+ "id": transformed["body"]["chat_id"],
372
+ "model": request.model,
373
+ "object": "chat.completion.chunk",
374
+ "system_fingerprint": "fp_zai_001",
375
+ }
376
+ output_data = f"data: {json.dumps(content_chunk)}\n\n"
377
+ debug_log(f"➡️ 输出内容块到客户端: {output_data[:200]}...")
378
+ yield output_data
379
+
380
+ # 处理完成
381
+ if data.get("usage"):
382
+ debug_log(f"📦 完成响应 - 使用统计: {json.dumps(data['usage'])}")
383
+
384
+ # 只有在非工具调用模式下才发送普通完成信号
385
+ if not tool_handler or not tool_handler.has_tool_call:
386
+ finish_chunk = {
387
+ "choices": [
388
+ {
389
+ "delta": {"role": "assistant", "content": ""},
390
+ "finish_reason": "stop",
391
+ "index": 0,
392
+ "logprobs": None,
393
+ }
394
+ ],
395
+ "usage": data["usage"],
396
+ "created": int(time.time()),
397
+ "id": transformed["body"]["chat_id"],
398
+ "model": request.model,
399
+ "object": "chat.completion.chunk",
400
+ "system_fingerprint": "fp_zai_001",
401
+ }
402
+ finish_output = f"data: {json.dumps(finish_chunk)}\n\n"
403
+ debug_log(f"➡️ 发送完成信号: {finish_output[:200]}...")
404
+ yield finish_output
405
+ debug_log("➡️ 发送 [DONE]")
406
+ yield "data: [DONE]\n\n"
407
+
408
+ except json.JSONDecodeError as e:
409
+ debug_log(f"❌ JSON解析错误: {e}, 内容: {chunk_str[:200]}")
410
+ except Exception as e:
411
+ debug_log(f"❌ 处理chunk错误: {e}")
412
+
413
+ # 确保发送结束信号
414
+ if not tool_handler or not tool_handler.has_tool_call:
415
+ debug_log("📤 发送最终 [DONE] 信号")
416
+ yield "data: [DONE]\n\n"
417
+
418
+ debug_log(f"✅ SSE 流处理完成,共处理 {line_count} 行数据")
419
+ # 成功处理完成,退出重试循环
420
+ return
421
+
422
+ except Exception as e:
423
+ debug_log(f"❌ 流处理错误: {e}")
424
+ import traceback
425
+ debug_log(traceback.format_exc())
426
+
427
+ # 标记token失败
428
+ if current_token:
429
+ transformer.mark_token_failure(current_token, e)
430
+
431
+ # 检查是否还可以重试
432
+ retry_count += 1
433
+ last_error = str(e)
434
+
435
+ if retry_count > settings.MAX_RETRIES:
436
+ # 达到最大重试次数,返回错误
437
+ debug_log(f"❌ 达到最大重试次数 ({settings.MAX_RETRIES}),流处理失败")
438
+ error_response = {
439
+ "error": {
440
+ "message": f"Stream processing failed after {settings.MAX_RETRIES} retries: {last_error}",
441
+ "type": "stream_error"
442
+ }
443
+ }
444
+ yield f"data: {json.dumps(error_response)}\n\n"
445
+ yield "data: [DONE]\n\n"
446
+ return
447
+
448
+ # 返回流式响应
449
+ debug_log("🚀 启动 SSE 流式响应")
450
 
451
+ # 创建一个包装的生成器来追踪数据流
452
+ async def logged_stream():
453
+ chunk_count = 0
454
+ try:
455
+ debug_log("📤 开始向客户端流式传输数据...")
456
+ async for chunk in stream_response():
457
+ chunk_count += 1
458
+ debug_log(f"📤 发送块[{chunk_count}]: {chunk[:200]}..." if len(chunk) > 200 else f" 📤 发送块[{chunk_count}]: {chunk}")
459
+ yield chunk
460
+ debug_log(f"✅ 流式传输完成,共发送 {chunk_count} 个数据块")
461
+ except Exception as e:
462
+ debug_log(f"❌ 流式传输中断: {e}")
463
+ raise
464
 
465
+ return StreamingResponse(
466
+ logged_stream(),
467
+ media_type="text/event-stream",
468
+ headers={
469
+ "Cache-Control": "no-cache",
470
+ "Connection": "keep-alive",
 
 
 
 
 
 
471
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
  )
473
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
  except HTTPException:
475
  raise
476
  except Exception as e:
477
+ debug_log(f"处理请求时发生错误: {str(e)}")
478
  import traceback
479
+
480
+ debug_log(f"❌ 错误堆栈: {traceback.format_exc()}")
481
  raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
app/core/response_handlers.py CHANGED
@@ -1,468 +1,499 @@
1
- """
2
- Response handlers for streaming and non-streaming responses
3
- """
4
-
5
- import json
6
- import time
7
- from typing import Generator, Optional
8
- import requests
9
- from fastapi import HTTPException
10
- from fastapi.responses import JSONResponse, StreamingResponse
11
-
12
- from app.core.config import settings
13
- from app.models.schemas import (
14
- Message, Delta, Choice, Usage, OpenAIResponse,
15
- UpstreamRequest, UpstreamData, UpstreamError, ModelItem
16
- )
17
- from app.utils.helpers import debug_log, call_upstream_api, transform_thinking_content
18
- from app.core.token_manager import token_manager
19
- from app.utils.sse_parser import SSEParser
20
  from app.utils.tools import extract_tool_invocations, remove_tool_json_content
21
-
22
-
23
- def create_openai_response_chunk(
24
- model: str,
25
- delta: Optional[Delta] = None,
26
- finish_reason: Optional[str] = None
27
- ) -> OpenAIResponse:
28
- """Create OpenAI response chunk for streaming"""
29
- return OpenAIResponse(
30
- id=f"chatcmpl-{int(time.time())}",
31
- object="chat.completion.chunk",
32
- created=int(time.time()),
33
- model=model,
34
- choices=[Choice(
35
- index=0,
36
- delta=delta or Delta(),
37
- finish_reason=finish_reason
38
- )]
39
- )
40
-
41
-
42
- def handle_upstream_error(error: UpstreamError) -> Generator[str, None, None]:
43
- """Handle upstream error response"""
44
- debug_log(f"上游错误: code={error.code}, detail={error.detail}")
45
-
46
- # Send end chunk
47
- end_chunk = create_openai_response_chunk(
48
- model=settings.PRIMARY_MODEL,
49
- finish_reason="stop"
50
- )
51
- yield f"data: {end_chunk.model_dump_json()}\n\n"
52
- yield "data: [DONE]\n\n"
53
-
54
-
55
- class ResponseHandler:
56
- """Base class for response handling"""
57
-
58
- def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str):
59
- self.upstream_req = upstream_req
60
- self.chat_id = chat_id
61
- self.auth_token = auth_token
62
-
63
- def _call_upstream(self) -> requests.Response:
64
- """Call upstream API with error handling"""
65
- max_retries = settings.MAX_RETRIES
66
- retry_count = 0
67
-
68
- while retry_count < max_retries:
69
- try:
70
- debug_log(f"尝试调用上游API (第 {retry_count + 1}/{max_retries} 次)")
71
- response = call_upstream_api(self.upstream_req, self.chat_id, self.auth_token)
72
-
73
- # Check if response is successful
74
- if response.status_code == 200:
75
- # Mark token as successful
76
- token_manager.mark_token_success(self.auth_token)
77
- debug_log("上游API调用成功")
78
- return response
79
- elif response.status_code in [401, 403]:
80
- # Authentication/authorization error - mark token as failed
81
- debug_log(f"Token认证失败 (状态码: {response.status_code}): {self.auth_token[:20]}...")
82
- token_manager.mark_token_failed(self.auth_token)
83
-
84
- # Try to get a new token
85
- new_token = token_manager.get_next_token()
86
- if new_token and new_token != self.auth_token:
87
- debug_log(f"尝试使用新token: {new_token[:20]}...")
88
- self.auth_token = new_token
89
- retry_count += 1
90
- continue
91
- else:
92
- debug_log("没有更多可用token")
93
- return response
94
- elif response.status_code in [429]:
95
- # Rate limit - don't mark token as failed, just retry
96
- debug_log(f"遇到速率限制 (状态码: {response.status_code}),等待后重试")
97
- if retry_count < max_retries - 1:
98
- import time
99
- time.sleep(2 ** retry_count) # 指数退避
100
- retry_count += 1
101
- continue
102
- else:
103
- return response
104
- elif response.status_code >= 500:
105
- # Server error - retry without marking token as failed
106
- debug_log(f"服务器错误 (状态码: {response.status_code}),稍后重试")
107
- if retry_count < max_retries - 1:
108
- import time
109
- time.sleep(1)
110
- retry_count += 1
111
- continue
112
- else:
113
- return response
114
- else:
115
- # Other client errors, return response as-is
116
- debug_log(f"客户端错误 (状态码: {response.status_code})")
117
- return response
118
-
119
- except Exception as e:
120
- error_msg = str(e)
121
- debug_log(f"调用上游失败 (尝试 {retry_count + 1}/{max_retries}): {error_msg}")
122
-
123
- # 判断是否是连接问题还是token问题
124
- is_connection_error = any(keyword in error_msg.lower() for keyword in [
125
- 'connection', 'timeout', 'network', 'dns', 'socket', 'ssl'
126
- ])
127
-
128
- if is_connection_error:
129
- debug_log("检测到网络连接问题,不标记token失败")
130
- # 网络问题不标记token失败,直接重试
131
- if retry_count < max_retries - 1:
132
- import time
133
- time.sleep(2) # 等待2秒后重试
134
- retry_count += 1
135
- continue
136
- else:
137
- raise Exception(f"网络连接问题,重试{max_retries}次后仍失败: {error_msg}")
138
- else:
139
- # 其他错误可能是token问题,标记失败并尝试新token
140
- debug_log("检测到可能token问题,标记token失败")
141
- token_manager.mark_token_failed(self.auth_token)
142
-
143
- # Try to get a new token
144
- new_token = token_manager.get_next_token()
145
- if new_token and new_token != self.auth_token and retry_count < max_retries - 1:
146
- debug_log(f"尝试使用新token: {new_token[:20]}...")
147
- self.auth_token = new_token
148
- retry_count += 1
149
- continue
150
- else:
151
- raise
152
-
153
- # If we get here, all retries failed
154
- raise Exception("所有重试尝试均失败")
155
-
156
- def _handle_upstream_error(self, response: requests.Response) -> None:
157
- """Handle upstream error response"""
158
- debug_log(f"上游返回错误状态: {response.status_code}")
159
- if settings.DEBUG_LOGGING:
160
- debug_log(f"上游错误响应: {response.text}")
161
-
162
-
163
- class StreamResponseHandler(ResponseHandler):
164
- """Handler for streaming responses"""
165
-
166
- def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
167
- super().__init__(upstream_req, chat_id, auth_token)
168
- self.has_tools = has_tools
169
- self.buffered_content = ""
 
170
  self.tool_calls = None
171
-
172
- def handle(self) -> Generator[str, None, None]:
173
- """Handle streaming response"""
174
- debug_log(f"开始处理流式响应 (chat_id={self.chat_id})")
175
-
176
- try:
177
- response = self._call_upstream()
178
- except Exception:
179
- yield "data: {\"error\": \"Failed to call upstream\"}\n\n"
180
- return
181
-
182
- if response.status_code != 200:
183
- self._handle_upstream_error(response)
184
- yield "data: {\"error\": \"Upstream error\"}\n\n"
185
- return
186
-
187
- # Send initial role chunk
188
- first_chunk = create_openai_response_chunk(
189
- model=settings.PRIMARY_MODEL,
190
- delta=Delta(role="assistant")
191
- )
192
- yield f"data: {first_chunk.model_dump_json()}\n\n"
193
-
194
- # Process stream
195
- debug_log("开始读取上游SSE流")
196
- sent_initial_answer = False
197
- stream_ended_normally = False
198
-
199
- try:
200
- with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
201
- for event in parser.iter_json_data(UpstreamData):
202
- upstream_data = event['data']
203
-
204
- # Check for errors
205
- if self._has_error(upstream_data):
206
- error = self._get_error(upstream_data)
207
- yield from handle_upstream_error(error)
208
- stream_ended_normally = True
209
- break
210
-
211
- debug_log(f"解析成功 - 类型: {upstream_data.type}, 阶段: {upstream_data.data.phase}, "
212
- f"内容长度: {len(upstream_data.data.delta_content or '')}, 完成: {upstream_data.data.done}")
213
-
214
- # Process content
215
- yield from self._process_content(upstream_data, sent_initial_answer)
216
-
217
- # Update sent_initial_answer flag if we sent content
218
- if not sent_initial_answer and (upstream_data.data.delta_content or upstream_data.data.edit_content):
219
- sent_initial_answer = True
220
-
221
- # Check if done
222
- if upstream_data.data.done or upstream_data.data.phase == "done":
223
- debug_log("检测到流结束信号")
224
- yield from self._send_end_chunk()
225
- stream_ended_normally = True
226
- break
227
-
228
- except Exception as e:
229
- debug_log(f"SSE流处理异常: {e}")
230
- # 流异常结束,发送错误响应
231
- if not stream_ended_normally:
232
- error_chunk = create_openai_response_chunk(
233
- model=settings.PRIMARY_MODEL,
234
- delta=Delta(content=f"\n\n[系统提示: 连接中断,响应可能不完整]")
235
- )
236
- yield f"data: {error_chunk.model_dump_json()}\n\n"
237
-
238
- # 确保流正常结束
239
- if not stream_ended_normally:
240
- debug_log("正常结束,发送结束信号")
241
- yield from self._send_end_chunk(force_stop=True)
242
-
243
- def _has_error(self, upstream_data: UpstreamData) -> bool:
244
- """Check if upstream data contains error"""
245
- return bool(
246
- upstream_data.error or
247
- upstream_data.data.error or
248
- (upstream_data.data.inner and upstream_data.data.inner.error)
249
- )
250
-
251
- def _get_error(self, upstream_data: UpstreamData) -> UpstreamError:
252
- """Get error from upstream data"""
253
- return (
254
- upstream_data.error or
255
- upstream_data.data.error or
256
- (upstream_data.data.inner.error if upstream_data.data.inner else None)
257
- )
258
-
259
- def _process_content(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
 
261
  self,
262
  upstream_data: UpstreamData,
263
  sent_initial_answer: bool
264
  ) -> Generator[str, None, None]:
265
- """Process content from upstream data"""
266
- content = upstream_data.data.delta_content or upstream_data.data.edit_content
267
-
268
- if not content:
269
- return
270
-
271
- # Transform thinking content
272
- if upstream_data.data.phase == "thinking":
273
- content = transform_thinking_content(content)
274
-
275
- # Buffer content if tools are enabled
276
- if self.has_tools:
277
- self.buffered_content += content
278
- else:
279
- # Handle initial answer content
280
- if (not sent_initial_answer and
281
- upstream_data.data.edit_content and
282
- upstream_data.data.phase == "answer"):
283
-
284
- content = self._extract_edit_content(upstream_data.data.edit_content)
285
- if content:
286
- debug_log(f"发送普通内容: {content}")
287
- chunk = create_openai_response_chunk(
288
- model=settings.PRIMARY_MODEL,
289
- delta=Delta(content=content)
290
- )
291
- yield f"data: {chunk.model_dump_json()}\n\n"
292
- sent_initial_answer = True
293
-
294
- # Handle delta content
295
- if upstream_data.data.delta_content:
296
- if content:
297
- if upstream_data.data.phase == "thinking":
298
- debug_log(f"发送思考内容: {content}")
299
- chunk = create_openai_response_chunk(
300
- model=settings.PRIMARY_MODEL,
301
- delta=Delta(reasoning_content=content)
302
- )
303
- else:
304
- debug_log(f"发送普通内容: {content}")
305
- chunk = create_openai_response_chunk(
306
- model=settings.PRIMARY_MODEL,
307
- delta=Delta(content=content)
308
- )
309
- yield f"data: {chunk.model_dump_json()}\n\n"
310
-
311
- def _extract_edit_content(self, edit_content: str) -> str:
312
- """Extract content from edit_content field"""
313
- parts = edit_content.split("</details>")
314
- return parts[1] if len(parts) > 1 else ""
315
-
316
- def _send_end_chunk(self, force_stop: bool = False) -> Generator[str, None, None]:
317
- """Send end chunk and DONE signal"""
318
- finish_reason = "stop"
319
-
320
- if self.has_tools and not force_stop:
321
- # Try to extract tool calls from buffered content
322
- self.tool_calls = extract_tool_invocations(self.buffered_content)
323
-
324
- if self.tool_calls:
325
- debug_log(f"检测到工具调用: {len(self.tool_calls)} 个")
326
- # Send tool calls with proper format
327
- for i, tc in enumerate(self.tool_calls):
328
- tool_call_delta = {
329
- "index": i,
330
- "id": tc.get("id"),
331
- "type": tc.get("type", "function"),
332
- "function": tc.get("function", {}),
333
- }
334
-
335
- out_chunk = create_openai_response_chunk(
336
- model=settings.PRIMARY_MODEL,
337
- delta=Delta(tool_calls=[tool_call_delta])
338
- )
339
- yield f"data: {out_chunk.model_dump_json()}\n\n"
340
-
341
- finish_reason = "tool_calls"
342
- else:
343
- # Send regular content
344
- trimmed_content = remove_tool_json_content(self.buffered_content)
345
- if trimmed_content:
346
- debug_log(f"发送常规内容: {len(trimmed_content)} 字符")
347
- content_chunk = create_openai_response_chunk(
348
- model=settings.PRIMARY_MODEL,
349
- delta=Delta(content=trimmed_content)
350
- )
351
- yield f"data: {content_chunk.model_dump_json()}\n\n"
352
- elif force_stop:
353
- # 强制结束时,发送缓冲的内容(如果有)
354
- if self.buffered_content:
355
- debug_log(f"强制结束,发送缓冲内容: {len(self.buffered_content)} 字符")
356
- content_chunk = create_openai_response_chunk(
357
- model=settings.PRIMARY_MODEL,
358
- delta=Delta(content=self.buffered_content)
359
  )
360
- yield f"data: {content_chunk.model_dump_json()}\n\n"
361
 
362
- # Send final chunk
363
- end_chunk = create_openai_response_chunk(
364
- model=settings.PRIMARY_MODEL,
365
- finish_reason=finish_reason
366
- )
367
- yield f"data: {end_chunk.model_dump_json()}\n\n"
368
- yield "data: [DONE]\n\n"
369
- debug_log(f"流式响应完成 (finish_reason: {finish_reason})")
370
 
371
 
372
- class NonStreamResponseHandler(ResponseHandler):
373
- """Handler for non-streaming responses"""
374
-
375
- def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
376
- super().__init__(upstream_req, chat_id, auth_token)
377
- self.has_tools = has_tools
378
-
379
- def handle(self) -> JSONResponse:
380
- """Handle non-streaming response"""
381
- debug_log(f"开始处理非流式响应 (chat_id={self.chat_id})")
382
-
383
- try:
384
- response = self._call_upstream()
385
- except Exception as e:
386
- debug_log(f"调用上游失败: {e}")
387
- raise HTTPException(status_code=502, detail="Failed to call upstream")
388
-
389
- if response.status_code != 200:
390
- self._handle_upstream_error(response)
391
- raise HTTPException(status_code=502, detail="Upstream error")
392
-
393
- # Collect full response
394
- full_content = []
395
- debug_log("开始收集完整响应内容")
396
- response_completed = False
397
-
398
- try:
399
- with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
400
- for event in parser.iter_json_data(UpstreamData):
401
- upstream_data = event['data']
402
-
403
- if upstream_data.data.delta_content:
404
- content = upstream_data.data.delta_content
405
-
406
- if upstream_data.data.phase == "thinking":
407
- content = transform_thinking_content(content)
408
-
409
- if content:
410
- full_content.append(content)
411
-
412
- if upstream_data.data.done or upstream_data.data.phase == "done":
413
- debug_log("检测到完成信号,停止收集")
414
- response_completed = True
415
- break
416
-
417
- except Exception as e:
418
- debug_log(f"非流式响应收集异常: {e}")
419
- if not full_content:
420
- # 如果没有收集到任何内容,抛出异常
421
- raise HTTPException(status_code=502, detail=f"Response collection failed: {str(e)}")
422
- else:
423
- debug_log(f"部分内容收集成功,继续处理 ({len(full_content)} 个片段)")
424
-
425
- if not response_completed and not full_content:
426
- debug_log("响应未完成且无内容,可能是连接问题")
427
- raise HTTPException(status_code=502, detail="Incomplete response from upstream")
428
-
429
- final_content = "".join(full_content)
430
- debug_log(f"内容收集完成,最终长度: {len(final_content)}")
431
-
432
- # Handle tool calls for non-streaming
433
- tool_calls = None
434
- finish_reason = "stop"
435
- message_content = final_content
436
-
437
- if self.has_tools:
438
- tool_calls = extract_tool_invocations(final_content)
439
- if tool_calls:
440
- # Content must be null when tool_calls are present (OpenAI spec)
441
- message_content = None
442
- finish_reason = "tool_calls"
443
- debug_log(f"提取到工具调用: {json.dumps(tool_calls, ensure_ascii=False)}")
444
- else:
445
- # Remove tool JSON from content
446
- message_content = remove_tool_json_content(final_content)
447
- if not message_content:
448
- message_content = final_content # 保留原内容如果清理后为空
449
-
450
- # Build response
451
- response_data = OpenAIResponse(
452
- id=f"chatcmpl-{int(time.time())}",
453
- object="chat.completion",
454
- created=int(time.time()),
455
- model=settings.PRIMARY_MODEL,
456
- choices=[Choice(
457
- index=0,
458
- message=Message(
459
- role="assistant",
460
- content=message_content,
461
- tool_calls=tool_calls
462
- ),
463
- finish_reason=finish_reason
464
- )],
465
- usage=Usage()
466
- )
467
-
468
- debug_log("非流式响应发送完成")
469
  return JSONResponse(content=response_data.model_dump(exclude_none=True))
 
1
+ """
2
+ Response handlers for streaming and non-streaming responses
3
+ """
4
+
5
+ import json
6
+ import time
7
+ from typing import Generator, Optional
8
+ import requests
9
+ from fastapi import HTTPException
10
+ from fastapi.responses import JSONResponse, StreamingResponse
11
+
12
+ from app.core.config import settings
13
+ from app.models.schemas import (
14
+ Message, Delta, Choice, Usage, OpenAIResponse,
15
+ UpstreamRequest, UpstreamData, UpstreamError, ModelItem
16
+ )
17
+ from app.utils.helpers import debug_log, call_upstream_api, transform_thinking_content
18
+ from app.core.token_manager import token_manager
19
+ from app.utils.sse_parser import SSEParser
20
  from app.utils.tools import extract_tool_invocations, remove_tool_json_content
21
+ from app.utils.sse_tool_handler import SSEToolHandler
22
+
23
+
24
def create_openai_response_chunk(
    model: str,
    delta: Optional[Delta] = None,
    finish_reason: Optional[str] = None
) -> OpenAIResponse:
    """Build one SSE chunk in the OpenAI ``chat.completion.chunk`` format.

    The chunk always carries a single choice at index 0; when no delta is
    supplied an empty Delta is emitted instead.
    """
    single_choice = Choice(
        index=0,
        delta=delta or Delta(),
        finish_reason=finish_reason,
    )
    return OpenAIResponse(
        id=f"chatcmpl-{int(time.time())}",
        object="chat.completion.chunk",
        created=int(time.time()),
        model=model,
        choices=[single_choice],
    )
41
+
42
+
43
def handle_upstream_error(error: UpstreamError) -> Generator[str, None, None]:
    """Terminate the SSE stream after an upstream error.

    Logs the error, then yields a final chunk with ``finish_reason="stop"``
    followed by the ``[DONE]`` sentinel.
    """
    debug_log(f"上游错误: code={error.code}, detail={error.detail}")

    closing_chunk = create_openai_response_chunk(
        model=settings.PRIMARY_MODEL,
        finish_reason="stop",
    )
    yield f"data: {closing_chunk.model_dump_json()}\n\n"
    yield "data: [DONE]\n\n"
54
+
55
+
56
class ResponseHandler:
    """Base class for response handling.

    Holds the upstream request, chat id and auth token, and implements a
    retrying upstream call that rotates tokens on auth failures and backs
    off on rate limits and transient server errors.
    """

    # Lower-case substrings that indicate a network problem rather than a
    # bad token, so the current token must not be marked as failed.
    _CONNECTION_ERROR_KEYWORDS = ('connection', 'timeout', 'network', 'dns', 'socket', 'ssl')

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str):
        self.upstream_req = upstream_req
        self.chat_id = chat_id
        self.auth_token = auth_token

    def _call_upstream(self) -> requests.Response:
        """Call the upstream API, retrying up to ``settings.MAX_RETRIES`` times.

        Status handling:
        - 200: mark token successful, return the response.
        - 401/403: mark token failed, rotate to a new token and retry.
        - 429: exponential backoff, retry without blaming the token.
        - >=500: short sleep, retry without blaming the token.
        - other 4xx: return the response as-is.
        Exceptions that look like connectivity problems are retried; any
        other exception marks the token failed and rotates before re-raising.

        Raises:
            Exception: when every retry attempt has been exhausted.
        """
        max_retries = settings.MAX_RETRIES
        retry_count = 0

        while retry_count < max_retries:
            try:
                debug_log(f"尝试调用上游API ( {retry_count + 1}/{max_retries} 次)")
                response = call_upstream_api(self.upstream_req, self.chat_id, self.auth_token)

                # Check if response is successful
                if response.status_code == 200:
                    # Mark token as successful
                    token_manager.mark_token_success(self.auth_token)
                    debug_log("上游API调用成功")
                    return response
                elif response.status_code in (401, 403):
                    # Authentication/authorization error - mark token as failed
                    debug_log(f"Token认证失败 (状态码: {response.status_code}): {self.auth_token[:20]}...")
                    token_manager.mark_token_failed(self.auth_token)

                    # Try to get a new token
                    new_token = token_manager.get_next_token()
                    if new_token and new_token != self.auth_token:
                        debug_log(f"尝试使用新token: {new_token[:20]}...")
                        self.auth_token = new_token
                        retry_count += 1
                        continue
                    else:
                        debug_log("没有更多可用token")
                        return response
                elif response.status_code == 429:
                    # Rate limit - don't mark token as failed, just retry
                    debug_log(f"遇到速率限制 (状态码: {response.status_code}),等待后重试")
                    if retry_count < max_retries - 1:
                        time.sleep(2 ** retry_count)  # exponential backoff
                        retry_count += 1
                        continue
                    else:
                        return response
                elif response.status_code >= 500:
                    # Server error - retry without marking token as failed
                    debug_log(f"服务器错误 (状态码: {response.status_code}),稍后重试")
                    if retry_count < max_retries - 1:
                        time.sleep(1)
                        retry_count += 1
                        continue
                    else:
                        return response
                else:
                    # Other client errors, return response as-is
                    debug_log(f"客户端错误 (状态码: {response.status_code})")
                    return response

            except Exception as e:
                error_msg = str(e)
                debug_log(f"调用上游失败 (尝试 {retry_count + 1}/{max_retries}): {error_msg}")

                # Decide whether this looks like a connectivity problem or a
                # token problem, based on the exception message.
                is_connection_error = any(
                    keyword in error_msg.lower()
                    for keyword in self._CONNECTION_ERROR_KEYWORDS
                )

                if is_connection_error:
                    debug_log("检测到网络连接问题不标记token失败")
                    # Network issue: retry without penalising the token.
                    if retry_count < max_retries - 1:
                        time.sleep(2)  # brief pause before retrying
                        retry_count += 1
                        continue
                    else:
                        raise Exception(f"网络连接问题,重试{max_retries}次后仍失败: {error_msg}")
                else:
                    # Probably a token problem: mark it failed and try another.
                    debug_log("检测到可能的token问题,标记token失败")
                    token_manager.mark_token_failed(self.auth_token)

                    # Try to get a new token
                    new_token = token_manager.get_next_token()
                    if new_token and new_token != self.auth_token and retry_count < max_retries - 1:
                        debug_log(f"尝试使用新token: {new_token[:20]}...")
                        self.auth_token = new_token
                        retry_count += 1
                        continue
                    else:
                        raise

        # If we get here, all retries failed
        raise Exception("所有重试尝试均失败")

    def _handle_upstream_error(self, response: requests.Response) -> None:
        """Log a non-200 upstream response (body only when debug logging is on)."""
        debug_log(f"上游返回错误状态: {response.status_code}")
        if settings.DEBUG_LOGGING:
            debug_log(f"上游错误响应: {response.text}")
162
+
163
+
164
+ class StreamResponseHandler(ResponseHandler):
165
+ """Handler for streaming responses"""
166
+
167
+ def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
168
+ super().__init__(upstream_req, chat_id, auth_token)
169
+ self.has_tools = has_tools
170
+ self.buffered_content = ""
171
  self.tool_calls = None
172
+ # Initialize SSE tool handler for improved tool processing
173
+ self.tool_handler = SSEToolHandler(chat_id, settings.PRIMARY_MODEL) if has_tools else None
174
+
175
    def handle(self) -> Generator[str, None, None]:
        """Handle streaming response.

        Calls upstream, emits an initial assistant-role chunk, then relays
        the upstream SSE events as OpenAI-format chunks. On any abnormal
        termination a forced end chunk is still sent so the client sees a
        well-formed stream.
        """
        debug_log(f"开始处理流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception:
            # Upstream unreachable: emit an error payload instead of raising,
            # since headers may already be on the wire.
            yield "data: {\"error\": \"Failed to call upstream\"}\n\n"
            return

        if response.status_code != 200:
            self._handle_upstream_error(response)
            yield "data: {\"error\": \"Upstream error\"}\n\n"
            return

        # Send initial role chunk
        first_chunk = create_openai_response_chunk(
            model=settings.PRIMARY_MODEL,
            delta=Delta(role="assistant")
        )
        yield f"data: {first_chunk.model_dump_json()}\n\n"

        # Process stream
        debug_log("开始读取上游SSE流")
        sent_initial_answer = False
        stream_ended_normally = False

        try:
            with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
                for event in parser.iter_json_data(UpstreamData):
                    upstream_data = event['data']

                    # Check for errors
                    if self._has_error(upstream_data):
                        error = self._get_error(upstream_data)
                        yield from handle_upstream_error(error)
                        stream_ended_normally = True
                        break

                    debug_log(f"解析成功 - 类型: {upstream_data.type}, 阶段: {upstream_data.data.phase}, "
                              f"内容长度: {len(upstream_data.data.delta_content or '')}, 完成: {upstream_data.data.done}")

                    # Process content
                    yield from self._process_content_with_tools(upstream_data, sent_initial_answer)

                    # Update sent_initial_answer flag if we sent content
                    if not sent_initial_answer and (upstream_data.data.delta_content or upstream_data.data.edit_content):
                        sent_initial_answer = True

                    # Check if done
                    if upstream_data.data.done or upstream_data.data.phase == "done":
                        debug_log("检测到流结束信号")
                        yield from self._send_end_chunk()
                        stream_ended_normally = True
                        break

        except Exception as e:
            debug_log(f"SSE流处理异常: {e}")
            # Stream ended abnormally: warn the client that output may be
            # truncated before closing out.
            if not stream_ended_normally:
                error_chunk = create_openai_response_chunk(
                    model=settings.PRIMARY_MODEL,
                    delta=Delta(content=f"\n\n[系统提示: 连接中断,响应可能不完整]")
                )
                yield f"data: {error_chunk.model_dump_json()}\n\n"

        # Ensure the stream is always terminated properly.
        if not stream_ended_normally:
            debug_log("流未正常结束,发送结束信号")
            yield from self._send_end_chunk(force_stop=True)
245
+
246
+ def _has_error(self, upstream_data: UpstreamData) -> bool:
247
+ """Check if upstream data contains error"""
248
+ return bool(
249
+ upstream_data.error or
250
+ upstream_data.data.error or
251
+ (upstream_data.data.inner and upstream_data.data.inner.error)
252
+ )
253
+
254
+ def _get_error(self, upstream_data: UpstreamData) -> UpstreamError:
255
+ """Get error from upstream data"""
256
+ return (
257
+ upstream_data.error or
258
+ upstream_data.data.error or
259
+ (upstream_data.data.inner.error if upstream_data.data.inner else None)
260
+ )
261
+
262
    def _process_content(
        self,
        upstream_data: UpstreamData,
        sent_initial_answer: bool
    ) -> Generator[str, None, None]:
        """Process content from upstream data.

        Thinking-phase content is transformed and emitted as
        ``reasoning_content``; answer content is emitted as ``content``.
        When tools are enabled, everything is buffered instead so tool
        invocations can be extracted at end-of-stream.
        """
        content = upstream_data.data.delta_content or upstream_data.data.edit_content

        if not content:
            return

        # Transform thinking content
        if upstream_data.data.phase == "thinking":
            content = transform_thinking_content(content)

        # Buffer content if tools are enabled
        if self.has_tools:
            self.buffered_content += content
        else:
            # Handle initial answer content
            if (not sent_initial_answer and
                upstream_data.data.edit_content and
                upstream_data.data.phase == "answer"):

                content = self._extract_edit_content(upstream_data.data.edit_content)
                if content:
                    debug_log(f"发送普通内容: {content}")
                    chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(content=content)
                    )
                    yield f"data: {chunk.model_dump_json()}\n\n"
                    # NOTE(review): this only rebinds the local parameter;
                    # the caller (handle) tracks its own copy of the flag.
                    sent_initial_answer = True

            # Handle delta content
            if upstream_data.data.delta_content:
                if content:
                    if upstream_data.data.phase == "thinking":
                        debug_log(f"发送思考内容: {content}")
                        chunk = create_openai_response_chunk(
                            model=settings.PRIMARY_MODEL,
                            delta=Delta(reasoning_content=content)
                        )
                    else:
                        debug_log(f"发送普通内容: {content}")
                        chunk = create_openai_response_chunk(
                            model=settings.PRIMARY_MODEL,
                            delta=Delta(content=content)
                        )
                    yield f"data: {chunk.model_dump_json()}\n\n"
312
+
313
+ def _extract_edit_content(self, edit_content: str) -> str:
314
+ """Extract content from edit_content field"""
315
+ parts = edit_content.split("</details>")
316
+ return parts[1] if len(parts) > 1 else ""
317
+
318
    def _send_end_chunk(self, force_stop: bool = False) -> Generator[str, None, None]:
        """Send end chunk and DONE signal.

        With tools enabled (and no forced stop), the buffered content is
        scanned for tool invocations: if any are found they are streamed as
        tool_call deltas and ``finish_reason`` becomes ``tool_calls``;
        otherwise the buffer is emitted as plain content with the tool JSON
        stripped. ``force_stop=True`` flushes the raw buffer as-is.
        """
        finish_reason = "stop"

        if self.has_tools and not force_stop:
            # Try to extract tool calls from buffered content
            self.tool_calls = extract_tool_invocations(self.buffered_content)

            if self.tool_calls:
                debug_log(f"检测到工具调用: {len(self.tool_calls)} 个")
                # Send tool calls with proper format
                for i, tc in enumerate(self.tool_calls):
                    tool_call_delta = {
                        "index": i,
                        "id": tc.get("id"),
                        "type": tc.get("type", "function"),
                        "function": tc.get("function", {}),
                    }

                    out_chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(tool_calls=[tool_call_delta])
                    )
                    yield f"data: {out_chunk.model_dump_json()}\n\n"

                finish_reason = "tool_calls"
            else:
                # Send regular content
                trimmed_content = remove_tool_json_content(self.buffered_content)
                if trimmed_content:
                    debug_log(f"发送常规内容: {len(trimmed_content)} 字符")
                    content_chunk = create_openai_response_chunk(
                        model=settings.PRIMARY_MODEL,
                        delta=Delta(content=trimmed_content)
                    )
                    yield f"data: {content_chunk.model_dump_json()}\n\n"
        elif force_stop:
            # Forced termination: flush the buffered content (if any).
            if self.buffered_content:
                debug_log(f"强制结束,发送缓冲内容: {len(self.buffered_content)} 字符")
                content_chunk = create_openai_response_chunk(
                    model=settings.PRIMARY_MODEL,
                    delta=Delta(content=self.buffered_content)
                )
                yield f"data: {content_chunk.model_dump_json()}\n\n"

        # Send final chunk
        end_chunk = create_openai_response_chunk(
            model=settings.PRIMARY_MODEL,
            finish_reason=finish_reason
        )
        yield f"data: {end_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
        debug_log(f"流式响应完成 (finish_reason: {finish_reason})")
372
+
373
+
374
 
375
+ def _process_content_with_tools(
376
  self,
377
  upstream_data: UpstreamData,
378
  sent_initial_answer: bool
379
  ) -> Generator[str, None, None]:
380
+ """Process content with improved tool handling"""
381
+ # Handle tool calls with improved SSE tool handler
382
+ if self.has_tools and self.tool_handler:
383
+ # Check if this is a tool_call phase
384
+ if upstream_data.data.phase == "tool_call":
385
+ # Use the improved tool handler for tool call processing
386
+ yield from self.tool_handler.process_tool_call_phase(
387
+ upstream_data.data.model_dump(),
388
+ is_stream=True
389
+ )
390
+ return
391
+ elif upstream_data.data.phase == "other":
392
+ # Handle other phase which may contain tool completion signals
393
+ yield from self.tool_handler.process_other_phase(
394
+ upstream_data.data.model_dump(),
395
+ is_stream=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396
  )
397
+ return
398
 
399
+ # Fall back to original content processing
400
+ yield from self._process_content(upstream_data, sent_initial_answer)
 
 
 
 
 
 
401
 
402
 
403
class NonStreamResponseHandler(ResponseHandler):
    """Handler for non-streaming responses.

    Consumes the entire upstream SSE stream, concatenates the content, and
    returns a single OpenAI-format chat.completion JSON response.
    """

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str, has_tools: bool = False):
        super().__init__(upstream_req, chat_id, auth_token)
        # Whether tool-call extraction should run on the collected content.
        self.has_tools = has_tools

    def handle(self) -> JSONResponse:
        """Handle non-streaming response.

        Raises:
            HTTPException: 502 on upstream failure, non-200 status, or an
                incomplete/empty collection.
        """
        debug_log(f"开始处理非流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception as e:
            debug_log(f"调用上游失败: {e}")
            raise HTTPException(status_code=502, detail="Failed to call upstream")

        if response.status_code != 200:
            self._handle_upstream_error(response)
            raise HTTPException(status_code=502, detail="Upstream error")

        # Collect full response
        full_content = []
        debug_log("开始收集完整响应内容")
        response_completed = False

        try:
            with SSEParser(response, debug_mode=settings.DEBUG_LOGGING) as parser:
                for event in parser.iter_json_data(UpstreamData):
                    upstream_data = event['data']

                    if upstream_data.data.delta_content:
                        content = upstream_data.data.delta_content

                        if upstream_data.data.phase == "thinking":
                            content = transform_thinking_content(content)

                        if content:
                            full_content.append(content)

                    if upstream_data.data.done or upstream_data.data.phase == "done":
                        debug_log("检测到完成信号,停止收集")
                        response_completed = True
                        break

        except Exception as e:
            debug_log(f"非流式响应收集异常: {e}")
            if not full_content:
                # Nothing collected at all: surface the failure.
                raise HTTPException(status_code=502, detail=f"Response collection failed: {str(e)}")
            else:
                # Partial content was collected; continue with what we have.
                debug_log(f"部分内容收集成功,继续处理 ({len(full_content)} 个片段)")

        if not response_completed and not full_content:
            debug_log("响应未完成且无内容,可能是连接问题")
            raise HTTPException(status_code=502, detail="Incomplete response from upstream")

        final_content = "".join(full_content)
        debug_log(f"内容收集完成,最终长度: {len(final_content)}")

        # Handle tool calls for non-streaming
        tool_calls = None
        finish_reason = "stop"
        message_content = final_content

        if self.has_tools:
            tool_calls = extract_tool_invocations(final_content)
            if tool_calls:
                # Content must be null when tool_calls are present (OpenAI spec)
                message_content = None
                finish_reason = "tool_calls"
                debug_log(f"提取到工具调用: {json.dumps(tool_calls, ensure_ascii=False)}")
            else:
                # Remove tool JSON from content
                message_content = remove_tool_json_content(final_content)
                if not message_content:
                    # Keep the original content if cleaning left it empty.
                    message_content = final_content

        # Build response
        response_data = OpenAIResponse(
            id=f"chatcmpl-{int(time.time())}",
            object="chat.completion",
            created=int(time.time()),
            model=settings.PRIMARY_MODEL,
            choices=[Choice(
                index=0,
                message=Message(
                    role="assistant",
                    content=message_content,
                    tool_calls=tool_calls
                ),
                finish_reason=finish_reason
            )],
            usage=Usage()
        )

        debug_log("非流式响应发送完成")
        return JSONResponse(content=response_data.model_dump(exclude_none=True))
app/core/zai_transformer.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import json
5
+ import time
6
+ import uuid
7
+ import random
8
+ from datetime import datetime
9
+ from typing import Dict, List, Any, Optional, Generator, AsyncGenerator
10
+ import httpx
11
+ import asyncio
12
+ from fake_useragent import UserAgent
13
+
14
+ from app.core.config import settings
15
+ from app.utils.helpers import debug_log
16
+ from app.core.token_manager import token_manager
17
+
18
+ # 全局 UserAgent 实例(单例模式)
19
+ _user_agent_instance = None
20
+
21
+
22
def get_user_agent_instance() -> UserAgent:
    """Return the process-wide UserAgent singleton, creating it lazily."""
    global _user_agent_instance
    if _user_agent_instance is not None:
        return _user_agent_instance
    _user_agent_instance = UserAgent()
    return _user_agent_instance
28
+
29
+
30
def get_dynamic_headers(chat_id: str = "") -> Dict[str, str]:
    """Generate dynamic browser-like headers with a random User-Agent.

    Picks a browser family (weighted toward Chrome and Edge), derives a
    matching ``sec-ch-ua`` client-hint header from the UA string, and sets
    the Referer to the chat page when ``chat_id`` is given.

    Args:
        chat_id: Optional chat session id used to build the Referer URL.

    Returns:
        Header dict ready to be sent to chat.z.ai.
    """
    ua = get_user_agent_instance()

    # Weighted choice: bias toward Chrome and Edge.
    browser_choices = ["chrome", "chrome", "chrome", "edge", "edge", "firefox", "safari"]
    browser_type = random.choice(browser_choices)

    # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not
    # swallowed; fake_useragent property lookups can raise on a stale DB.
    try:
        if browser_type == "chrome":
            user_agent = ua.chrome
        elif browser_type == "edge":
            user_agent = ua.edge
        elif browser_type == "firefox":
            user_agent = ua.firefox
        elif browser_type == "safari":
            user_agent = ua.safari
        else:
            user_agent = ua.random
    except Exception:
        user_agent = ua.random

    # Fallback major-version numbers used when UA parsing fails.
    chrome_version = "139"
    edge_version = "139"

    if "Chrome/" in user_agent:
        try:
            chrome_version = user_agent.split("Chrome/")[1].split(".")[0]
        except Exception:
            pass

    if "Edg/" in user_agent:
        try:
            edge_version = user_agent.split("Edg/")[1].split(".")[0]
            sec_ch_ua = f'"Microsoft Edge";v="{edge_version}", "Chromium";v="{chrome_version}", "Not_A Brand";v="24"'
        except Exception:
            sec_ch_ua = f'"Not_A Brand";v="8", "Chromium";v="{chrome_version}", "Google Chrome";v="{chrome_version}"'
    elif "Firefox/" in user_agent:
        sec_ch_ua = None  # Firefox does not send sec-ch-ua
    else:
        sec_ch_ua = f'"Not_A Brand";v="8", "Chromium";v="{chrome_version}", "Google Chrome";v="{chrome_version}"'

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json, text/event-stream",
        "User-Agent": user_agent,
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "X-FE-Version": "prod-fe-1.0.79",
        "Origin": "https://chat.z.ai",
    }

    # Client hints only apply to Chromium-based UAs.
    if sec_ch_ua:
        headers["sec-ch-ua"] = sec_ch_ua
        headers["sec-ch-ua-mobile"] = "?0"
        headers["sec-ch-ua-platform"] = '"Windows"'

    if chat_id:
        headers["Referer"] = f"https://chat.z.ai/c/{chat_id}"
    else:
        headers["Referer"] = "https://chat.z.ai/"

    return headers
93
+
94
+
95
def generate_uuid() -> str:
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
98
+
99
+
100
def get_auth_token_sync() -> str:
    """Synchronously obtain an auth token (for non-async call sites).

    In anonymous mode a guest token is requested from chat.z.ai first;
    on failure (or when anonymous mode is off) the token pool is used.
    Returns an empty string when no token is available.
    """
    if settings.ANONYMOUS_MODE:
        try:
            headers = get_dynamic_headers()
            with httpx.Client() as client:
                response = client.get("https://chat.z.ai/api/v1/auths/", headers=headers, timeout=10.0)
                if response.status_code == 200:
                    data = response.json()
                    token = data.get("token", "")
                    if token:
                        debug_log(f"获取访客令牌成功: {token[:20]}...")
                        return token
        except Exception as e:
            # Best-effort: fall through to the token pool below.
            debug_log(f"获取访客令牌失败: {e}")

    # Fall back to the token manager's pool.
    token = token_manager.get_next_token()
    if token:
        debug_log(f"从token池获取令牌: {token[:20]}...")
        return token

    # No token available at all.
    debug_log("⚠️ 没有可用的备份token")
    return ""
125
+
126
+
127
class ZAITransformer:
    """Transformer that converts OpenAI-format requests into Z.AI requests."""

    def __init__(self):
        """Initialise endpoint URLs and the model-name mapping."""
        self.name = "zai"
        self.base_url = "https://chat.z.ai"
        self.api_url = settings.API_ENDPOINT
        self.auth_url = f"{self.base_url}/api/v1/auths/"

        # Public model name -> upstream Z.AI model id.
        self.model_mapping = {
            settings.PRIMARY_MODEL: "0727-360B-API",   # GLM-4.5
            settings.THINKING_MODEL: "0727-360B-API",  # GLM-4.5-Thinking
            settings.SEARCH_MODEL: "0727-360B-API",    # GLM-4.5-Search
            settings.AIR_MODEL: "0727-106B-API",       # GLM-4.5-Air
        }

    async def get_token(self) -> str:
        """Asynchronously obtain an auth token.

        In anonymous mode a guest token is requested first; otherwise (or on
        failure) the token pool is consulted. Returns "" when nothing is
        available.
        """
        if settings.ANONYMOUS_MODE:
            try:
                headers = get_dynamic_headers()
                async with httpx.AsyncClient() as client:
                    response = await client.get(self.auth_url, headers=headers, timeout=10.0)
                    if response.status_code == 200:
                        data = response.json()
                        token = data.get("token", "")
                        if token:
                            debug_log(f"获取访客令牌成功: {token[:20]}...")
                            return token
            except Exception as e:
                debug_log(f"异步获取访客令牌失败: {e}")

        # Fall back to the token manager's pool.
        token = token_manager.get_next_token()
        if token:
            debug_log(f"从token池获取令牌: {token[:20]}...")
            return token

        # No token available at all.
        debug_log("⚠️ 没有可用的备份token")
        return ""

    def mark_token_success(self, token: str):
        """Record a successful use of *token* with the token manager."""
        token_manager.mark_token_success(token)

    def mark_token_failure(self, token: str, error: Optional[Exception] = None):
        """Record a failed use of *token*; *error* is accepted but unused."""
        token_manager.mark_token_failed(token)

    async def transform_request_in(self, request: Dict[str, Any]) -> Dict[str, Any]:
        """Convert an OpenAI chat request into the Z.AI upstream format.

        Handles model mapping, system-role rewriting, image parts, MCP
        server selection for search models, and tool pass-through.

        Args:
            request: OpenAI-format request dict (model, messages, tools...).

        Returns:
            Dict with "body" (upstream payload), "config" (url + headers)
            and "token" (the auth token used).

        Raises:
            Exception: when no valid auth token can be obtained.
        """
        debug_log(f"🔄 开始转换 OpenAI 请求到 Z.AI 格式: {request.get('model', settings.PRIMARY_MODEL)} -> Z.AI")

        # Obtain an auth token.
        token = await self.get_token()
        debug_log(f"   使用令牌: {token[:20] if token else 'None'}...")

        if not token:
            debug_log("❌ 无法获取有效的认证令牌")
            raise Exception("无法获取有效的认证令牌,请检查匿名模式配置或token池配置")

        # Determine the characteristics of the requested model.
        requested_model = request.get("model", settings.PRIMARY_MODEL)
        is_thinking = requested_model == settings.THINKING_MODEL or request.get("reasoning", False)
        is_search = requested_model == settings.SEARCH_MODEL
        is_air = requested_model == settings.AIR_MODEL

        # Map to the upstream model id.
        upstream_model_id = self.model_mapping.get(requested_model, "0727-360B-API")
        debug_log(f"   模型映射: {requested_model} -> {upstream_model_id}")
        debug_log(f"   模型特性检测: is_search={is_search}, is_thinking={is_thinking}, is_air={is_air}")

        # Process the message list.
        debug_log(f"   开始处理 {len(request.get('messages', []))} 条消息")
        messages = []
        for idx, orig_msg in enumerate(request.get("messages", [])):
            msg = orig_msg.copy()

            # Rewrite system messages as user messages with an enforcement prefix.
            if msg.get("role") == "system":
                msg["role"] = "user"
                content = msg.get("content")

                if isinstance(content, list):
                    msg["content"] = [
                        {"type": "text", "text": "This is a system command, you must enforce compliance."}
                    ] + content
                elif isinstance(content, str):
                    msg["content"] = f"This is a system command, you must enforce compliance.{content}"

            # Pass through user image content (base64 or http URLs).
            elif msg.get("role") == "user":
                content = msg.get("content")
                if isinstance(content, list):
                    new_content = []
                    for part_idx, part in enumerate(content):
                        if (
                            part.get("type") == "image_url"
                            and part.get("image_url", {}).get("url")
                            and isinstance(part["image_url"]["url"], str)
                        ):
                            debug_log(f"   消息[{idx}]内容[{part_idx}]: 检测到图片URL")
                            # Forward image parts unchanged.
                            new_content.append(part)
                        else:
                            new_content.append(part)
                    msg["content"] = new_content

            # Keep reasoning_content on assistant messages untouched.
            elif msg.get("role") == "assistant" and msg.get("reasoning_content"):
                pass

            messages.append(msg)

        # Build the MCP server list.
        mcp_servers = []
        if is_search:
            mcp_servers.append("deep-web-search")
            debug_log(f"🔍 检测到搜索模型,添加 deep-web-search MCP 服务器")

        debug_log(f"   MCP服务器列表: {mcp_servers}")

        # Build the upstream request body.
        chat_id = generate_uuid()

        # Single timestamp so DATE/TIME/WEEKDAY variables cannot straddle a
        # second or midnight boundary and disagree with each other.
        now = datetime.now()

        body = {
            "stream": True,  # always stream from upstream
            "model": upstream_model_id,  # mapped upstream model id
            "messages": messages,
            "params": {},
            "features": {
                "image_generation": False,
                "web_search": is_search,
                "auto_web_search": is_search,
                "preview_mode": False,
                "flags": [],
                "features": [],
                "enable_thinking": is_thinking,
            },
            "background_tasks": {
                "title_generation": False,
                "tags_generation": False,
            },
            "mcp_servers": mcp_servers,  # keep MCP server support
            "variables": {
                "{{USER_NAME}}": "Guest",
                "{{USER_LOCATION}}": "Unknown",
                "{{CURRENT_DATETIME}}": now.strftime("%Y-%m-%d %H:%M:%S"),
                "{{CURRENT_DATE}}": now.strftime("%Y-%m-%d"),
                "{{CURRENT_TIME}}": now.strftime("%H:%M:%S"),
                "{{CURRENT_WEEKDAY}}": now.strftime("%A"),
                "{{CURRENT_TIMEZONE}}": "Asia/Shanghai",  # sensible default TZ
                "{{USER_LANGUAGE}}": "zh-CN",
            },
            "model_item": {
                "id": upstream_model_id,
                "name": requested_model,
                "owned_by": "z.ai"
            },
            "chat_id": chat_id,
            "id": generate_uuid(),
        }

        # Tool support: pass tools through unless thinking mode is active.
        if settings.TOOL_SUPPORT and not is_thinking and request.get("tools"):
            body["tools"] = request["tools"]
            debug_log(f"启用工具支持: {len(request['tools'])} 个工具")
        else:
            body["tools"] = None

        # Build the request configuration.
        dynamic_headers = get_dynamic_headers(chat_id)

        config = {
            "url": self.api_url,
            "headers": {
                **dynamic_headers,  # dynamically generated browser headers
                "Authorization": f"Bearer {token}",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "Pragma": "no-cache",
                "Sec-Fetch-Dest": "empty",
                "Sec-Fetch-Mode": "cors",
                "Sec-Fetch-Site": "same-origin",
            },
        }

        debug_log("✅ 请求转换完成")

        # Log the key request details for debugging.
        debug_log(f"   📋 发送到Z.AI的关键信息:")
        debug_log(f"      - 上游模型: {body['model']}")
        debug_log(f"      - MCP服务器: {body['mcp_servers']}")
        debug_log(f"      - web_search: {body['features']['web_search']}")
        debug_log(f"      - auto_web_search: {body['features']['auto_web_search']}")
        debug_log(f"      - 消息数量: {len(body['messages'])}")
        tools_count = len(body.get('tools') or [])
        debug_log(f"      - 工具数量: {tools_count}")

        # Return the transformed request data.
        return {
            "body": body,
            "config": config,
            "token": token
        }
app/utils/sse_tool_handler.py ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """
5
+ SSE Tool Handler - 处理工具调用的SSE流
6
+ 基于 Z.AI 原生的 edit_index 和 edit_content 机制,更原生地处理工具调用
7
+ """
8
+
9
+ import json
10
+ import re
11
+ import time
12
+ from typing import Dict, Any, Optional, Generator, List
13
+
14
+ from app.utils.helpers import debug_log
15
+
16
+
17
+ class SSEToolHandler:
18
+
19
+ def __init__(self, chat_id: str, model: str):
20
+ self.chat_id = chat_id
21
+ self.model = model
22
+
23
+ # 工具调用状态
24
+ self.has_tool_call = False
25
+ self.tool_call_usage = None # 工具调用的usage信息
26
+ self.content_index = 0
27
+ self.has_thinking = False
28
+
29
+ self.content_buffer = bytearray() # 使用字节数组提高性能
30
+ self.last_edit_index = 0 # 上次编辑的位置
31
+
32
+ # 工具调用解析状态
33
+ self.active_tools = {} # 活跃的工具调用 {tool_id: tool_info}
34
+ self.completed_tools = [] # 已完成的工具调用
35
+ self.tool_blocks_cache = {} # 缓存解析的工具块
36
+
37
+ def process_tool_call_phase(self, data: Dict[str, Any], is_stream: bool = True) -> Generator[str, None, None]:
38
+ """
39
+ 处理tool_call阶段
40
+ """
41
+ if not self.has_tool_call:
42
+ self.has_tool_call = True
43
+ debug_log("🔧 进入工具调用阶段")
44
+
45
+ edit_content = data.get("edit_content", "")
46
+ edit_index = data.get("edit_index", 0)
47
+
48
+ if not edit_content:
49
+ return
50
+
51
+ # debug_log(f"📦 接收内容片段 [index={edit_index}]: {edit_content[:1000]}...")
52
+
53
+ # 更新内容缓冲区
54
+ self._apply_edit_to_buffer(edit_index, edit_content)
55
+
56
+ # 尝试解析和处理工具调用
57
+ yield from self._process_tool_calls_from_buffer(is_stream)
58
+
59
+ def _apply_edit_to_buffer(self, edit_index: int, edit_content: str):
60
+ """
61
+ 在指定位置替换/插入内容更新内容缓冲区
62
+ """
63
+ edit_bytes = edit_content.encode('utf-8')
64
+ required_length = edit_index + len(edit_bytes)
65
+
66
+ # 扩展缓冲区到所需长度(如果需要)
67
+ if len(self.content_buffer) < edit_index:
68
+ # 如果edit_index超出当前缓冲区,用空字节填充
69
+ self.content_buffer.extend(b'\x00' * (edit_index - len(self.content_buffer)))
70
+
71
+ # 确保缓冲区足够长以容纳新内容
72
+ if len(self.content_buffer) < required_length:
73
+ self.content_buffer.extend(b'\x00' * (required_length - len(self.content_buffer)))
74
+
75
+ # 在指定位置替换内容(不是插入,而是覆盖)
76
+ end_index = edit_index + len(edit_bytes)
77
+ self.content_buffer[edit_index:end_index] = edit_bytes
78
+
79
+ # debug_log(f"📝 缓冲区更新 [index={edit_index}, 长度={len(self.content_buffer)}]")
80
+
81
+ def _process_tool_calls_from_buffer(self, is_stream: bool) -> Generator[str, None, None]:
82
+ """
83
+ 从内容缓冲区中解析和处理工具调用
84
+ """
85
+ try:
86
+ # 解码内容并清理空字节
87
+ content_str = self.content_buffer.decode('utf-8', errors='ignore').replace('\x00', '')
88
+ yield from self._extract_and_process_tools(content_str, is_stream)
89
+ except Exception as e:
90
+ debug_log(f"📦 内容解析暂时失败,等待更多数据: {e}")
91
+ # 不抛出异常,继续等待更多数据
92
+
93
+ def _extract_and_process_tools(self, content_str: str, is_stream: bool) -> Generator[str, None, None]:
94
+ """
95
+ 从内容字符串中提取和处理工具调用
96
+ """
97
+ # 查找所有 glm_block,包括不完整的
98
+ pattern = r'<glm_block\s*>(.*?)(?:</glm_block>|$)'
99
+ matches = re.findall(pattern, content_str, re.DOTALL)
100
+
101
+ for block_content in matches:
102
+ # 尝试解析每个块
103
+ yield from self._process_single_tool_block(block_content, is_stream)
104
+
105
+ def _process_single_tool_block(self, block_content: str, is_stream: bool) -> Generator[str, None, None]:
106
+ """
107
+ 处理单个工具块,支持增量解析
108
+ """
109
+ try:
110
+ # 尝试修复和解析完整的JSON
111
+ fixed_content = self._fix_json_structure(block_content)
112
+ tool_data = json.loads(fixed_content)
113
+ metadata = tool_data.get("data", {}).get("metadata", {})
114
+
115
+ tool_id = metadata.get("id", "")
116
+ tool_name = metadata.get("name", "")
117
+ arguments_raw = metadata.get("arguments", "{}")
118
+
119
+ if not tool_id or not tool_name:
120
+ return
121
+
122
+ debug_log(f"🎯 解析完整工具块: {tool_name}(id={tool_id}), 参数: {arguments_raw}")
123
+
124
+ # 检查是否是新工具或更新的工具
125
+ yield from self._handle_tool_update(tool_id, tool_name, arguments_raw, is_stream)
126
+
127
+ except json.JSONDecodeError as e:
128
+ debug_log(f"📦 JSON解析失败: {e}, 尝试部分解析")
129
+ # JSON 不完整,��试部分解析
130
+ yield from self._handle_partial_tool_block(block_content, is_stream)
131
+ except Exception as e:
132
+ debug_log(f"📦 工具块处理失败: {e}")
133
+
134
+ def _fix_json_structure(self, content: str) -> str:
135
+ """
136
+ 修复JSON结构中的常见问题
137
+ """
138
+ if not content:
139
+ return content
140
+
141
+ # 计算括号平衡
142
+ open_braces = content.count('{')
143
+ close_braces = content.count('}')
144
+
145
+ # 如果闭括号多于开括号,移除多余的闭括号
146
+ if close_braces > open_braces:
147
+ excess = close_braces - open_braces
148
+ fixed_content = content
149
+ for _ in range(excess):
150
+ # 从右侧移除多余的闭括号
151
+ last_brace_pos = fixed_content.rfind('}')
152
+ if last_brace_pos != -1:
153
+ fixed_content = fixed_content[:last_brace_pos] + fixed_content[last_brace_pos + 1:]
154
+ return fixed_content
155
+
156
+ return content
157
+
158
+ def _handle_tool_update(self, tool_id: str, tool_name: str, arguments_raw: str, is_stream: bool) -> Generator[str, None, None]:
159
+ """
160
+ 处理工具的创建或更新 - 更可靠的参数完整性检查
161
+ """
162
+ # 解析参数
163
+ try:
164
+ if isinstance(arguments_raw, str):
165
+ # 先处理转义和清理
166
+ cleaned_args = self._clean_arguments_string(arguments_raw)
167
+ arguments = json.loads(cleaned_args) if cleaned_args.strip() else {}
168
+ else:
169
+ arguments = arguments_raw
170
+ except json.JSONDecodeError:
171
+ debug_log(f"📦 参数解析失败,暂不处理: {arguments_raw}")
172
+ # 参数解析失败时,不创建或更新工具,等待更完整的数据
173
+ return
174
+
175
+ # 检查参数是否看起来完整(基本的完整性验证)
176
+ is_args_complete = self._is_arguments_complete(arguments, arguments_raw)
177
+
178
+ # 检查是否是新工具
179
+ if tool_id not in self.active_tools:
180
+ debug_log(f"🎯 发现新工具: {tool_name}(id={tool_id}), 参数完整性: {is_args_complete}")
181
+
182
+ self.active_tools[tool_id] = {
183
+ "id": tool_id,
184
+ "name": tool_name,
185
+ "arguments": arguments,
186
+ "arguments_raw": arguments_raw,
187
+ "status": "active",
188
+ "sent_start": False,
189
+ "last_sent_args": {}, # 跟踪上次发送的参数
190
+ "args_complete": is_args_complete,
191
+ "pending_send": True # 标记需要发送
192
+ }
193
+
194
+ # 只有在参数看起来完整时才发送工具开始信号
195
+ if is_stream and is_args_complete:
196
+ yield self._create_tool_start_chunk(tool_id, tool_name, arguments)
197
+ self.active_tools[tool_id]["sent_start"] = True
198
+ self.active_tools[tool_id]["last_sent_args"] = arguments.copy()
199
+ self.active_tools[tool_id]["pending_send"] = False
200
+ debug_log(f"📤 发送完整工具开始: {tool_name}(id={tool_id})")
201
+
202
+ else:
203
+ # 更新现有工具
204
+ current_tool = self.active_tools[tool_id]
205
+
206
+ # 检查是否有实质性改进
207
+ if self._is_significant_improvement(current_tool["arguments"], arguments,
208
+ current_tool["arguments_raw"], arguments_raw):
209
+ debug_log(f"🔄 工具参数有实质性改进: {tool_name}(id={tool_id})")
210
+
211
+ current_tool["arguments"] = arguments
212
+ current_tool["arguments_raw"] = arguments_raw
213
+ current_tool["args_complete"] = is_args_complete
214
+
215
+ # 如果之前没有发送过开始信号,且现在参数完整,发送开始信号
216
+ if is_stream and not current_tool["sent_start"] and is_args_complete:
217
+ yield self._create_tool_start_chunk(tool_id, tool_name, arguments)
218
+ current_tool["sent_start"] = True
219
+ current_tool["last_sent_args"] = arguments.copy()
220
+ current_tool["pending_send"] = False
221
+ debug_log(f"📤 发送延迟的工具开始: {tool_name}(id={tool_id})")
222
+
223
+ # 如果已经发送过开始信号,且参数有显著改进,发送参数更新
224
+ elif is_stream and current_tool["sent_start"] and is_args_complete:
225
+ if self._should_send_argument_update(current_tool["last_sent_args"], arguments):
226
+ yield self._create_tool_arguments_chunk(tool_id, arguments)
227
+ current_tool["last_sent_args"] = arguments.copy()
228
+ debug_log(f"📤 发送参数更新: {tool_name}(id={tool_id})")
229
+
230
+ def _is_arguments_complete(self, arguments: Dict[str, Any], arguments_raw: str) -> bool:
231
+ """
232
+ 检查参数是否看起来完整
233
+ """
234
+ if not arguments:
235
+ return False
236
+
237
+ # 检查原始字符串是否看起来完整
238
+ if not arguments_raw or not arguments_raw.strip():
239
+ return False
240
+
241
+ # 检查是否有明显的截断迹象
242
+ raw_stripped = arguments_raw.strip()
243
+
244
+ # 如果原始字符串不以}结尾,可能是截断的
245
+ if not raw_stripped.endswith('}') and not raw_stripped.endswith('"'):
246
+ return False
247
+
248
+ # 检查是否有不完整的URL(常见的截断情况)
249
+ for key, value in arguments.items():
250
+ if isinstance(value, str):
251
+ # 检查URL是否看起来完整
252
+ if 'http' in value.lower():
253
+ # 如果URL太短或以不完整的域名结尾,可能是截断的
254
+ if len(value) < 10 or value.endswith('.go') or value.endswith('.goo'):
255
+ return False
256
+
257
+ # 检查其他可能的截断迹象
258
+ if len(value) > 0 and value[-1] in ['.', '/', ':', '=']:
259
+ # 以这些字符结尾可能表示截断
260
+ return False
261
+
262
+ return True
263
+
264
+ def _is_significant_improvement(self, old_args: Dict[str, Any], new_args: Dict[str, Any],
265
+ old_raw: str, new_raw: str) -> bool:
266
+ """
267
+ 检查新参数是否比旧参数有显著改进
268
+ """
269
+ # 如果新参数为空,不是改进
270
+ if not new_args:
271
+ return False
272
+
273
+ if len(new_args) > len(old_args):
274
+ return True
275
+
276
+ # 检查值的改进
277
+ for key, new_value in new_args.items():
278
+ old_value = old_args.get(key, "")
279
+
280
+ if isinstance(new_value, str) and isinstance(old_value, str):
281
+ # 如果新值明显更长且更完整,是改进
282
+ if len(new_value) > len(old_value) + 5: # 至少长5个字符才算显著改进
283
+ return True
284
+
285
+ # 如果旧值看起来是截断的,新值更完整,是改进
286
+ if old_value.endswith(('.go', '.goo', '.com/', 'http')) and len(new_value) > len(old_value):
287
+ return True
288
+
289
+ # 检查原始字符串的改进
290
+ if len(new_raw) > len(old_raw) + 10: # 原始字符串显著增长
291
+ return True
292
+
293
+ return False
294
+
295
+ def _should_send_argument_update(self, last_sent: Dict[str, Any], new_args: Dict[str, Any]) -> bool:
296
+ """
297
+ 判断是否应该发送参数更新 - 更严格的标准
298
+ """
299
+ # 如果参数完全相同,不发送
300
+ if last_sent == new_args:
301
+ return False
302
+
303
+ # 如果新参数为空但之前有参数,不发送(避免倒退)
304
+ if not new_args and last_sent:
305
+ return False
306
+
307
+ # 如果新参数有更多键,发送更新
308
+ if len(new_args) > len(last_sent):
309
+ return True
310
+
311
+ # 检查是否有值变得显著更完整
312
+ for key, new_value in new_args.items():
313
+ last_value = last_sent.get(key, "")
314
+ if isinstance(new_value, str) and isinstance(last_value, str):
315
+ # 只有在值显著增长时才发送更新(避免微小变化)
316
+ if len(new_value) > len(last_value) + 5:
317
+ return True
318
+ elif new_value != last_value and new_value: # 确保新值不为空
319
+ return True
320
+
321
+ return False
322
+
323
+ def _handle_partial_tool_block(self, block_content: str, is_stream: bool) -> Generator[str, None, None]:
324
+ """
325
+ 处理不完整的工具块,尝试提取可用信息
326
+ """
327
+ try:
328
+ # 尝试提取工具ID和名称
329
+ id_match = re.search(r'"id":\s*"([^"]+)"', block_content)
330
+ name_match = re.search(r'"name":\s*"([^"]+)"', block_content)
331
+
332
+ if id_match and name_match:
333
+ tool_id = id_match.group(1)
334
+ tool_name = name_match.group(1)
335
+
336
+ # 尝试提取参数部分
337
+ args_match = re.search(r'"arguments":\s*"([^"]*)', block_content)
338
+ partial_args = args_match.group(1) if args_match else ""
339
+
340
+ debug_log(f"📦 部分工具块: {tool_name}(id={tool_id}), 部分参数: {partial_args[:50]}")
341
+
342
+ # 如果是新工具,先创建记录
343
+ if tool_id not in self.active_tools:
344
+ # 尝试解析部分参数为字典
345
+ partial_args_dict = self._parse_partial_arguments(partial_args)
346
+
347
+ self.active_tools[tool_id] = {
348
+ "id": tool_id,
349
+ "name": tool_name,
350
+ "arguments": partial_args_dict,
351
+ "status": "partial",
352
+ "sent_start": False,
353
+ "last_sent_args": {},
354
+ "args_complete": False,
355
+ "partial_args": partial_args
356
+ }
357
+
358
+ if is_stream:
359
+ yield self._create_tool_start_chunk(tool_id, tool_name, partial_args_dict)
360
+ self.active_tools[tool_id]["sent_start"] = True
361
+ self.active_tools[tool_id]["last_sent_args"] = partial_args_dict.copy()
362
+ else:
363
+ # 更新部分参数
364
+ self.active_tools[tool_id]["partial_args"] = partial_args
365
+ # 尝试更新解析的参数
366
+ new_partial_dict = self._parse_partial_arguments(partial_args)
367
+ if new_partial_dict != self.active_tools[tool_id]["arguments"]:
368
+ self.active_tools[tool_id]["arguments"] = new_partial_dict
369
+
370
+ except Exception as e:
371
+ debug_log(f"📦 部分块解析失败: {e}")
372
+
373
+ def _clean_arguments_string(self, arguments_raw: str) -> str:
374
+ """
375
+ 清理和标准化参数字符串,改进对不完整JSON的处理
376
+ """
377
+ if not arguments_raw:
378
+ return "{}"
379
+
380
+ # 移除首尾空白
381
+ cleaned = arguments_raw.strip()
382
+
383
+ # 处理特殊值
384
+ if cleaned.lower() == "null":
385
+ return "{}"
386
+
387
+ # 处理转义的JSON字符串
388
+ if cleaned.startswith('{\\"') and cleaned.endswith('\\"}'):
389
+ # 这是一个转义的JSON字符串,需要反转义
390
+ cleaned = cleaned.replace('\\"', '"')
391
+ elif cleaned.startswith('"{\\"') and cleaned.endswith('\\"}'):
392
+ # 双重转义的情况
393
+ cleaned = cleaned[1:-1].replace('\\"', '"')
394
+ elif cleaned.startswith('"') and cleaned.endswith('"'):
395
+ # 简单的引号包围,去除外层引号
396
+ cleaned = cleaned[1:-1]
397
+
398
+ # 处理不完整的JSON字符串
399
+ cleaned = self._fix_incomplete_json(cleaned)
400
+
401
+ # 标准化空格(移除JSON中的多余空格,但保留字符串值中的空格)
402
+ try:
403
+ # 先尝试解析,然后重新序列化以标准化格式
404
+ parsed = json.loads(cleaned)
405
+ if parsed is None:
406
+ return "{}"
407
+ cleaned = json.dumps(parsed, ensure_ascii=False, separators=(',', ':'))
408
+ except json.JSONDecodeError:
409
+ # 如果解析失败,只做基本的空格清理
410
+ debug_log(f"📦 JSON标准化失败,保持原样: {cleaned[:50]}...")
411
+
412
+ return cleaned
413
+
414
+ def _fix_incomplete_json(self, json_str: str) -> str:
415
+ """
416
+ 修复不完整的JSON字符串
417
+ """
418
+ if not json_str:
419
+ return "{}"
420
+
421
+ # 确保以{开头
422
+ if not json_str.startswith('{'):
423
+ json_str = '{' + json_str
424
+
425
+ # 处理不完整的字符串值
426
+ if json_str.count('"') % 2 != 0:
427
+ # 奇数个引号,可能有未闭合的字符串
428
+ json_str += '"'
429
+
430
+ # 确保以}结尾
431
+ if not json_str.endswith('}'):
432
+ json_str += '}'
433
+
434
+ return json_str
435
+
436
+ def _parse_partial_arguments(self, arguments_raw: str) -> Dict[str, Any]:
437
+ """
438
+ 解析不完整的参数字符串,尽可能提取有效信息
439
+ """
440
+ if not arguments_raw or arguments_raw.strip() == "" or arguments_raw.strip().lower() == "null":
441
+ return {}
442
+
443
+ try:
444
+ # 先尝试清理字符串
445
+ cleaned = self._clean_arguments_string(arguments_raw)
446
+ result = json.loads(cleaned)
447
+ # 确保返回字典类型
448
+ return result if isinstance(result, dict) else {}
449
+ except json.JSONDecodeError:
450
+ pass
451
+
452
+ try:
453
+ # 尝试修复常见的JSON问题
454
+ fixed_args = arguments_raw.strip()
455
+
456
+ # 处理转义字符
457
+ if '\\' in fixed_args:
458
+ fixed_args = fixed_args.replace('\\"', '"')
459
+
460
+ # 如果不是以{开头,添加{
461
+ if not fixed_args.startswith('{'):
462
+ fixed_args = '{' + fixed_args
463
+
464
+ # 如果不是以}结尾,尝试添加}
465
+ if not fixed_args.endswith('}'):
466
+ # 计算未闭合的引号和括号
467
+ quote_count = fixed_args.count('"') - fixed_args.count('\\"')
468
+ if quote_count % 2 != 0:
469
+ fixed_args += '"'
470
+ fixed_args += '}'
471
+
472
+ return json.loads(fixed_args)
473
+ except json.JSONDecodeError:
474
+ # 尝试提取键值对
475
+ return self._extract_key_value_pairs(arguments_raw)
476
+ except Exception:
477
+ # 如果所有方法都失败,返回空字典
478
+ return {}
479
+
480
+ def _extract_key_value_pairs(self, text: str) -> Dict[str, Any]:
481
+ """
482
+ 从文本中提取键值对,作为最后的解析尝试
483
+ """
484
+ result = {}
485
+ try:
486
+ # 使用正则表达式提取简单的键值对
487
+ import re
488
+
489
+ # 匹配 "key": "value" 或 "key": value 格式
490
+ pattern = r'"([^"]+)":\s*"([^"]*)"'
491
+ matches = re.findall(pattern, text)
492
+
493
+ for key, value in matches:
494
+ result[key] = value
495
+
496
+ # 匹配数字值
497
+ pattern = r'"([^"]+)":\s*(\d+)'
498
+ matches = re.findall(pattern, text)
499
+
500
+ for key, value in matches:
501
+ try:
502
+ result[key] = int(value)
503
+ except ValueError:
504
+ result[key] = value
505
+
506
+ # 匹配布尔值
507
+ pattern = r'"([^"]+)":\s*(true|false)'
508
+ matches = re.findall(pattern, text)
509
+
510
+ for key, value in matches:
511
+ result[key] = value.lower() == 'true'
512
+
513
+ except Exception:
514
+ pass
515
+
516
+ return result
517
+
518
+ def _complete_active_tools(self, is_stream: bool) -> Generator[str, None, None]:
519
+ """
520
+ 完成所有活跃的工具调用 - 处理待发送的工具
521
+ """
522
+ tools_to_send = []
523
+
524
+ for tool_id, tool in self.active_tools.items():
525
+ # 如果工具还没有发送过且参数看起来完整,现在发送
526
+ if is_stream and tool.get("pending_send", False) and not tool.get("sent_start", False):
527
+ if tool.get("args_complete", False):
528
+ debug_log(f"📤 完成时发送待发送工具: {tool['name']}(id={tool_id})")
529
+ yield self._create_tool_start_chunk(tool_id, tool["name"], tool["arguments"])
530
+ tool["sent_start"] = True
531
+ tool["pending_send"] = False
532
+ tools_to_send.append(tool)
533
+ else:
534
+ debug_log(f"⚠️ 跳过不完整的工具: {tool['name']}(id={tool_id})")
535
+
536
+ tool["status"] = "completed"
537
+ self.completed_tools.append(tool)
538
+ debug_log(f"✅ 完成工具调用: {tool['name']}(id={tool_id})")
539
+
540
+ self.active_tools.clear()
541
+
542
+ if is_stream and (self.completed_tools or tools_to_send):
543
+ # 发送工具完成信号
544
+ yield self._create_tool_finish_chunk()
545
+
546
+ def process_other_phase(self, data: Dict[str, Any], is_stream: bool = True) -> Generator[str, None, None]:
547
+ """
548
+ 处理other阶段 - 检测工具调用结束和状态更新
549
+ """
550
+ edit_content = data.get("edit_content", "")
551
+ edit_index = data.get("edit_index", 0)
552
+ usage = data.get("usage")
553
+
554
+ # 保存usage信息
555
+ if self.has_tool_call and usage:
556
+ self.tool_call_usage = usage
557
+ debug_log(f"💾 保存工具调用usage: {usage}")
558
+
559
+ # 如果有edit_content,继续更新内容缓冲区
560
+ if edit_content:
561
+ self._apply_edit_to_buffer(edit_index, edit_content)
562
+ # 继续处理可能的工具调用更新
563
+ yield from self._process_tool_calls_from_buffer(is_stream)
564
+
565
+ # 检测工具调用结束的多种标记
566
+ if self.has_tool_call and self._is_tool_call_finished(edit_content):
567
+ debug_log("🏁 检测到工具调用结束")
568
+
569
+ # 完成所有活跃的工具
570
+ yield from self._complete_active_tools(is_stream)
571
+
572
+ if is_stream:
573
+ debug_log("🏁 发送工具调用完成信号")
574
+ yield "data: [DONE]"
575
+
576
+ # 重置工具调用状态
577
+ self.has_tool_call = False
578
+
579
+ def _is_tool_call_finished(self, edit_content: str) -> bool:
580
+ """
581
+ 检测工具调用是否结束的多种标记
582
+ """
583
+ if not edit_content:
584
+ return False
585
+
586
+ # 检测各种结束标记
587
+ end_markers = [
588
+ "null,", # 原有的结束标记
589
+ '"status": "completed"', # 状态完成标记
590
+ '"is_error": false', # 错误状态标记
591
+ ]
592
+
593
+ for marker in end_markers:
594
+ if marker in edit_content:
595
+ debug_log(f"🔍 检测到结束标记: {marker}")
596
+ return True
597
+
598
+ # 检查是否所有工具都有完整的结构
599
+ if self.active_tools and '"status": "completed"' in self.content_buffer:
600
+ return True
601
+
602
+ return False
603
+
604
+ def _reset_all_state(self):
605
+ """重置所有状态"""
606
+ self.has_tool_call = False
607
+ self.tool_call_usage = None
608
+ self.content_index = 0
609
+ self.content_buffer = bytearray()
610
+ self.last_edit_index = 0
611
+ self.active_tools.clear()
612
+ self.completed_tools.clear()
613
+ self.tool_blocks_cache.clear()
614
+
615
+ def _create_tool_start_chunk(self, tool_id: str, tool_name: str, initial_args: Dict[str, Any] = None) -> str:
616
+ """创建工具调用开始的chunk,支持初始参数"""
617
+ # 使用提供的初始参数,如果没有则使用空字典
618
+ args_dict = initial_args or {}
619
+ args_str = json.dumps(args_dict, ensure_ascii=False)
620
+
621
+ chunk = {
622
+ "choices": [
623
+ {
624
+ "delta": {
625
+ "role": "assistant",
626
+ "content": None,
627
+ "tool_calls": [
628
+ {
629
+ "id": tool_id,
630
+ "type": "function",
631
+ "function": {"name": tool_name, "arguments": args_str},
632
+ }
633
+ ],
634
+ },
635
+ "finish_reason": None,
636
+ "index": self.content_index,
637
+ "logprobs": None,
638
+ }
639
+ ],
640
+ "created": int(time.time()),
641
+ "id": self.chat_id,
642
+ "model": self.model,
643
+ "object": "chat.completion.chunk",
644
+ "system_fingerprint": "fp_zai_001",
645
+ }
646
+ return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
647
+
648
+ def _create_tool_arguments_chunk(self, tool_id: str, arguments: Dict) -> str:
649
+ """创建工具参数的chunk - 只包含参数更新,不包含函数名"""
650
+ chunk = {
651
+ "choices": [
652
+ {
653
+ "delta": {
654
+ "tool_calls": [
655
+ {
656
+ "id": tool_id,
657
+ "function": {"arguments": json.dumps(arguments, ensure_ascii=False)},
658
+ }
659
+ ],
660
+ },
661
+ "finish_reason": None,
662
+ "index": self.content_index,
663
+ "logprobs": None,
664
+ }
665
+ ],
666
+ "created": int(time.time()),
667
+ "id": self.chat_id,
668
+ "model": self.model,
669
+ "object": "chat.completion.chunk",
670
+ "system_fingerprint": "fp_zai_001",
671
+ }
672
+ return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
673
+
674
+ def _create_tool_finish_chunk(self) -> str:
675
+ """创建工具调用完成的chunk"""
676
+ chunk = {
677
+ "choices": [
678
+ {
679
+ "delta": {"role": "assistant", "content": None, "tool_calls": []},
680
+ "finish_reason": "tool_calls",
681
+ "index": 0,
682
+ "logprobs": None,
683
+ }
684
+ ],
685
+ "created": int(time.time()),
686
+ "id": self.chat_id,
687
+ "usage": self.tool_call_usage or None,
688
+ "model": self.model,
689
+ "object": "chat.completion.chunk",
690
+ "system_fingerprint": "fp_zai_001",
691
+ }
692
+ return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
pyproject.toml DELETED
@@ -1,64 +0,0 @@
1
- [build-system]
2
- requires = ["hatchling"]
3
- build-backend = "hatchling.build"
4
-
5
- [project]
6
- name = "z-ai2api-python"
7
- version = "0.1.0"
8
- description = "一个为 Z.ai 提供 OpenAI 兼容接口的 Python 代理服务"
9
- readme = "README.md"
10
- requires-python = ">=3.9,<=3.12"
11
- license = { text = "MIT" }
12
- authors = [{ name = "Contributors" }]
13
- classifiers = [
14
- "Development Status :: 4 - Beta",
15
- "Intended Audience :: Developers",
16
- "License :: OSI Approved :: MIT License",
17
- "Operating System :: OS Independent",
18
- "Programming Language :: Python :: 3",
19
- "Programming Language :: Python :: 3.9",
20
- "Programming Language :: Python :: 3.10",
21
- "Programming Language :: Python :: 3.11",
22
- "Programming Language :: Python :: 3.12",
23
- "Topic :: Internet :: WWW/HTTP :: HTTP Servers",
24
- "Topic :: Software Development :: Libraries :: Python Modules",
25
- ]
26
- dependencies = [
27
- "fastapi==0.104.1",
28
- "granian[reload]==2.5.2",
29
- "requests==2.32.5",
30
- "pydantic==2.11.7",
31
- "pydantic-settings==2.10.1",
32
- "pydantic-core==2.33.2",
33
- "typing-inspection==0.4.1",
34
- "fake-useragent==2.2.0",
35
- ]
36
-
37
- [project.scripts]
38
- z-ai2api = "main:app"
39
-
40
- [tool.hatch.build.targets.wheel]
41
- packages = ["."]
42
-
43
- [tool.uv]
44
- dev-dependencies = [
45
- "pytest>=7.0.0",
46
- "pytest-asyncio>=0.21.0",
47
- "requests>=2.30.0",
48
- "ruff>=0.1.0",
49
- ]
50
-
51
- [tool.ruff]
52
- line-length = 88
53
- target-version = "py38"
54
- select = ["E", "F", "I", "B"]
55
- ignore = []
56
-
57
- [tool.ruff.isort]
58
- known-first-party = []
59
-
60
- [tool.pytest.ini_options]
61
- asyncio_mode = "auto"
62
- testpaths = ["tests"]
63
- python_files = ["test_*.py"]
64
- python_functions = ["test_*"]