ZyphrZero committed on
Commit
2ae0d3b
·
1 Parent(s): 9752c22

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +599 -494
main.py CHANGED
@@ -1,11 +1,18 @@
1
  # -*- coding: utf-8 -*-
2
 
 
 
 
 
 
 
 
 
3
  import json
4
  import re
5
  import time
6
  from datetime import datetime
7
- from typing import Dict, List, Optional, Any, Union, Generator
8
- from urllib.parse import urljoin
9
 
10
  import requests
11
  from fastapi import FastAPI, Request, Response, HTTPException, Header
@@ -13,68 +20,199 @@ from fastapi.responses import StreamingResponse, JSONResponse
13
  from pydantic import BaseModel, Field
14
 
15
 
16
- # 配置常量
17
- UPSTREAM_URL = "https://chat.z.ai/api/chat/completions"
18
- DEFAULT_KEY = "sk-tbkFoKzk9a531YyUNNF5"
19
- UPSTREAM_TOKEN = "eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxNmJjYjQ4LWZmMmYtNGExNS04NTNkLWYyYTI5YjY3ZmYwZiIsImVtYWlsIjoiR3Vlc3QtMTc1NTg0ODU4ODc4OEBndWVzdC5jb20ifQ.PktllDySS3trlyuFpTeIZf-7hl8Qu1qYF3BxjgIul0BrNux2nX9hVzIjthLXKMWAf9V0qM8Vm_iyDqkjPGsaiQ"
20
- DEFAULT_MODEL_NAME = "GLM-4.5"
21
- THINKING_MODEL_NAME = "GLM-4.5-Thinking"
22
- SEARCH_MODEL_NAME = "GLM-4.5-Search"
23
- PORT = 8080
24
- DEBUG_MODE = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # 思考内容处理策略
27
- THINK_TAGS_MODE = "think" # strip: 去除<details>标签;think: 转为<think>标签;raw: 保留原样
28
 
29
- # 伪装前端头部
30
- X_FE_VERSION = "prod-fe-1.0.70"
31
- BROWSER_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0"
32
- SEC_CH_UA = '"Not;A=Brand";v="99", "Microsoft Edge";v="139", "Chromium";v="139"'
33
- SEC_CH_UA_MOB = "?0"
34
- SEC_CH_UA_PLAT = '"Windows"'
35
- ORIGIN_BASE = "https://chat.z.ai"
36
 
37
- # 匿名token开关
38
- ANON_TOKEN_ENABLED = True
39
 
 
 
 
40
 
41
- # SSE 解析生成器
42
  class SSEParser:
43
- """统一的 SSE (Server-Sent Events) 解析生成器"""
44
 
45
- def __init__(self, response, debug_mode=False):
46
- """初始化 SSE 解析器
47
 
48
  Args:
49
- response: requests.Response 对象,需要设置 stream=True
50
- debug_mode: 是否启用调试模式
51
  """
52
  self.response = response
53
  self.debug_mode = debug_mode
54
  self.buffer = ""
55
  self.line_count = 0
56
 
57
- def debug_log(self, format_str: str, *args):
58
- """调试日志"""
59
  if self.debug_mode:
60
  print(f"[SSE_PARSER] {format_str % args}")
61
 
62
- def iter_events(self):
63
- """生成器,逐个产生 SSE 事件
64
 
65
  Yields:
66
- dict: 解析后的 SSE 事件数据
67
  """
68
  self.debug_log("开始解析 SSE 流")
69
 
70
  for line in self.response.iter_lines():
71
  self.line_count += 1
72
 
73
- # 处理空行
74
  if not line:
75
  continue
76
 
77
- # 解码字节串
78
  if isinstance(line, bytes):
79
  try:
80
  line = line.decode('utf-8')
@@ -82,21 +220,20 @@ class SSEParser:
82
  self.debug_log(f"第{self.line_count}行解码失败,跳过")
83
  continue
84
 
85
- # 处理注释行
86
  if line.startswith(':'):
87
  continue
88
 
89
- # 解析字段
90
  if ':' in line:
91
  field, value = line.split(':', 1)
92
  field = field.strip()
93
- value = value.lstrip() # 去掉冒号后的空格
94
 
95
  if field == 'data':
96
- # 处理数据字段
97
  self.debug_log(f"收到数据 (第{self.line_count}行): {value}")
98
 
99
- # 尝试解析 JSON
100
  try:
101
  data = json.loads(value)
102
  yield {
@@ -105,7 +242,6 @@ class SSEParser:
105
  'raw': value
106
  }
107
  except json.JSONDecodeError:
108
- # 不是 JSON,作为原始数据返回
109
  yield {
110
  'type': 'data',
111
  'data': value,
@@ -114,54 +250,37 @@ class SSEParser:
114
  }
115
 
116
  elif field == 'event':
117
- # 处理事件类型
118
- yield {
119
- 'type': 'event',
120
- 'event': value
121
- }
122
 
123
  elif field == 'id':
124
- # 处理事件 ID
125
- yield {
126
- 'type': 'id',
127
- 'id': value
128
- }
129
 
130
  elif field == 'retry':
131
- # 处理重试时间
132
  try:
133
  retry = int(value)
134
- yield {
135
- 'type': 'retry',
136
- 'retry': retry
137
- }
138
  except ValueError:
139
  self.debug_log(f"无效的 retry 值: {value}")
140
 
141
- def iter_data_only(self):
142
- """生成器,只产生数据事件
143
-
144
- Yields:
145
- dict: 仅包含数据的 SSE 事件
146
- """
147
  for event in self.iter_events():
148
  if event['type'] == 'data':
149
  yield event
150
 
151
- def iter_json_data(self, model_class=None):
152
- """生成器,只产生 JSON 数据事件
153
 
154
  Args:
155
- model_class: 可选的 Pydantic 模型类,用于验证数据
156
 
157
  Yields:
158
- dict: 包含解析后的 JSON 数据的事件
159
  """
160
  for event in self.iter_events():
161
  if event['type'] == 'data' and event.get('is_json', True):
162
  try:
163
  if model_class:
164
- # 使用 Pydantic 模型验证
165
  data = model_class.model_validate_json(event['raw'])
166
  yield {
167
  'type': 'data',
@@ -174,195 +293,454 @@ class SSEParser:
174
  self.debug_log(f"数据验证失败: {e}")
175
  continue
176
 
177
- def close(self):
178
- """关闭响应连接"""
179
  if hasattr(self.response, 'close'):
180
  self.response.close()
181
 
182
  def __enter__(self):
183
- """支持上下文管理器"""
184
  return self
185
 
186
- def __exit__(self, exc_type, exc_val, exc_tb):
187
- """退出上下文时自动关闭连接"""
188
  self.close()
189
 
190
 
191
- # 数据结构定义
192
- class Message(BaseModel):
193
- role: str
194
- content: str
195
- reasoning_content: Optional[str] = None
196
 
197
-
198
- class OpenAIRequest(BaseModel):
199
- model: str
200
- messages: List[Message]
201
- stream: Optional[bool] = False
202
- temperature: Optional[float] = None
203
- max_tokens: Optional[int] = None
204
-
205
-
206
- class ModelItem(BaseModel):
207
- id: str
208
- name: str
209
- owned_by: str
210
 
211
 
212
- class UpstreamRequest(BaseModel):
213
- stream: bool
214
- model: str
215
- messages: List[Message]
216
- params: Dict[str, Any] = {}
217
- features: Dict[str, Any] = {}
218
- background_tasks: Optional[Dict[str, bool]] = None
219
- chat_id: Optional[str] = None
220
- id: Optional[str] = None
221
- mcp_servers: Optional[List[str]] = None
222
- model_item: Optional[ModelItem] = None
223
- tool_servers: Optional[List[str]] = None
224
- variables: Optional[Dict[str, str]] = None
225
- model_config = {'protected_namespaces': ()}
226
 
227
 
228
- class Delta(BaseModel):
229
- role: Optional[str] = None
230
- content: Optional[str] = None
231
- reasoning_content: Optional[str] = None
232
-
233
-
234
- class Choice(BaseModel):
235
- index: int
236
- message: Optional[Message] = None
237
- delta: Optional[Delta] = None
238
- finish_reason: Optional[str] = None
 
 
 
 
 
 
 
239
 
240
 
241
- class Usage(BaseModel):
242
- prompt_tokens: int = 0
243
- completion_tokens: int = 0
244
- total_tokens: int = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
245
 
246
 
247
- class OpenAIResponse(BaseModel):
248
- id: str
249
- object: str
250
- created: int
251
- model: str
252
- choices: List[Choice]
253
- usage: Optional[Usage] = None
 
 
 
 
254
 
255
 
256
- class UpstreamError(BaseModel):
257
- detail: str
258
- code: int
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
 
261
- class UpstreamDataInner(BaseModel):
262
- error: Optional[UpstreamError] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
 
265
- class UpstreamDataData(BaseModel):
266
- delta_content: str = ""
267
- edit_content: str = ""
268
- phase: str = ""
269
- done: bool = False
270
- usage: Optional[Usage] = None
271
- error: Optional[UpstreamError] = None
272
- inner: Optional[UpstreamDataInner] = None
 
 
 
273
 
274
 
275
- class UpstreamData(BaseModel):
276
- type: str
277
- data: UpstreamDataData
278
- error: Optional[UpstreamError] = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
 
281
- class Model(BaseModel):
282
- id: str
283
- object: str = "model"
284
- created: int
285
- owned_by: str
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
- class ModelsResponse(BaseModel):
289
- object: str = "list"
290
- data: List[Model]
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
- # FastAPI应用
294
- app = FastAPI()
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- # 调试日志函数
298
- def debug_log(format_str: str, *args):
299
- if DEBUG_MODE:
300
- print(f"[DEBUG] {format_str % args}")
301
 
 
 
 
302
 
303
- # 获取匿名token
304
- def get_anonymous_token() -> str:
305
- """获取匿名token(每次对话使用不同token,避免共享记忆)"""
306
- headers = {
307
- "User-Agent": BROWSER_UA,
308
- "Accept": "*/*",
309
- "Accept-Language": "zh-CN,zh;q=0.9",
310
- "X-FE-Version": X_FE_VERSION,
311
- "sec-ch-ua": SEC_CH_UA,
312
- "sec-ch-ua-mobile": SEC_CH_UA_MOB,
313
- "sec-ch-ua-platform": SEC_CH_UA_PLAT,
314
- "Origin": ORIGIN_BASE,
315
- "Referer": f"{ORIGIN_BASE}/",
316
- }
317
-
318
- response = requests.get(f"{ORIGIN_BASE}/api/v1/auths/", headers=headers, timeout=10.0)
319
-
320
- if response.status_code != 200:
321
- raise Exception(f"anon token status={response.status_code}")
322
-
323
- data = response.json()
324
- token = data.get("token")
325
- if not token:
326
- raise Exception("anon token empty")
327
-
328
- return token
329
 
330
 
331
- # CORS中间件
332
  @app.middleware("http")
333
  async def add_cors_headers(request: Request, call_next):
 
334
  response = await call_next(request)
335
- response.headers["Access-Control-Allow-Origin"] = "*"
336
- response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE, OPTIONS"
337
- response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
338
- response.headers["Access-Control-Allow-Credentials"] = "true"
 
 
339
  return response
340
 
341
 
342
- # OPTIONS处理器
 
 
 
343
  @app.options("/")
344
  async def handle_options():
 
345
  return Response(status_code=200)
346
 
347
 
348
- # 模型列表接口
 
 
 
 
 
349
  @app.get("/v1/models")
350
- async def handle_models():
 
 
351
  response = ModelsResponse(
352
  data=[
353
  Model(
354
- id=DEFAULT_MODEL_NAME,
355
- created=int(time.time()),
356
  owned_by="z.ai"
357
  ),
358
  Model(
359
- id=THINKING_MODEL_NAME,
360
- created=int(time.time()),
361
  owned_by="z.ai"
362
  ),
363
  Model(
364
- id=SEARCH_MODEL_NAME,
365
- created=int(time.time()),
366
  owned_by="z.ai"
367
  ),
368
  ]
@@ -370,42 +748,41 @@ async def handle_models():
370
  return response
371
 
372
 
373
- # 聊天完成接口
374
  @app.post("/v1/chat/completions")
375
- async def handle_chat_completions(
376
  request: OpenAIRequest,
377
  authorization: str = Header(...)
378
  ):
 
379
  debug_log("收到chat completions请求")
380
 
381
- # 验证API Key
382
  if not authorization.startswith("Bearer "):
383
  debug_log("缺少或无效的Authorization头")
384
  raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
385
 
386
- api_key = authorization[7:] # 去掉"Bearer "
387
- if api_key != DEFAULT_KEY:
388
  debug_log(f"无效的API key: {api_key}")
389
  raise HTTPException(status_code=401, detail="Invalid API key")
390
 
391
  debug_log("API key验证通过")
392
  debug_log(f"请求解析成功 - 模型: {request.model}, 流式: {request.stream}, 消息数: {len(request.messages)}")
393
 
394
- # 生成会话相关ID
395
- chat_id = f"{int(time.time() * 1000)}-{int(time.time())}"
396
- msg_id = str(int(time.time() * 1000000))
397
 
398
- # 确定模型特性
399
- is_thinking = request.model == THINKING_MODEL_NAME
400
- is_search = request.model == SEARCH_MODEL_NAME
401
  search_mcp = "deep-web-search" if is_search else ""
402
 
403
- # 构造上游请求
404
  upstream_req = UpstreamRequest(
405
- stream=True, # 总是使用流式从上游获取
406
  chat_id=chat_id,
407
  id=msg_id,
408
- model="0727-360B-API", # 上游实际模型ID
409
  messages=request.messages,
410
  params={},
411
  features={
@@ -431,20 +808,14 @@ async def handle_chat_completions(
431
  }
432
  )
433
 
434
- # 选择本次对话使用的token
435
- auth_token = UPSTREAM_TOKEN
436
- if ANON_TOKEN_ENABLED:
437
- try:
438
- token = get_anonymous_token()
439
- auth_token = token
440
- debug_log(f"匿名token获取成功: {token[:10]}...")
441
- except Exception as e:
442
- debug_log(f"匿名token获取失败,回退固定token: {e}")
443
 
444
- # 调用上游API
445
  if request.stream:
 
446
  return StreamingResponse(
447
- handle_stream_response(upstream_req, chat_id, auth_token),
448
  media_type="text/event-stream",
449
  headers={
450
  "Cache-Control": "no-cache",
@@ -452,280 +823,14 @@ async def handle_chat_completions(
452
  }
453
  )
454
  else:
455
- return handle_non_stream_response(upstream_req, chat_id, auth_token)
456
-
457
-
458
- def call_upstream_with_headers(upstream_req: UpstreamRequest, referer_chat_id: str, auth_token: str) -> requests.Response:
459
- """调用上游API"""
460
- headers = {
461
- "Content-Type": "application/json",
462
- "Accept": "application/json, text/event-stream",
463
- "User-Agent": BROWSER_UA,
464
- "Authorization": f"Bearer {auth_token}",
465
- "Accept-Language": "zh-CN",
466
- "sec-ch-ua": SEC_CH_UA,
467
- "sec-ch-ua-mobile": SEC_CH_UA_MOB,
468
- "sec-ch-ua-platform": SEC_CH_UA_PLAT,
469
- "X-FE-Version": X_FE_VERSION,
470
- "Origin": ORIGIN_BASE,
471
- "Referer": f"{ORIGIN_BASE}/c/{referer_chat_id}",
472
- }
473
-
474
- debug_log(f"调用上游API: {UPSTREAM_URL}")
475
- debug_log(f"上游请求体: {upstream_req.model_dump_json()}")
476
-
477
- response = requests.post(
478
- UPSTREAM_URL,
479
- json=upstream_req.model_dump(exclude_none=True),
480
- headers=headers,
481
- timeout=60.0,
482
- stream=True
483
- )
484
-
485
- debug_log(f"上游响应状态: {response.status_code}")
486
- return response
487
-
488
-
489
- def transform_thinking(s: str) -> str:
490
- """转换思考内容"""
491
- # 去 <summary>…</summary>
492
- s = re.sub(r'(?s)<summary>.*?</summary>', '', s)
493
- # 清理残留自定义标签
494
- s = s.replace("</thinking>", "").replace("<Full>", "").replace("</Full>", "")
495
- s = s.strip()
496
-
497
- if THINK_TAGS_MODE == "think":
498
- s = re.sub(r'<details[^>]*>', '<think>', s)
499
- s = s.replace("</details>", "</think>")
500
- elif THINK_TAGS_MODE == "strip":
501
- s = re.sub(r'<details[^>]*>', '', s)
502
- s = s.replace("</details>", "")
503
-
504
- # 处理每行前缀 "> "
505
- s = s.lstrip("> ")
506
- s = s.replace("\n> ", "\n")
507
- return s.strip()
508
-
509
-
510
- def handle_stream_response(upstream_req: UpstreamRequest, chat_id: str, auth_token: str):
511
- """处理流式响应"""
512
- debug_log(f"开始处理流式响应 (chat_id={chat_id})")
513
-
514
- try:
515
- response = call_upstream_with_headers(upstream_req, chat_id, auth_token)
516
- except Exception as e:
517
- debug_log(f"调用上游失败: {e}")
518
- yield "data: {\"error\": \"Failed to call upstream\"}\n\n"
519
- return
520
-
521
- if response.status_code != 200:
522
- debug_log(f"上游返回错误状态: {response.status_code}")
523
- if DEBUG_MODE:
524
- debug_log(f"上游错误响应: {response.text}")
525
- yield "data: {\"error\": \"Upstream error\"}\n\n"
526
- return
527
-
528
- # 发送第一个chunk(role)
529
- first_chunk = OpenAIResponse(
530
- id=f"chatcmpl-{int(time.time())}",
531
- object="chat.completion.chunk",
532
- created=int(time.time()),
533
- model=DEFAULT_MODEL_NAME,
534
- choices=[Choice(
535
- index=0,
536
- delta=Delta(role="assistant")
537
- )]
538
- )
539
- yield f"data: {first_chunk.model_dump_json()}\n\n"
540
-
541
- # 使用 SSE 解析器处理流
542
- debug_log("开始读取上游SSE流")
543
- sent_initial_answer = False
544
-
545
- with SSEParser(response, debug_mode=DEBUG_MODE) as parser:
546
- for event in parser.iter_json_data(UpstreamData):
547
- upstream_data = event['data']
548
-
549
- # 错误检测
550
- if (upstream_data.error or
551
- upstream_data.data.error or
552
- (upstream_data.data.inner and upstream_data.data.inner.error)):
553
-
554
- err_obj = upstream_data.error or upstream_data.data.error
555
- if not err_obj and upstream_data.data.inner:
556
- err_obj = upstream_data.data.inner.error
557
-
558
- debug_log(f"上游错误: code={err_obj.code}, detail={err_obj.detail}")
559
-
560
- # 结束下游流
561
- end_chunk = OpenAIResponse(
562
- id=f"chatcmpl-{int(time.time())}",
563
- object="chat.completion.chunk",
564
- created=int(time.time()),
565
- model=DEFAULT_MODEL_NAME,
566
- choices=[Choice(
567
- index=0,
568
- delta=Delta(),
569
- finish_reason="stop"
570
- )]
571
- )
572
- yield f"data: {end_chunk.model_dump_json()}\n\n"
573
- yield "data: [DONE]\n\n"
574
- break
575
-
576
- debug_log(f"解析成功 - 类型: {upstream_data.type}, 阶段: {upstream_data.data.phase}, "
577
- f"内容长度: {len(upstream_data.data.delta_content)}, 完成: {upstream_data.data.done}")
578
-
579
- # 处理EditContent在最初的answer信息(只发送一次)
580
- if (not sent_initial_answer and
581
- upstream_data.data.edit_content and
582
- upstream_data.data.phase == "answer"):
583
-
584
- out = upstream_data.data.edit_content
585
- if out:
586
- parts = out.split("</details>")
587
- if len(parts) > 1:
588
- content = parts[1]
589
- if content:
590
- debug_log(f"发送普通内容: {content}")
591
- chunk = OpenAIResponse(
592
- id=f"chatcmpl-{int(time.time())}",
593
- object="chat.completion.chunk",
594
- created=int(time.time()),
595
- model=DEFAULT_MODEL_NAME,
596
- choices=[Choice(
597
- index=0,
598
- delta=Delta(content=content)
599
- )]
600
- )
601
- yield f"data: {chunk.model_dump_json()}\n\n"
602
- sent_initial_answer = True
603
-
604
- # 处理DeltaContent
605
- if upstream_data.data.delta_content:
606
- out = upstream_data.data.delta_content
607
-
608
- if upstream_data.data.phase == "thinking":
609
- out = transform_thinking(out)
610
- # 思考内容使用 reasoning_content 字段
611
- if out:
612
- debug_log(f"发送思考内容: {out}")
613
- chunk = OpenAIResponse(
614
- id=f"chatcmpl-{int(time.time())}",
615
- object="chat.completion.chunk",
616
- created=int(time.time()),
617
- model=DEFAULT_MODEL_NAME,
618
- choices=[Choice(
619
- index=0,
620
- delta=Delta(reasoning_content=out)
621
- )]
622
- )
623
- yield f"data: {chunk.model_dump_json()}\n\n"
624
- else:
625
- # 普通内容使用 content 字段
626
- if out:
627
- debug_log(f"发送普通内容: {out}")
628
- chunk = OpenAIResponse(
629
- id=f"chatcmpl-{int(time.time())}",
630
- object="chat.completion.chunk",
631
- created=int(time.time()),
632
- model=DEFAULT_MODEL_NAME,
633
- choices=[Choice(
634
- index=0,
635
- delta=Delta(content=out)
636
- )]
637
- )
638
- yield f"data: {chunk.model_dump_json()}\n\n"
639
-
640
- # 检查是否结束
641
- if upstream_data.data.done or upstream_data.data.phase == "done":
642
- debug_log("检测到流结束信号")
643
-
644
- # 发送结束chunk
645
- end_chunk = OpenAIResponse(
646
- id=f"chatcmpl-{int(time.time())}",
647
- object="chat.completion.chunk",
648
- created=int(time.time()),
649
- model=DEFAULT_MODEL_NAME,
650
- choices=[Choice(
651
- index=0,
652
- delta=Delta(),
653
- finish_reason="stop"
654
- )]
655
- )
656
- yield f"data: {end_chunk.model_dump_json()}\n\n"
657
- yield "data: [DONE]\n\n"
658
- debug_log(f"流式响应完成")
659
- break
660
-
661
-
662
- def handle_non_stream_response(upstream_req: UpstreamRequest, chat_id: str, auth_token: str) -> JSONResponse:
663
- """处理非流式响应"""
664
- debug_log(f"开始处理非流式响应 (chat_id={chat_id})")
665
-
666
- try:
667
- response = call_upstream_with_headers(upstream_req, chat_id, auth_token)
668
- except Exception as e:
669
- debug_log(f"调用上游失败: {e}")
670
- raise HTTPException(status_code=502, detail="Failed to call upstream")
671
-
672
- if response.status_code != 200:
673
- debug_log(f"上游返回错误状态: {response.status_code}")
674
- if DEBUG_MODE:
675
- debug_log(f"上游错误响应: {response.text}")
676
- raise HTTPException(status_code=502, detail="Upstream error")
677
-
678
- # 收集完整响应
679
- full_content = []
680
- debug_log("开始收集完整响应内容")
681
-
682
- with SSEParser(response, debug_mode=DEBUG_MODE) as parser:
683
- for event in parser.iter_json_data(UpstreamData):
684
- upstream_data = event['data']
685
-
686
- if upstream_data.data.delta_content:
687
- out = upstream_data.data.delta_content
688
-
689
- if upstream_data.data.phase == "thinking":
690
- out = transform_thinking(out)
691
-
692
- if out:
693
- full_content.append(out)
694
-
695
- if upstream_data.data.done or upstream_data.data.phase == "done":
696
- debug_log("检测到完成信号,停止收集")
697
- break
698
-
699
- final_content = "".join(full_content)
700
- debug_log(f"内容收集完成,最终长度: {len(final_content)}")
701
-
702
- # 构造完整响应
703
- response_data = OpenAIResponse(
704
- id=f"chatcmpl-{int(time.time())}",
705
- object="chat.completion",
706
- created=int(time.time()),
707
- model=DEFAULT_MODEL_NAME,
708
- choices=[Choice(
709
- index=0,
710
- message=Message(
711
- role="assistant",
712
- content=final_content
713
- ),
714
- finish_reason="stop"
715
- )],
716
- usage=Usage()
717
- )
718
-
719
- debug_log("非流式响应发送完成")
720
- return JSONResponse(content=response_data.model_dump(exclude_none=True))
721
-
722
 
723
- # 根路径处理器
724
- @app.get("/")
725
- async def root():
726
- return {"message": "OpenAI Compatible API Server"}
727
 
 
 
 
728
 
729
  if __name__ == "__main__":
730
  import uvicorn
731
- uvicorn.run(app, host="0.0.0.0", port=PORT)
 
1
  # -*- coding: utf-8 -*-
2
 
3
+ """
4
+ OpenAI Compatible API Server for Z.AI
5
+ =====================================
6
+
7
+ This module provides an OpenAI-compatible API server that forwards requests
8
+ to the Z.AI chat service with proper authentication and response formatting.
9
+ """
10
+
11
  import json
12
  import re
13
  import time
14
  from datetime import datetime
15
+ from typing import Dict, List, Optional, Any, Union, Generator, Tuple
 
16
 
17
  import requests
18
  from fastapi import FastAPI, Request, Response, HTTPException, Header
 
20
  from pydantic import BaseModel, Field
21
 
22
 
23
+ # =============================================================================
24
+ # Configuration Constants
25
+ # =============================================================================
26
+
27
+ class Config:
28
+ """Centralized configuration constants"""
29
+
30
+ # API Configuration
31
+ UPSTREAM_URL: str = "https://chat.z.ai/api/chat/completions"
32
+ DEFAULT_KEY: str = "sk-tbkFoKzk9a531YyUNNF5"
33
+ UPSTREAM_TOKEN: str = "eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6IjMxNmJjYjQ4LWZmMmYtNGExNS04NTNkLWYyYTI5YjY3ZmYwZiIsImVtYWlsIjoiR3Vlc3QtMTc1NTg0ODU4ODc4OEBndWVzdC5jb20ifQ.PktllDySS3trlyuFpTeIZf-7hl8Qu1qYF3BxjgIul0BrNux2nX9hVzIjthLXKMWAf9V0qM8Vm_iyDqkjPGsaiQ"
34
+
35
+ # Model Configuration
36
+ DEFAULT_MODEL_NAME: str = "GLM-4.5"
37
+ THINKING_MODEL_NAME: str = "GLM-4.5-Thinking"
38
+ SEARCH_MODEL_NAME: str = "GLM-4.5-Search"
39
+
40
+ # Server Configuration
41
+ PORT: int = 8080
42
+ DEBUG_MODE: bool = True
43
+
44
+ # Feature Configuration
45
+ THINK_TAGS_MODE: str = "think" # strip: 去除<details>标签;think: 转为<think>标签;raw: 保留原样
46
+ ANON_TOKEN_ENABLED: bool = True
47
+
48
+ # Browser Headers
49
+ X_FE_VERSION: str = "prod-fe-1.0.70"
50
+ BROWSER_UA: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0"
51
+ SEC_CH_UA: str = '"Not;A=Brand";v="99", "Microsoft Edge";v="139", "Chromium";v="139"'
52
+ SEC_CH_UA_MOB: str = "?0"
53
+ SEC_CH_UA_PLAT: str = '"Windows"'
54
+ ORIGIN_BASE: str = "https://chat.z.ai"
55
+
56
+
57
+ # =============================================================================
58
+ # Data Models
59
+ # =============================================================================
60
+
61
+ class Message(BaseModel):
62
+ """Chat message model"""
63
+ role: str
64
+ content: str
65
+ reasoning_content: Optional[str] = None
66
+
67
+
68
+ class OpenAIRequest(BaseModel):
69
+ """OpenAI-compatible request model"""
70
+ model: str
71
+ messages: List[Message]
72
+ stream: Optional[bool] = False
73
+ temperature: Optional[float] = None
74
+ max_tokens: Optional[int] = None
75
+
76
+
77
+ class ModelItem(BaseModel):
78
+ """Model information item"""
79
+ id: str
80
+ name: str
81
+ owned_by: str
82
+
83
+
84
+ class UpstreamRequest(BaseModel):
85
+ """Upstream service request model"""
86
+ stream: bool
87
+ model: str
88
+ messages: List[Message]
89
+ params: Dict[str, Any] = {}
90
+ features: Dict[str, Any] = {}
91
+ background_tasks: Optional[Dict[str, bool]] = None
92
+ chat_id: Optional[str] = None
93
+ id: Optional[str] = None
94
+ mcp_servers: Optional[List[str]] = None
95
+ model_item: Optional[ModelItem] = None
96
+ tool_servers: Optional[List[str]] = None
97
+ variables: Optional[Dict[str, str]] = None
98
+ model_config = {'protected_namespaces': ()}
99
+
100
+
101
+ class Delta(BaseModel):
102
+ """Stream delta model"""
103
+ role: Optional[str] = None
104
+ content: Optional[str] = None
105
+ reasoning_content: Optional[str] = None
106
+
107
+
108
+ class Choice(BaseModel):
109
+ """Response choice model"""
110
+ index: int
111
+ message: Optional[Message] = None
112
+ delta: Optional[Delta] = None
113
+ finish_reason: Optional[str] = None
114
+
115
+
116
+ class Usage(BaseModel):
117
+ """Token usage statistics"""
118
+ prompt_tokens: int = 0
119
+ completion_tokens: int = 0
120
+ total_tokens: int = 0
121
+
122
+
123
+ class OpenAIResponse(BaseModel):
124
+ """OpenAI-compatible response model"""
125
+ id: str
126
+ object: str
127
+ created: int
128
+ model: str
129
+ choices: List[Choice]
130
+ usage: Optional[Usage] = None
131
+
132
+
133
+ class UpstreamError(BaseModel):
134
+ """Upstream error model"""
135
+ detail: str
136
+ code: int
137
+
138
+
139
+ class UpstreamDataInner(BaseModel):
140
+ """Inner upstream data model"""
141
+ error: Optional[UpstreamError] = None
142
+
143
+
144
+ class UpstreamDataData(BaseModel):
145
+ """Upstream data content model"""
146
+ delta_content: str = ""
147
+ edit_content: str = ""
148
+ phase: str = ""
149
+ done: bool = False
150
+ usage: Optional[Usage] = None
151
+ error: Optional[UpstreamError] = None
152
+ inner: Optional[UpstreamDataInner] = None
153
+
154
+
155
+ class UpstreamData(BaseModel):
156
+ """Upstream data model"""
157
+ type: str
158
+ data: UpstreamDataData
159
+ error: Optional[UpstreamError] = None
160
+
161
+
162
+ class Model(BaseModel):
163
+ """Model information for listing"""
164
+ id: str
165
+ object: str = "model"
166
+ created: int
167
+ owned_by: str
168
 
 
 
169
 
170
+ class ModelsResponse(BaseModel):
171
+ """Models list response model"""
172
+ object: str = "list"
173
+ data: List[Model]
 
 
 
174
 
 
 
175
 
176
+ # =============================================================================
177
+ # SSE Parser
178
+ # =============================================================================
179
 
 
180
  class SSEParser:
181
+ """Server-Sent Events parser for streaming responses"""
182
 
183
+ def __init__(self, response: requests.Response, debug_mode: bool = False):
184
+ """Initialize SSE parser
185
 
186
  Args:
187
+ response: requests.Response object with stream=True
188
+ debug_mode: Enable debug logging
189
  """
190
  self.response = response
191
  self.debug_mode = debug_mode
192
  self.buffer = ""
193
  self.line_count = 0
194
 
195
+ def debug_log(self, format_str: str, *args) -> None:
196
+ """Log debug message if debug mode is enabled"""
197
  if self.debug_mode:
198
  print(f"[SSE_PARSER] {format_str % args}")
199
 
200
+ def iter_events(self) -> Generator[Dict[str, Any], None, None]:
201
+ """Iterate over SSE events
202
 
203
  Yields:
204
+ dict: Parsed SSE event data
205
  """
206
  self.debug_log("开始解析 SSE 流")
207
 
208
  for line in self.response.iter_lines():
209
  self.line_count += 1
210
 
211
+ # Skip empty lines
212
  if not line:
213
  continue
214
 
215
+ # Decode bytes
216
  if isinstance(line, bytes):
217
  try:
218
  line = line.decode('utf-8')
 
220
  self.debug_log(f"第{self.line_count}行解码失败,跳过")
221
  continue
222
 
223
+ # Skip comment lines
224
  if line.startswith(':'):
225
  continue
226
 
227
+ # Parse field-value pairs
228
  if ':' in line:
229
  field, value = line.split(':', 1)
230
  field = field.strip()
231
+ value = value.lstrip()
232
 
233
  if field == 'data':
 
234
  self.debug_log(f"收到数据 (第{self.line_count}行): {value}")
235
 
236
+ # Try to parse JSON
237
  try:
238
  data = json.loads(value)
239
  yield {
 
242
  'raw': value
243
  }
244
  except json.JSONDecodeError:
 
245
  yield {
246
  'type': 'data',
247
  'data': value,
 
250
  }
251
 
252
  elif field == 'event':
253
+ yield {'type': 'event', 'event': value}
 
 
 
 
254
 
255
  elif field == 'id':
256
+ yield {'type': 'id', 'id': value}
 
 
 
 
257
 
258
  elif field == 'retry':
 
259
  try:
260
  retry = int(value)
261
+ yield {'type': 'retry', 'retry': retry}
 
 
 
262
  except ValueError:
263
  self.debug_log(f"无效的 retry 值: {value}")
264
 
265
+ def iter_data_only(self) -> Generator[Dict[str, Any], None, None]:
266
+ """Iterate only over data events"""
 
 
 
 
267
  for event in self.iter_events():
268
  if event['type'] == 'data':
269
  yield event
270
 
271
+ def iter_json_data(self, model_class: Optional[type] = None) -> Generator[Dict[str, Any], None, None]:
272
+ """Iterate only over JSON data events with optional validation
273
 
274
  Args:
275
+ model_class: Optional Pydantic model class for validation
276
 
277
  Yields:
278
+ dict: JSON data events
279
  """
280
  for event in self.iter_events():
281
  if event['type'] == 'data' and event.get('is_json', True):
282
  try:
283
  if model_class:
 
284
  data = model_class.model_validate_json(event['raw'])
285
  yield {
286
  'type': 'data',
 
293
  self.debug_log(f"数据验证失败: {e}")
294
  continue
295
 
296
+ def close(self) -> None:
297
+ """Close the response connection"""
298
  if hasattr(self.response, 'close'):
299
  self.response.close()
300
 
301
  def __enter__(self):
302
+ """Context manager entry"""
303
  return self
304
 
305
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
306
+ """Context manager exit"""
307
  self.close()
308
 
309
 
310
+ # =============================================================================
311
+ # Utility Functions
312
+ # =============================================================================
 
 
313
 
314
+ def debug_log(message: str, *args) -> None:
315
+ """Log debug message if debug mode is enabled"""
316
+ if Config.DEBUG_MODE:
317
+ print(f"[DEBUG] {message % args}")
 
 
 
 
 
 
 
 
 
318
 
319
 
320
+ def generate_request_ids() -> Tuple[str, str]:
321
+ """Generate unique IDs for chat and message"""
322
+ timestamp = int(time.time())
323
+ chat_id = f"{timestamp * 1000}-{timestamp}"
324
+ msg_id = str(timestamp * 1000000)
325
+ return chat_id, msg_id
 
 
 
 
 
 
 
 
326
 
327
 
328
+ def get_browser_headers(referer_chat_id: str = "") -> Dict[str, str]:
329
+ """Get browser headers for API requests"""
330
+ headers = {
331
+ "Content-Type": "application/json",
332
+ "Accept": "application/json, text/event-stream",
333
+ "User-Agent": Config.BROWSER_UA,
334
+ "Accept-Language": "zh-CN",
335
+ "sec-ch-ua": Config.SEC_CH_UA,
336
+ "sec-ch-ua-mobile": Config.SEC_CH_UA_MOB,
337
+ "sec-ch-ua-platform": Config.SEC_CH_UA_PLAT,
338
+ "X-FE-Version": Config.X_FE_VERSION,
339
+ "Origin": Config.ORIGIN_BASE,
340
+ }
341
+
342
+ if referer_chat_id:
343
+ headers["Referer"] = f"{Config.ORIGIN_BASE}/c/{referer_chat_id}"
344
+
345
+ return headers
346
 
347
 
348
+ def get_anonymous_token() -> str:
349
+ """Get anonymous token for authentication"""
350
+ headers = get_browser_headers()
351
+ headers.update({
352
+ "Accept": "*/*",
353
+ "Accept-Language": "zh-CN,zh;q=0.9",
354
+ "Referer": f"{Config.ORIGIN_BASE}/",
355
+ })
356
+
357
+ try:
358
+ response = requests.get(
359
+ f"{Config.ORIGIN_BASE}/api/v1/auths/",
360
+ headers=headers,
361
+ timeout=10.0
362
+ )
363
+
364
+ if response.status_code != 200:
365
+ raise Exception(f"anon token status={response.status_code}")
366
+
367
+ data = response.json()
368
+ token = data.get("token")
369
+ if not token:
370
+ raise Exception("anon token empty")
371
+
372
+ return token
373
+ except Exception as e:
374
+ debug_log(f"获取匿名token失败: {e}")
375
+ raise
376
 
377
 
378
def get_auth_token() -> str:
    """Return an auth token for the upstream API.

    Prefers a freshly-acquired anonymous token when enabled; falls back to
    the fixed upstream token on any failure (or when anonymous auth is off).
    """
    if Config.ANON_TOKEN_ENABLED:
        try:
            token = get_anonymous_token()
        except Exception as e:
            debug_log(f"匿名token获取失败,回退固定token: {e}")
        else:
            debug_log(f"匿名token获取成功: {token[:10]}...")
            return token

    return Config.UPSTREAM_TOKEN
389
 
390
 
391
def transform_thinking_content(content: str) -> str:
    """Transform upstream "thinking" content according to THINK_TAGS_MODE.

    - Drops ``<summary>…</summary>`` blocks and stray wrapper tags.
    - In ``"think"`` mode, rewrites ``<details>`` wrappers to ``<think>`` tags.
    - In ``"strip"`` mode, removes ``<details>`` wrappers entirely.
    - Removes the ``"> "`` blockquote prefixes the upstream adds per line.
    """
    # Remove summary tags (DOTALL so multi-line summaries are caught)
    content = re.sub(r'(?s)<summary>.*?</summary>', '', content)
    # Clean up remaining wrapper tags
    content = content.replace("</thinking>", "").replace("<Full>", "").replace("</Full>", "")
    content = content.strip()

    if Config.THINK_TAGS_MODE == "think":
        content = re.sub(r'<details[^>]*>', '<think>', content)
        content = content.replace("</details>", "</think>")
    elif Config.THINK_TAGS_MODE == "strip":
        content = re.sub(r'<details[^>]*>', '', content)
        content = content.replace("</details>", "")

    # Remove blockquote line prefixes.
    # BUG FIX: str.lstrip("> ") interprets its argument as a *character set*,
    # stripping any leading run of '>' and ' ' and potentially eating
    # legitimate leading '>' text; removeprefix() removes exactly one "> ".
    content = content.removeprefix("> ")
    content = content.replace("\n> ", "\n")

    return content.strip()
411
 
412
 
413
def create_openai_response_chunk(
    model: str,
    delta: Optional[Delta] = None,
    finish_reason: Optional[str] = None
) -> OpenAIResponse:
    """Build a single OpenAI ``chat.completion.chunk`` payload for streaming."""
    choice = Choice(
        index=0,
        delta=delta or Delta(),
        finish_reason=finish_reason,
    )
    return OpenAIResponse(
        id=f"chatcmpl-{int(time.time())}",
        object="chat.completion.chunk",
        created=int(time.time()),
        model=model,
        choices=[choice],
    )
430
 
431
 
432
def handle_upstream_error(error: UpstreamError) -> Generator[str, None, None]:
    """Log an upstream error and terminate the SSE stream gracefully.

    Emits a final "stop" chunk followed by the ``[DONE]`` sentinel so
    OpenAI-compatible clients close the stream cleanly.
    """
    debug_log(f"上游错误: code={error.code}, detail={error.detail}")

    end_chunk = create_openai_response_chunk(
        model=Config.DEFAULT_MODEL_NAME,
        finish_reason="stop",
    )
    yield f"data: {end_chunk.model_dump_json()}\n\n"
    yield "data: [DONE]\n\n"
443
 
444
 
445
def call_upstream_api(
    upstream_req: UpstreamRequest,
    chat_id: str,
    auth_token: str
) -> requests.Response:
    """POST the chat request to the upstream API.

    Returns the raw ``requests`` response opened in streaming mode so the
    caller can consume the SSE body incrementally.
    """
    headers = get_browser_headers(chat_id)
    headers["Authorization"] = f"Bearer {auth_token}"

    debug_log(f"调用上游API: {Config.UPSTREAM_URL}")
    debug_log(f"上游请求体: {upstream_req.model_dump_json()}")

    response = requests.post(
        Config.UPSTREAM_URL,
        json=upstream_req.model_dump(exclude_none=True),
        headers=headers,
        timeout=60.0,
        stream=True,
    )
    debug_log(f"上游响应状态: {response.status_code}")
    return response
467
 
468
 
469
+ # =============================================================================
470
+ # Response Handlers
471
+ # =============================================================================
 
 
472
 
473
class ResponseHandler:
    """Common plumbing shared by the stream / non-stream response handlers."""

    def __init__(self, upstream_req: UpstreamRequest, chat_id: str, auth_token: str):
        self.upstream_req = upstream_req
        self.chat_id = chat_id
        self.auth_token = auth_token

    def _call_upstream(self) -> requests.Response:
        """Call the upstream API, logging and re-raising any failure."""
        try:
            return call_upstream_api(self.upstream_req, self.chat_id, self.auth_token)
        except Exception as e:
            debug_log(f"调用上游失败: {e}")
            raise

    def _handle_upstream_error(self, response: requests.Response) -> None:
        """Log a non-200 upstream response (body only in debug mode)."""
        debug_log(f"上游返回错误状态: {response.status_code}")
        if Config.DEBUG_MODE:
            debug_log(f"上游错误响应: {response.text}")
494
 
 
 
 
495
 
496
class StreamResponseHandler(ResponseHandler):
    """Handler for streaming responses.

    Converts the upstream Z.AI SSE stream into OpenAI-style
    ``chat.completion.chunk`` events terminated by ``[DONE]``.
    """

    # Whether the leading edit_content answer has already been emitted.
    # Tracked on the instance so the flag persists across events — see
    # the BUG FIX note on _process_content.
    _sent_initial_answer: bool = False

    def handle(self) -> Generator[str, None, None]:
        """Yield SSE-formatted OpenAI chunks translated from the upstream stream."""
        debug_log(f"开始处理流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception:
            yield "data: {\"error\": \"Failed to call upstream\"}\n\n"
            return

        if response.status_code != 200:
            self._handle_upstream_error(response)
            yield "data: {\"error\": \"Upstream error\"}\n\n"
            return

        # Send initial role chunk
        # NOTE(review): chunks are always labeled with DEFAULT_MODEL_NAME even
        # when the client requested another model — confirm this is intended.
        first_chunk = create_openai_response_chunk(
            model=Config.DEFAULT_MODEL_NAME,
            delta=Delta(role="assistant")
        )
        yield f"data: {first_chunk.model_dump_json()}\n\n"

        # Process stream
        debug_log("开始读取上游SSE流")
        self._sent_initial_answer = False

        with SSEParser(response, debug_mode=Config.DEBUG_MODE) as parser:
            for event in parser.iter_json_data(UpstreamData):
                upstream_data = event['data']

                # Check for errors reported inside the stream payload
                if self._has_error(upstream_data):
                    error = self._get_error(upstream_data)
                    yield from handle_upstream_error(error)
                    break

                debug_log(f"解析成功 - 类型: {upstream_data.type}, 阶段: {upstream_data.data.phase}, "
                          f"内容长度: {len(upstream_data.data.delta_content)}, 完成: {upstream_data.data.done}")

                # Process content
                yield from self._process_content(upstream_data)

                # Check if done
                if upstream_data.data.done or upstream_data.data.phase == "done":
                    debug_log("检测到流结束信号")
                    yield from self._send_end_chunk()
                    break

    def _has_error(self, upstream_data: UpstreamData) -> bool:
        """Check if upstream data contains an error in any of its three slots."""
        return bool(
            upstream_data.error or
            upstream_data.data.error or
            (upstream_data.data.inner and upstream_data.data.inner.error)
        )

    def _get_error(self, upstream_data: UpstreamData) -> Optional[UpstreamError]:
        """Return the first error found in the upstream data, if any."""
        return (
            upstream_data.error or
            upstream_data.data.error or
            (upstream_data.data.inner.error if upstream_data.data.inner else None)
        )

    def _process_content(self, upstream_data: UpstreamData) -> Generator[str, None, None]:
        """Translate one upstream event into zero or more OpenAI chunks.

        BUG FIX: ``sent_initial_answer`` used to be passed in by value, so
        assigning it ``True`` here never propagated back to ``handle()`` and
        the initial ``edit_content`` answer could be re-emitted for every
        subsequent event. The flag is now kept on the instance
        (``self._sent_initial_answer``).
        """
        # Handle the initial answer content embedded in edit_content
        if (not self._sent_initial_answer and
                upstream_data.data.edit_content and
                upstream_data.data.phase == "answer"):

            content = self._extract_edit_content(upstream_data.data.edit_content)
            if content:
                debug_log(f"发送普通内容: {content}")
                chunk = create_openai_response_chunk(
                    model=Config.DEFAULT_MODEL_NAME,
                    delta=Delta(content=content)
                )
                yield f"data: {chunk.model_dump_json()}\n\n"
                self._sent_initial_answer = True

        # Handle incremental delta content
        if upstream_data.data.delta_content:
            content = upstream_data.data.delta_content

            if upstream_data.data.phase == "thinking":
                # Thinking output goes into reasoning_content after cleanup
                content = transform_thinking_content(content)
                if content:
                    debug_log(f"发送思考内容: {content}")
                    chunk = create_openai_response_chunk(
                        model=Config.DEFAULT_MODEL_NAME,
                        delta=Delta(reasoning_content=content)
                    )
                    yield f"data: {chunk.model_dump_json()}\n\n"
            else:
                if content:
                    debug_log(f"发送普通内容: {content}")
                    chunk = create_openai_response_chunk(
                        model=Config.DEFAULT_MODEL_NAME,
                        delta=Delta(content=content)
                    )
                    yield f"data: {chunk.model_dump_json()}\n\n"

    def _extract_edit_content(self, edit_content: str) -> str:
        """Return the answer text between the first and second </details> tags."""
        parts = edit_content.split("</details>")
        return parts[1] if len(parts) > 1 else ""

    def _send_end_chunk(self) -> Generator[str, None, None]:
        """Send the final "stop" chunk and the [DONE] sentinel."""
        end_chunk = create_openai_response_chunk(
            model=Config.DEFAULT_MODEL_NAME,
            finish_reason="stop"
        )
        yield f"data: {end_chunk.model_dump_json()}\n\n"
        yield "data: [DONE]\n\n"
        debug_log("流式响应完成")
620
 
 
 
621
 
622
class NonStreamResponseHandler(ResponseHandler):
    """Handler for non-streaming responses.

    Drains the upstream SSE stream, concatenates all content, and returns a
    single OpenAI ``chat.completion`` JSON response.
    """

    def handle(self) -> JSONResponse:
        """Handle non-streaming response"""
        debug_log(f"开始处理非流式响应 (chat_id={self.chat_id})")

        try:
            response = self._call_upstream()
        except Exception as e:
            debug_log(f"调用上游失败: {e}")
            raise HTTPException(status_code=502, detail="Failed to call upstream")

        if response.status_code != 200:
            self._handle_upstream_error(response)
            raise HTTPException(status_code=502, detail="Upstream error")

        final_content = self._collect_content(response)

        # Build the single, complete OpenAI-format response
        response_data = OpenAIResponse(
            id=f"chatcmpl-{int(time.time())}",
            object="chat.completion",
            created=int(time.time()),
            model=Config.DEFAULT_MODEL_NAME,
            choices=[Choice(
                index=0,
                message=Message(
                    role="assistant",
                    content=final_content
                ),
                finish_reason="stop"
            )],
            usage=Usage()
        )

        debug_log("非流式响应发送完成")
        return JSONResponse(content=response_data.model_dump(exclude_none=True))

    def _collect_content(self, response: requests.Response) -> str:
        """Drain the upstream SSE stream and return all content concatenated."""
        full_content = []
        debug_log("开始收集完整响应内容")

        with SSEParser(response, debug_mode=Config.DEBUG_MODE) as parser:
            for event in parser.iter_json_data(UpstreamData):
                upstream_data = event['data']

                if upstream_data.data.delta_content:
                    content = upstream_data.data.delta_content
                    if upstream_data.data.phase == "thinking":
                        content = transform_thinking_content(content)
                    if content:
                        full_content.append(content)

                if upstream_data.data.done or upstream_data.data.phase == "done":
                    debug_log("检测到完成信号,停止收集")
                    break

        final_content = "".join(full_content)
        debug_log(f"内容收集完成,最终长度: {len(final_content)}")
        return final_content
682
 
 
 
 
 
683
 
684
# =============================================================================
# FastAPI Application
# =============================================================================

# Application instance; served by uvicorn (see the __main__ entry point).
app = FastAPI(
    title="OpenAI Compatible API Server",
    description="An OpenAI-compatible API server for Z.AI chat service",
    version="1.0.0"
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
693
 
694
 
695
# CORS middleware
@app.middleware("http")
async def add_cors_headers(request: Request, call_next):
    """Attach permissive CORS headers to every outgoing response."""
    response = await call_next(request)
    response.headers["Access-Control-Allow-Origin"] = "*"
    response.headers["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE, OPTIONS"
    response.headers["Access-Control-Allow-Headers"] = "Content-Type, Authorization"
    response.headers["Access-Control-Allow-Credentials"] = "true"
    return response
707
 
708
 
709
+ # =============================================================================
710
+ # API Endpoints
711
+ # =============================================================================
712
+
713
@app.options("/")
async def handle_options():
    """Return an empty 200 for CORS preflight OPTIONS requests on the root path."""
    return Response(status_code=200)
717
 
718
 
719
@app.get("/")
async def root():
    """Root endpoint: simple liveness / identification message."""
    return {"message": "OpenAI Compatible API Server"}
723
+
724
+
725
@app.get("/v1/models")
async def list_models():
    """List the three exposed model variants (default / thinking / search)."""
    current_time = int(time.time())
    model_ids = [
        Config.DEFAULT_MODEL_NAME,
        Config.THINKING_MODEL_NAME,
        Config.SEARCH_MODEL_NAME,
    ]
    response = ModelsResponse(
        data=[
            Model(id=model_id, created=current_time, owned_by="z.ai")
            for model_id in model_ids
        ]
    )
    return response
749
 
750
 
 
751
  @app.post("/v1/chat/completions")
752
+ async def chat_completions(
753
  request: OpenAIRequest,
754
  authorization: str = Header(...)
755
  ):
756
+ """Handle chat completion requests"""
757
  debug_log("收到chat completions请求")
758
 
759
+ # Validate API key
760
  if not authorization.startswith("Bearer "):
761
  debug_log("缺少或无效的Authorization头")
762
  raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
763
 
764
+ api_key = authorization[7:]
765
+ if api_key != Config.DEFAULT_KEY:
766
  debug_log(f"无效的API key: {api_key}")
767
  raise HTTPException(status_code=401, detail="Invalid API key")
768
 
769
  debug_log("API key验证通过")
770
  debug_log(f"请求解析成功 - 模型: {request.model}, 流式: {request.stream}, 消息数: {len(request.messages)}")
771
 
772
+ # Generate IDs
773
+ chat_id, msg_id = generate_request_ids()
 
774
 
775
+ # Determine model features
776
+ is_thinking = request.model == Config.THINKING_MODEL_NAME
777
+ is_search = request.model == Config.SEARCH_MODEL_NAME
778
  search_mcp = "deep-web-search" if is_search else ""
779
 
780
+ # Build upstream request
781
  upstream_req = UpstreamRequest(
782
+ stream=True, # Always use streaming from upstream
783
  chat_id=chat_id,
784
  id=msg_id,
785
+ model="0727-360B-API", # Actual upstream model ID
786
  messages=request.messages,
787
  params={},
788
  features={
 
808
  }
809
  )
810
 
811
+ # Get authentication token
812
+ auth_token = get_auth_token()
 
 
 
 
 
 
 
813
 
814
+ # Handle response based on stream flag
815
  if request.stream:
816
+ handler = StreamResponseHandler(upstream_req, chat_id, auth_token)
817
  return StreamingResponse(
818
+ handler.handle(),
819
  media_type="text/event-stream",
820
  headers={
821
  "Cache-Control": "no-cache",
 
823
  }
824
  )
825
  else:
826
+ handler = NonStreamResponseHandler(upstream_req, chat_id, auth_token)
827
+ return handler.handle()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
828
 
 
 
 
 
829
 
830
# =============================================================================
# Main Entry Point
# =============================================================================

if __name__ == "__main__":
    import uvicorn
    # reload=True restarts the server on code changes — a development setting;
    # disable for production deployments.
    uvicorn.run("main:app", host="0.0.0.0", port=Config.PORT, reload=True)