bluewinliang committed on
Commit
5b62571
·
verified ·
1 Parent(s): b5903c1

Upload 2 files

Browse files
Files changed (2) hide show
  1. main.py +38 -12
  2. proxy_handler.py +81 -142
main.py CHANGED
@@ -9,8 +9,8 @@ from fastapi.middleware.cors import CORSMiddleware
9
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
10
 
11
  from config import settings
12
- from models import ChatCompletionRequest, ModelsResponse, ModelInfo, ErrorResponse
13
- from proxy_handler import ProxyHandler
14
  from cookie_manager import cookie_manager
15
 
16
  # Configure logging
@@ -20,12 +20,25 @@ logging.basicConfig(
20
  )
21
  logger = logging.getLogger(__name__)
22
 
 
 
 
 
 
23
  # Security
24
  security = HTTPBearer(auto_error=False)
25
 
26
  @asynccontextmanager
27
  async def lifespan(app: FastAPI):
28
  """Application lifespan manager"""
 
 
 
 
 
 
 
 
29
  # Start background tasks
30
  health_check_task = asyncio.create_task(cookie_manager.periodic_health_check())
31
 
@@ -33,11 +46,19 @@ async def lifespan(app: FastAPI):
33
  yield
34
  finally:
35
  # Cleanup
 
36
  health_check_task.cancel()
37
  try:
38
  await health_check_task
39
  except asyncio.CancelledError:
40
- pass
 
 
 
 
 
 
 
41
 
42
  # Create FastAPI app
43
  app = FastAPI(
@@ -61,7 +82,6 @@ async def verify_auth(credentials: HTTPAuthorizationCredentials = Depends(securi
61
  if not credentials:
62
  raise HTTPException(status_code=401, detail="Authorization header required")
63
 
64
- # Verify the API key matches our configured key
65
  if credentials.credentials != settings.API_KEY:
66
  raise HTTPException(status_code=401, detail="Invalid API key")
67
 
@@ -72,7 +92,7 @@ async def list_models():
72
  """List available models"""
73
  models = [
74
  ModelInfo(
75
- id=settings.MODEL_ID,
76
  object="model",
77
  owned_by="z-ai"
78
  )
@@ -82,31 +102,37 @@ async def list_models():
82
  @app.post("/v1/chat/completions")
83
  async def chat_completions(
84
  request: ChatCompletionRequest,
85
- auth_token: str = Depends(verify_auth)
86
  ):
87
  """Create chat completion"""
88
  try:
89
- # Check if cookies are configured
90
  if not settings or not settings.COOKIES:
91
  raise HTTPException(
92
  status_code=503,
93
  detail="Service unavailable: No Z.AI cookies configured. Please set Z_AI_COOKIES environment variable."
94
  )
95
 
96
- # Validate model
97
  if request.model != settings.MODEL_NAME:
98
  raise HTTPException(
99
  status_code=400,
100
  detail=f"Model '{request.model}' not supported. Use '{settings.MODEL_NAME}'"
101
  )
102
 
103
- async with ProxyHandler() as handler:
104
- return await handler.handle_chat_completion(request)
 
 
 
 
 
 
 
105
 
106
  except HTTPException:
107
  raise
108
  except Exception as e:
109
- logger.error(f"Unexpected error: {e}")
 
110
  raise HTTPException(status_code=500, detail="Internal server error")
111
 
112
  @app.get("/health")
@@ -137,4 +163,4 @@ if __name__ == "__main__":
137
  port=settings.PORT,
138
  reload=False,
139
  log_level=settings.LOG_LEVEL.lower()
140
- )
 
9
  from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
10
 
11
  from config import settings
12
+ from models import ChatCompletionRequest, ModelsResponse, ModelInfo
13
+ from proxy_handler import ProxyHandler # 確保 proxy_handler.py 在同級目錄
14
  from cookie_manager import cookie_manager
15
 
16
  # Configure logging
 
20
  )
21
  logger = logging.getLogger(__name__)
22
 
23
+ # --- MODIFICATION START ---
24
+ # 1. 創建一個全局變量來持有 handler 實例
25
+ proxy_handler: ProxyHandler | None = None
26
+ # --- MODIFICATION END ---
27
+
28
  # Security
29
  security = HTTPBearer(auto_error=False)
30
 
31
  @asynccontextmanager
32
  async def lifespan(app: FastAPI):
33
  """Application lifespan manager"""
34
+ global proxy_handler
35
+ logger.info("Application startup: Initializing ProxyHandler and starting background tasks...")
36
+
37
+ # --- MODIFICATION START ---
38
+ # 2. 在應用啟動時初始化 ProxyHandler
39
+ proxy_handler = ProxyHandler()
40
+ # --- MODIFICATION END ---
41
+
42
  # Start background tasks
43
  health_check_task = asyncio.create_task(cookie_manager.periodic_health_check())
44
 
 
46
  yield
47
  finally:
48
  # Cleanup
49
+ logger.info("Application shutdown: Cleaning up resources...")
50
  health_check_task.cancel()
51
  try:
52
  await health_check_task
53
  except asyncio.CancelledError:
54
+ logger.info("Cookie health check task cancelled.")
55
+
56
+ # --- MODIFICATION START ---
57
+ # 3. 在應用關閉時,安全地關閉 ProxyHandler 的 client
58
+ if proxy_handler:
59
+ await proxy_handler.aclose()
60
+ logger.info("ProxyHandler client closed.")
61
+ # --- MODIFICATION END ---
62
 
63
  # Create FastAPI app
64
  app = FastAPI(
 
82
  if not credentials:
83
  raise HTTPException(status_code=401, detail="Authorization header required")
84
 
 
85
  if credentials.credentials != settings.API_KEY:
86
  raise HTTPException(status_code=401, detail="Invalid API key")
87
 
 
92
  """List available models"""
93
  models = [
94
  ModelInfo(
95
+ id=settings.MODEL_ID, # 建議使用 settings.MODEL_NAME 以保持一致性
96
  object="model",
97
  owned_by="z-ai"
98
  )
 
102
  @app.post("/v1/chat/completions")
103
  async def chat_completions(
104
  request: ChatCompletionRequest,
105
+ _auth_token: str = Depends(verify_auth) # 變數名加底線表示未使用
106
  ):
107
  """Create chat completion"""
108
  try:
 
109
  if not settings or not settings.COOKIES:
110
  raise HTTPException(
111
  status_code=503,
112
  detail="Service unavailable: No Z.AI cookies configured. Please set Z_AI_COOKIES environment variable."
113
  )
114
 
 
115
  if request.model != settings.MODEL_NAME:
116
  raise HTTPException(
117
  status_code=400,
118
  detail=f"Model '{request.model}' not supported. Use '{settings.MODEL_NAME}'"
119
  )
120
 
121
+ # --- MODIFICATION START ---
122
+ # 4. 直接使用全局的 proxy_handler 實例,並移除 async with 塊
123
+ if not proxy_handler:
124
+ # 這種情況理論上不應發生,因為 lifespan 會先執行
125
+ logger.error("Proxy handler is not initialized.")
126
+ raise HTTPException(status_code=503, detail="Service is not ready.")
127
+
128
+ return await proxy_handler.handle_chat_completion(request)
129
+ # --- MODIFICATION END ---
130
 
131
  except HTTPException:
132
  raise
133
  except Exception as e:
134
+ # 使用 logger.exception 可以記錄完整的 traceback
135
+ logger.exception(f"Unexpected error in chat_completions endpoint: {e}")
136
  raise HTTPException(status_code=500, detail="Internal server error")
137
 
138
  @app.get("/health")
 
163
  port=settings.PORT,
164
  reload=False,
165
  log_level=settings.LOG_LEVEL.lower()
166
+ )
proxy_handler.py CHANGED
@@ -8,6 +8,7 @@ import httpx
8
  from fastapi import HTTPException
9
  from fastapi.responses import StreamingResponse
10
 
 
11
  from config import settings
12
  from cookie_manager import cookie_manager
13
  from models import ChatCompletionRequest, ChatCompletionResponse
@@ -23,12 +24,16 @@ class ProxyHandler:
23
  http2=True,
24
  )
25
 
26
- async def __aenter__(self): return self
27
- async def __aexit__(self, *_): await self.client.aclose()
 
 
 
 
28
 
29
  # ---------- text utilities ----------
 
30
  def _balance_think_tag(self, txt: str) -> str:
31
- # ensure opens and closes count match
32
  opens = len(re.findall(r"<think>", txt))
33
  closes = len(re.findall(r"</think>", txt))
34
  if opens > closes:
@@ -39,24 +44,18 @@ class ProxyHandler:
39
  return txt
40
 
41
  def _clean_thinking(self, s: str) -> str:
42
- # strip html but keep text, be more careful about content preservation
43
  if not s: return s
44
- # Remove details/summary blocks
45
  s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
46
  s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
47
- # Remove other HTML tags but preserve content
48
  s = re.sub(r'<[^>]+>', '', s)
49
- # Clean up markdown-style quotes at line start
50
  s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
51
  return s.strip()
52
 
53
  def _clean_answer(self, s: str) -> str:
54
- # remove <details> blocks but preserve other content
55
  if not s: return s
56
  return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
57
 
58
  def _serialize_msgs(self, msgs) -> list:
59
- # convert messages to dict
60
  out = []
61
  for m in msgs:
62
  if hasattr(m, "dict"): out.append(m.dict())
@@ -67,70 +66,47 @@ class ProxyHandler:
67
 
68
  # ---------- upstream ----------
69
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
 
70
  ck = await cookie_manager.get_next_cookie()
71
  if not ck: raise HTTPException(503, "No available cookies")
72
 
73
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
74
  body = {
75
- "stream": True,
76
- "model": model,
77
- "messages": self._serialize_msgs(req.messages),
78
- "background_tasks": {"title_generation": True, "tags_generation": True},
79
- "chat_id": str(uuid.uuid4()),
80
- "features": {
81
- "image_generation": False, "code_interpreter": False,
82
- "web_search": False, "auto_web_search": False, "enable_thinking": True,
83
- },
84
- "id": str(uuid.uuid4()),
85
- "mcp_servers": ["deep-web-search"],
86
- "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"},
87
- "params": {},
88
- "tool_servers": [],
89
- "variables": {
90
- "{{USER_NAME}}": "User",
91
- "{{USER_LOCATION}}": "Unknown",
92
- "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
93
- },
94
  }
95
  headers = {
96
- "Content-Type": "application/json",
97
- "Authorization": f"Bearer {ck}",
98
- "User-Agent": (
99
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
100
- "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
101
- ),
102
- "Accept": "application/json, text/event-stream",
103
- "Accept-Language": "zh-CN",
104
  "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
105
- "sec-ch-ua-mobile": "?0",
106
- "sec-ch-ua-platform": '"macOS"',
107
- "x-fe-version": "prod-fe-1.0.53",
108
- "Origin": "https://chat.z.ai",
109
- "Referer": "https://chat.z.ai/",
110
  }
111
  return body, headers, ck
112
 
113
  # ---------- stream ----------
114
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
115
- # 使用 self.client 而不是每次都創建新的 client,以利用連接池
116
  try:
117
  body, headers, ck = await self._prep_upstream(req)
118
  comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
119
- think_open, phase_cur = False, None
120
- # FIX: 移除了 first_answer_chunk 標誌,改用更可靠的狀態管理
 
 
121
 
122
- async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers, timeout=self.client.timeout) as resp:
123
  if resp.status_code != 200:
124
  await cookie_manager.mark_cookie_invalid(ck)
125
- err_msg = f"Upstream error: {resp.status_code}"
126
- try:
127
- err_body = await resp.aread()
128
- err_msg += f" - {err_body.decode()}"
129
- except Exception:
130
- pass
131
  err = {
132
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
133
- "model": req.model,
134
  "choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],
135
  }
136
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
@@ -139,138 +115,109 @@ class ProxyHandler:
139
  if not raw or raw.isspace(): continue
140
  for line in raw.split('\n'):
141
  line = line.strip()
142
- if not line or line.startswith(':') or not line.startswith('data: '): continue
143
 
144
  payload = line[6:]
145
  if payload == '[DONE]':
146
- if think_open: # 如果 <think> 標籤仍然是打開的,在結束前關閉它
147
- close_c = {
148
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
149
- "model": req.model,
150
- "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
151
- }
152
- yield f"data: {json.dumps(close_c)}\n\n"
153
- think_open = False
154
-
155
- final_c = {
156
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
157
- "model": req.model, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
158
- }
159
- yield f"data: {json.dumps(final_c)}\n\n"; yield "data: [DONE]\n\n"; return
160
 
161
  try:
162
  parsed = json.loads(payload)
163
  except json.JSONDecodeError:
164
- logger.warning(f"Failed to decode JSON payload: {payload}")
165
  continue
166
 
167
  dat = parsed.get("data", {})
168
- delta, phase = dat.get("delta_content", ""), dat.get("phase")
169
-
170
- # --- 主要修復邏輯開始 ---
171
- # 1. 處理階段變化 (Phase Transition)
172
- if phase and phase != phase_cur:
173
- # 從 'thinking' 切換到 'answer' 時,關閉 <think> 標籤
174
- if phase_cur == "thinking" and phase == "answer":
175
- if think_open and settings.SHOW_THINK_TAGS:
176
- close_c = {
177
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
178
- "model": req.model,
179
- "choices": [{"index": 0, "delta": {"content": "</think>\n"}, "finish_reason": None}],
180
- }
181
- yield f"data: {json.dumps(close_c)}\n\n"
182
- think_open = False
183
- phase_cur = phase
184
 
185
- # 2. 處理內容 (Content Processing)
186
- # 這個邏輯塊獨立於階段變化,確保當前 chunk 的內容總是被處理
187
  text_to_yield = ""
188
  if phase_cur == "thinking":
189
  if settings.SHOW_THINK_TAGS:
190
- # 如果 think 標籤還沒打開,就打開它
191
  if not think_open:
192
- open_c = {
193
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
194
- "model": req.model,
195
- "choices": [{"index": 0, "delta": {"content": "<think>"}, "finish_reason": None}],
196
- }
197
- yield f"data: {json.dumps(open_c)}\n\n"
198
  think_open = True
199
  text_to_yield = self._clean_thinking(delta)
200
-
201
  elif phase_cur == "answer":
202
  text_to_yield = self._clean_answer(delta)
203
 
204
- # 3. 發送內容 (Yield Content)
205
- # 只有在 text_to_yield 有實際內容時才發送,避免發送空 chunk
206
  if text_to_yield:
207
  out = {
208
- "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
209
- "model": req.model,
210
  "choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],
211
  }
212
  yield f"data: {json.dumps(out)}\n\n"
213
- # --- 主要修復邏輯結束 ---
214
 
215
  except httpx.RequestError as e:
 
216
  logger.error(f"Request error: {e}")
217
- err = {
218
- "id": f"chatcmpl-{uuid.uuid4().hex[:29]}", "object": "chat.completion.chunk",
219
- "created": int(time.time()), "model": req.model,
220
- "choices": [{"index": 0, "delta": {"content": f"Connection error: {e}"}, "finish_reason": "stop"}],
221
- }
222
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
223
  except Exception as e:
224
- logger.exception(f"Unexpected error in stream_proxy_response: {e}") # 使用 exception 打印 traceback
225
- err = {
226
- "id": f"chatcmpl-{uuid.uuid4().hex[:29]}", "object": "chat.completion.chunk",
227
- "created": int(time.time()), "model": req.model,
228
- "choices": [{"index": 0, "delta": {"content": f"Internal server error."}, "finish_reason": "stop"}],
229
- }
230
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
231
 
232
  # ---------- non-stream ----------
233
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
234
- # 非流式邏輯本身比較穩健,因為它先收集所有數據再處理。
235
- # 此處的邏輯已是最佳實踐,無需大改。
236
- ck = None # 在 try 外部定義,以便 finally 中可以訪問
237
  try:
238
  body, headers, ck = await self._prep_upstream(req)
239
  think_buf, answer_buf = [], []
240
 
241
- # 確保使用實例的 client 和其 timeout 設置
242
- async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers, timeout=self.client.timeout) as resp:
 
 
243
  if resp.status_code != 200:
244
  await cookie_manager.mark_cookie_invalid(ck)
245
  error_detail = await resp.text()
246
- logger.error(f"Upstream error {resp.status_code}: {error_detail}")
247
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
248
 
249
  async for raw in resp.aiter_text():
250
  if not raw or raw.isspace(): continue
251
  for line in raw.split('\n'):
252
  line = line.strip()
253
- if not line or line.startswith(':') or not line.startswith('data: '): continue
254
  payload = line[6:]
255
  if payload == '[DONE]': break
256
- try:
257
- parsed = json.loads(payload)
258
- except json.JSONDecodeError:
259
- continue
260
  dat = parsed.get("data", {})
261
- delta, phase = dat.get("delta_content", ""), dat.get("phase")
 
 
 
 
 
262
 
263
  if not delta: continue
264
 
265
- if phase == "thinking":
 
266
  think_buf.append(delta)
267
- elif phase == "answer":
268
  answer_buf.append(delta)
269
- # 循環結束後 break
270
- else: # for-else 語句,如果 for 循環正常結束(非 break),則執行
271
- pass # 此處不需要做任何事
272
-
273
- # 合併內容後再進行清理
274
  raw_answer = ''.join(answer_buf)
275
  ans_text = self._clean_answer(raw_answer)
276
 
@@ -278,31 +225,23 @@ class ProxyHandler:
278
  if settings.SHOW_THINK_TAGS and think_buf:
279
  raw_thinking = ''.join(think_buf)
280
  think_text = self._clean_thinking(raw_thinking)
281
- # 確保 thinking 內容不為空時才添加標籤和換行
282
  if think_text:
283
  final_content = f"<think>{think_text}</think>\n{ans_text}"
284
 
285
  return ChatCompletionResponse(
286
- id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
287
- created=int(time.time()),
288
- model=req.model,
289
  choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": "stop"}],
290
- # 可以在此添加 usage 信息,如果 API 返回的話
291
  )
292
  except httpx.RequestError as e:
293
- logger.error(f"Non-stream request error: {e}")
294
  if ck: await cookie_manager.mark_cookie_invalid(ck)
295
- raise HTTPException(502, f"Connection error to upstream: {e}")
 
296
  except Exception as e:
297
- logger.exception(f"Non-stream unexpected error: {e}") # 使用 exception 打印 traceback
298
  raise HTTPException(500, "Internal server error")
299
 
300
-
301
  # ---------- FastAPI entry ----------
302
  async def handle_chat_completion(self, req: ChatCompletionRequest):
303
- # 移除對 self.client 的重複創建,改用 __aenter__ 和 __aexit__
304
- # 在 FastAPI 中,通常使用 Depends 來管理依賴項的生命週期
305
- # 但這裡 ProxyHandler 作為一個普通類,這樣的寫法也是可以的
306
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
307
  if stream:
308
  return StreamingResponse(
 
8
  from fastapi import HTTPException
9
  from fastapi.responses import StreamingResponse
10
 
11
+ # 確保這些導入與您的項目結構匹配
12
  from config import settings
13
  from cookie_manager import cookie_manager
14
  from models import ChatCompletionRequest, ChatCompletionResponse
 
24
  http2=True,
25
  )
26
 
27
+ # FIX: 移除 __aenter__ 和 __aexit__,改用顯式的 aclose 方法
28
+ # aenter/aexit 模式不適用於需要跨越請求生命週期的流式響應
29
+ async def aclose(self):
30
+ """Closes the underlying httpx client."""
31
+ if not self.client.is_closed:
32
+ await self.client.aclose()
33
 
34
  # ---------- text utilities ----------
35
+ # _balance_think_tag, _clean_thinking, _clean_answer, _serialize_msgs 方法保持不變
36
  def _balance_think_tag(self, txt: str) -> str:
 
37
  opens = len(re.findall(r"<think>", txt))
38
  closes = len(re.findall(r"</think>", txt))
39
  if opens > closes:
 
44
  return txt
45
 
46
  def _clean_thinking(self, s: str) -> str:
 
47
  if not s: return s
 
48
  s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
49
  s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
 
50
  s = re.sub(r'<[^>]+>', '', s)
 
51
  s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
52
  return s.strip()
53
 
54
  def _clean_answer(self, s: str) -> str:
 
55
  if not s: return s
56
  return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
57
 
58
  def _serialize_msgs(self, msgs) -> list:
 
59
  out = []
60
  for m in msgs:
61
  if hasattr(m, "dict"): out.append(m.dict())
 
66
 
67
  # ---------- upstream ----------
68
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
69
+ # 此方法保持不變
70
  ck = await cookie_manager.get_next_cookie()
71
  if not ck: raise HTTPException(503, "No available cookies")
72
 
73
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
74
  body = {
75
+ "stream": True, "model": model, "messages": self._serialize_msgs(req.messages),
76
+ "background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()),
77
+ "features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,},
78
+ "id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"],
79
+ "model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [],
80
+ "variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  }
82
  headers = {
83
+ "Content-Type": "application/json", "Authorization": f"Bearer {ck}",
84
+ "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"),
85
+ "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN",
 
 
 
 
 
86
  "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
87
+ "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53",
88
+ "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",
 
 
 
89
  }
90
  return body, headers, ck
91
 
92
  # ---------- stream ----------
93
  async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
94
+ ck = None
95
  try:
96
  body, headers, ck = await self._prep_upstream(req)
97
  comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
98
+ think_open = False
99
+
100
+ # FIX: 維護一個持久的 phase 狀態
101
+ phase_cur = None
102
 
103
+ async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
104
  if resp.status_code != 200:
105
  await cookie_manager.mark_cookie_invalid(ck)
106
+ err_body = await resp.aread()
107
+ err_msg = f"Error: {resp.status_code} - {err_body.decode(errors='ignore')}"
 
 
 
 
108
  err = {
109
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model,
 
110
  "choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],
111
  }
112
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
 
115
  if not raw or raw.isspace(): continue
116
  for line in raw.split('\n'):
117
  line = line.strip()
118
+ if not line or not line.startswith('data: '): continue
119
 
120
  payload = line[6:]
121
  if payload == '[DONE]':
122
+ if think_open:
123
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
124
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
125
+ yield "data: [DONE]\n\n"; return
 
 
 
 
 
 
 
 
 
 
126
 
127
  try:
128
  parsed = json.loads(payload)
129
  except json.JSONDecodeError:
 
130
  continue
131
 
132
  dat = parsed.get("data", {})
133
+ delta, new_phase = dat.get("delta_content", ""), dat.get("phase")
134
+
135
+ # FIX: 正確的狀態管理邏輯
136
+ # 1. 如果收到了新的 phase,更新當前 phase
137
+ if new_phase and new_phase != phase_cur:
138
+ # 處理從 thinking 到 answer 的過渡
139
+ if phase_cur == "thinking" and new_phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
140
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]})}\n\n"
141
+ think_open = False
142
+ phase_cur = new_phase
143
+
144
+ if not delta: continue # 如果沒有內容,則跳過
 
 
 
 
145
 
146
+ # 2. 根據當前的 phase_cur 處理內容
 
147
  text_to_yield = ""
148
  if phase_cur == "thinking":
149
  if settings.SHOW_THINK_TAGS:
 
150
  if not think_open:
151
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
 
 
 
 
 
152
  think_open = True
153
  text_to_yield = self._clean_thinking(delta)
 
154
  elif phase_cur == "answer":
155
  text_to_yield = self._clean_answer(delta)
156
 
157
+ # 3. 發送處理後的內容
 
158
  if text_to_yield:
159
  out = {
160
+ "id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model,
 
161
  "choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],
162
  }
163
  yield f"data: {json.dumps(out)}\n\n"
 
164
 
165
  except httpx.RequestError as e:
166
+ if ck: await cookie_manager.mark_cookie_invalid(ck)
167
  logger.error(f"Request error: {e}")
168
+ err_msg = f"Connection error: {e}"
169
+ err = {"choices": [{"delta": {"content": err_msg}, "finish_reason": "stop"}]}
 
 
 
170
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
171
  except Exception as e:
172
+ logger.exception(f"Unexpected error in stream_proxy_response")
173
+ err = {"choices": [{"delta": {"content": f"Internal error in stream"}, "finish_reason": "stop"}]}
 
 
 
 
174
  yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
175
 
176
  # ---------- non-stream ----------
177
  async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
178
+ ck = None
 
 
179
  try:
180
  body, headers, ck = await self._prep_upstream(req)
181
  think_buf, answer_buf = [], []
182
 
183
+ # FIX: 維護一個持久的 phase 狀態
184
+ phase_cur = None
185
+
186
+ async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
187
  if resp.status_code != 200:
188
  await cookie_manager.mark_cookie_invalid(ck)
189
  error_detail = await resp.text()
 
190
  raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
191
 
192
  async for raw in resp.aiter_text():
193
  if not raw or raw.isspace(): continue
194
  for line in raw.split('\n'):
195
  line = line.strip()
196
+ if not line or not line.startswith('data: '): continue
197
  payload = line[6:]
198
  if payload == '[DONE]': break
199
+
200
+ try: parsed = json.loads(payload)
201
+ except json.JSONDecodeError: continue
202
+
203
  dat = parsed.get("data", {})
204
+ delta, new_phase = dat.get("delta_content", ""), dat.get("phase")
205
+
206
+ # FIX: 正確的狀態管理邏輯
207
+ # 1. 更新當前 phase
208
+ if new_phase:
209
+ phase_cur = new_phase
210
 
211
  if not delta: continue
212
 
213
+ # 2. 根據當前的 phase_cur 存儲內容
214
+ if phase_cur == "thinking":
215
  think_buf.append(delta)
216
+ elif phase_cur == "answer":
217
  answer_buf.append(delta)
218
+ else: # for-else, will be executed if loop finishes without break
219
+ pass
220
+
 
 
221
  raw_answer = ''.join(answer_buf)
222
  ans_text = self._clean_answer(raw_answer)
223
 
 
225
  if settings.SHOW_THINK_TAGS and think_buf:
226
  raw_thinking = ''.join(think_buf)
227
  think_text = self._clean_thinking(raw_thinking)
 
228
  if think_text:
229
  final_content = f"<think>{think_text}</think>\n{ans_text}"
230
 
231
  return ChatCompletionResponse(
232
+ id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
 
 
233
  choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": "stop"}],
 
234
  )
235
  except httpx.RequestError as e:
 
236
  if ck: await cookie_manager.mark_cookie_invalid(ck)
237
+ logger.error(f"Non-stream request error: {e}")
238
+ raise HTTPException(502, f"Connection error: {e}")
239
  except Exception as e:
240
+ logger.exception(f"Non-stream unexpected error")
241
  raise HTTPException(500, "Internal server error")
242
 
 
243
  # ---------- FastAPI entry ----------
244
  async def handle_chat_completion(self, req: ChatCompletionRequest):
 
 
 
245
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
246
  if stream:
247
  return StreamingResponse(