Spaces:
Paused
Paused
Upload 2 files
Browse files- main.py +38 -12
- proxy_handler.py +81 -142
main.py
CHANGED
|
@@ -9,8 +9,8 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
| 9 |
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 10 |
|
| 11 |
from config import settings
|
| 12 |
-
from models import ChatCompletionRequest, ModelsResponse, ModelInfo
|
| 13 |
-
from proxy_handler import ProxyHandler
|
| 14 |
from cookie_manager import cookie_manager
|
| 15 |
|
| 16 |
# Configure logging
|
|
@@ -20,12 +20,25 @@ logging.basicConfig(
|
|
| 20 |
)
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
# Security
|
| 24 |
security = HTTPBearer(auto_error=False)
|
| 25 |
|
| 26 |
@asynccontextmanager
|
| 27 |
async def lifespan(app: FastAPI):
|
| 28 |
"""Application lifespan manager"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Start background tasks
|
| 30 |
health_check_task = asyncio.create_task(cookie_manager.periodic_health_check())
|
| 31 |
|
|
@@ -33,11 +46,19 @@ async def lifespan(app: FastAPI):
|
|
| 33 |
yield
|
| 34 |
finally:
|
| 35 |
# Cleanup
|
|
|
|
| 36 |
health_check_task.cancel()
|
| 37 |
try:
|
| 38 |
await health_check_task
|
| 39 |
except asyncio.CancelledError:
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Create FastAPI app
|
| 43 |
app = FastAPI(
|
|
@@ -61,7 +82,6 @@ async def verify_auth(credentials: HTTPAuthorizationCredentials = Depends(securi
|
|
| 61 |
if not credentials:
|
| 62 |
raise HTTPException(status_code=401, detail="Authorization header required")
|
| 63 |
|
| 64 |
-
# Verify the API key matches our configured key
|
| 65 |
if credentials.credentials != settings.API_KEY:
|
| 66 |
raise HTTPException(status_code=401, detail="Invalid API key")
|
| 67 |
|
|
@@ -72,7 +92,7 @@ async def list_models():
|
|
| 72 |
"""List available models"""
|
| 73 |
models = [
|
| 74 |
ModelInfo(
|
| 75 |
-
id=settings.MODEL_ID,
|
| 76 |
object="model",
|
| 77 |
owned_by="z-ai"
|
| 78 |
)
|
|
@@ -82,31 +102,37 @@ async def list_models():
|
|
| 82 |
@app.post("/v1/chat/completions")
|
| 83 |
async def chat_completions(
|
| 84 |
request: ChatCompletionRequest,
|
| 85 |
-
|
| 86 |
):
|
| 87 |
"""Create chat completion"""
|
| 88 |
try:
|
| 89 |
-
# Check if cookies are configured
|
| 90 |
if not settings or not settings.COOKIES:
|
| 91 |
raise HTTPException(
|
| 92 |
status_code=503,
|
| 93 |
detail="Service unavailable: No Z.AI cookies configured. Please set Z_AI_COOKIES environment variable."
|
| 94 |
)
|
| 95 |
|
| 96 |
-
# Validate model
|
| 97 |
if request.model != settings.MODEL_NAME:
|
| 98 |
raise HTTPException(
|
| 99 |
status_code=400,
|
| 100 |
detail=f"Model '{request.model}' not supported. Use '{settings.MODEL_NAME}'"
|
| 101 |
)
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
|
| 106 |
except HTTPException:
|
| 107 |
raise
|
| 108 |
except Exception as e:
|
| 109 |
-
logger.
|
|
|
|
| 110 |
raise HTTPException(status_code=500, detail="Internal server error")
|
| 111 |
|
| 112 |
@app.get("/health")
|
|
@@ -137,4 +163,4 @@ if __name__ == "__main__":
|
|
| 137 |
port=settings.PORT,
|
| 138 |
reload=False,
|
| 139 |
log_level=settings.LOG_LEVEL.lower()
|
| 140 |
-
)
|
|
|
|
| 9 |
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
| 10 |
|
| 11 |
from config import settings
|
| 12 |
+
from models import ChatCompletionRequest, ModelsResponse, ModelInfo
|
| 13 |
+
from proxy_handler import ProxyHandler # 確保 proxy_handler.py 在同級目錄
|
| 14 |
from cookie_manager import cookie_manager
|
| 15 |
|
| 16 |
# Configure logging
|
|
|
|
| 20 |
)
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
| 23 |
+
# --- MODIFICATION START ---
|
| 24 |
+
# 1. 創建一個全局變量來持有 handler 實例
|
| 25 |
+
proxy_handler: ProxyHandler | None = None
|
| 26 |
+
# --- MODIFICATION END ---
|
| 27 |
+
|
| 28 |
# Security
|
| 29 |
security = HTTPBearer(auto_error=False)
|
| 30 |
|
| 31 |
@asynccontextmanager
|
| 32 |
async def lifespan(app: FastAPI):
|
| 33 |
"""Application lifespan manager"""
|
| 34 |
+
global proxy_handler
|
| 35 |
+
logger.info("Application startup: Initializing ProxyHandler and starting background tasks...")
|
| 36 |
+
|
| 37 |
+
# --- MODIFICATION START ---
|
| 38 |
+
# 2. 在應用啟動時初始化 ProxyHandler
|
| 39 |
+
proxy_handler = ProxyHandler()
|
| 40 |
+
# --- MODIFICATION END ---
|
| 41 |
+
|
| 42 |
# Start background tasks
|
| 43 |
health_check_task = asyncio.create_task(cookie_manager.periodic_health_check())
|
| 44 |
|
|
|
|
| 46 |
yield
|
| 47 |
finally:
|
| 48 |
# Cleanup
|
| 49 |
+
logger.info("Application shutdown: Cleaning up resources...")
|
| 50 |
health_check_task.cancel()
|
| 51 |
try:
|
| 52 |
await health_check_task
|
| 53 |
except asyncio.CancelledError:
|
| 54 |
+
logger.info("Cookie health check task cancelled.")
|
| 55 |
+
|
| 56 |
+
# --- MODIFICATION START ---
|
| 57 |
+
# 3. 在應用關閉時,安全地關閉 ProxyHandler 的 client
|
| 58 |
+
if proxy_handler:
|
| 59 |
+
await proxy_handler.aclose()
|
| 60 |
+
logger.info("ProxyHandler client closed.")
|
| 61 |
+
# --- MODIFICATION END ---
|
| 62 |
|
| 63 |
# Create FastAPI app
|
| 64 |
app = FastAPI(
|
|
|
|
| 82 |
if not credentials:
|
| 83 |
raise HTTPException(status_code=401, detail="Authorization header required")
|
| 84 |
|
|
|
|
| 85 |
if credentials.credentials != settings.API_KEY:
|
| 86 |
raise HTTPException(status_code=401, detail="Invalid API key")
|
| 87 |
|
|
|
|
| 92 |
"""List available models"""
|
| 93 |
models = [
|
| 94 |
ModelInfo(
|
| 95 |
+
id=settings.MODEL_ID, # 建議使用 settings.MODEL_NAME 以保持一致性
|
| 96 |
object="model",
|
| 97 |
owned_by="z-ai"
|
| 98 |
)
|
|
|
|
| 102 |
@app.post("/v1/chat/completions")
|
| 103 |
async def chat_completions(
|
| 104 |
request: ChatCompletionRequest,
|
| 105 |
+
_auth_token: str = Depends(verify_auth) # 變數名加底線表示未使用
|
| 106 |
):
|
| 107 |
"""Create chat completion"""
|
| 108 |
try:
|
|
|
|
| 109 |
if not settings or not settings.COOKIES:
|
| 110 |
raise HTTPException(
|
| 111 |
status_code=503,
|
| 112 |
detail="Service unavailable: No Z.AI cookies configured. Please set Z_AI_COOKIES environment variable."
|
| 113 |
)
|
| 114 |
|
|
|
|
| 115 |
if request.model != settings.MODEL_NAME:
|
| 116 |
raise HTTPException(
|
| 117 |
status_code=400,
|
| 118 |
detail=f"Model '{request.model}' not supported. Use '{settings.MODEL_NAME}'"
|
| 119 |
)
|
| 120 |
|
| 121 |
+
# --- MODIFICATION START ---
|
| 122 |
+
# 4. 直接使用全局的 proxy_handler 實例,並移除 async with 塊
|
| 123 |
+
if not proxy_handler:
|
| 124 |
+
# 這種情況理論上不應發生,因為 lifespan 會先執行
|
| 125 |
+
logger.error("Proxy handler is not initialized.")
|
| 126 |
+
raise HTTPException(status_code=503, detail="Service is not ready.")
|
| 127 |
+
|
| 128 |
+
return await proxy_handler.handle_chat_completion(request)
|
| 129 |
+
# --- MODIFICATION END ---
|
| 130 |
|
| 131 |
except HTTPException:
|
| 132 |
raise
|
| 133 |
except Exception as e:
|
| 134 |
+
# 使用 logger.exception 可以記錄完整的 traceback
|
| 135 |
+
logger.exception(f"Unexpected error in chat_completions endpoint: {e}")
|
| 136 |
raise HTTPException(status_code=500, detail="Internal server error")
|
| 137 |
|
| 138 |
@app.get("/health")
|
|
|
|
| 163 |
port=settings.PORT,
|
| 164 |
reload=False,
|
| 165 |
log_level=settings.LOG_LEVEL.lower()
|
| 166 |
+
)
|
proxy_handler.py
CHANGED
|
@@ -8,6 +8,7 @@ import httpx
|
|
| 8 |
from fastapi import HTTPException
|
| 9 |
from fastapi.responses import StreamingResponse
|
| 10 |
|
|
|
|
| 11 |
from config import settings
|
| 12 |
from cookie_manager import cookie_manager
|
| 13 |
from models import ChatCompletionRequest, ChatCompletionResponse
|
|
@@ -23,12 +24,16 @@ class ProxyHandler:
|
|
| 23 |
http2=True,
|
| 24 |
)
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# ---------- text utilities ----------
|
|
|
|
| 30 |
def _balance_think_tag(self, txt: str) -> str:
|
| 31 |
-
# ensure opens and closes count match
|
| 32 |
opens = len(re.findall(r"<think>", txt))
|
| 33 |
closes = len(re.findall(r"</think>", txt))
|
| 34 |
if opens > closes:
|
|
@@ -39,24 +44,18 @@ class ProxyHandler:
|
|
| 39 |
return txt
|
| 40 |
|
| 41 |
def _clean_thinking(self, s: str) -> str:
|
| 42 |
-
# strip html but keep text, be more careful about content preservation
|
| 43 |
if not s: return s
|
| 44 |
-
# Remove details/summary blocks
|
| 45 |
s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
|
| 46 |
s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
|
| 47 |
-
# Remove other HTML tags but preserve content
|
| 48 |
s = re.sub(r'<[^>]+>', '', s)
|
| 49 |
-
# Clean up markdown-style quotes at line start
|
| 50 |
s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
|
| 51 |
return s.strip()
|
| 52 |
|
| 53 |
def _clean_answer(self, s: str) -> str:
|
| 54 |
-
# remove <details> blocks but preserve other content
|
| 55 |
if not s: return s
|
| 56 |
return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
|
| 57 |
|
| 58 |
def _serialize_msgs(self, msgs) -> list:
|
| 59 |
-
# convert messages to dict
|
| 60 |
out = []
|
| 61 |
for m in msgs:
|
| 62 |
if hasattr(m, "dict"): out.append(m.dict())
|
|
@@ -67,70 +66,47 @@ class ProxyHandler:
|
|
| 67 |
|
| 68 |
# ---------- upstream ----------
|
| 69 |
async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
|
|
|
|
| 70 |
ck = await cookie_manager.get_next_cookie()
|
| 71 |
if not ck: raise HTTPException(503, "No available cookies")
|
| 72 |
|
| 73 |
model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
|
| 74 |
body = {
|
| 75 |
-
"stream": True,
|
| 76 |
-
"
|
| 77 |
-
"
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
-
"image_generation": False, "code_interpreter": False,
|
| 82 |
-
"web_search": False, "auto_web_search": False, "enable_thinking": True,
|
| 83 |
-
},
|
| 84 |
-
"id": str(uuid.uuid4()),
|
| 85 |
-
"mcp_servers": ["deep-web-search"],
|
| 86 |
-
"model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"},
|
| 87 |
-
"params": {},
|
| 88 |
-
"tool_servers": [],
|
| 89 |
-
"variables": {
|
| 90 |
-
"{{USER_NAME}}": "User",
|
| 91 |
-
"{{USER_LOCATION}}": "Unknown",
|
| 92 |
-
"{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),
|
| 93 |
-
},
|
| 94 |
}
|
| 95 |
headers = {
|
| 96 |
-
"Content-Type": "application/json",
|
| 97 |
-
"
|
| 98 |
-
"
|
| 99 |
-
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
| 100 |
-
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"
|
| 101 |
-
),
|
| 102 |
-
"Accept": "application/json, text/event-stream",
|
| 103 |
-
"Accept-Language": "zh-CN",
|
| 104 |
"sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
|
| 105 |
-
"sec-ch-ua-mobile": "?0",
|
| 106 |
-
"
|
| 107 |
-
"x-fe-version": "prod-fe-1.0.53",
|
| 108 |
-
"Origin": "https://chat.z.ai",
|
| 109 |
-
"Referer": "https://chat.z.ai/",
|
| 110 |
}
|
| 111 |
return body, headers, ck
|
| 112 |
|
| 113 |
# ---------- stream ----------
|
| 114 |
async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
|
| 115 |
-
|
| 116 |
try:
|
| 117 |
body, headers, ck = await self._prep_upstream(req)
|
| 118 |
comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
| 119 |
-
think_open
|
| 120 |
-
|
|
|
|
|
|
|
| 121 |
|
| 122 |
-
async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers
|
| 123 |
if resp.status_code != 200:
|
| 124 |
await cookie_manager.mark_cookie_invalid(ck)
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
err_body = await resp.aread()
|
| 128 |
-
err_msg += f" - {err_body.decode()}"
|
| 129 |
-
except Exception:
|
| 130 |
-
pass
|
| 131 |
err = {
|
| 132 |
-
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
|
| 133 |
-
"model": req.model,
|
| 134 |
"choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],
|
| 135 |
}
|
| 136 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
|
|
@@ -139,138 +115,109 @@ class ProxyHandler:
|
|
| 139 |
if not raw or raw.isspace(): continue
|
| 140 |
for line in raw.split('\n'):
|
| 141 |
line = line.strip()
|
| 142 |
-
if not line or
|
| 143 |
|
| 144 |
payload = line[6:]
|
| 145 |
if payload == '[DONE]':
|
| 146 |
-
if think_open:
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
"choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
|
| 151 |
-
}
|
| 152 |
-
yield f"data: {json.dumps(close_c)}\n\n"
|
| 153 |
-
think_open = False
|
| 154 |
-
|
| 155 |
-
final_c = {
|
| 156 |
-
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
|
| 157 |
-
"model": req.model, "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
|
| 158 |
-
}
|
| 159 |
-
yield f"data: {json.dumps(final_c)}\n\n"; yield "data: [DONE]\n\n"; return
|
| 160 |
|
| 161 |
try:
|
| 162 |
parsed = json.loads(payload)
|
| 163 |
except json.JSONDecodeError:
|
| 164 |
-
logger.warning(f"Failed to decode JSON payload: {payload}")
|
| 165 |
continue
|
| 166 |
|
| 167 |
dat = parsed.get("data", {})
|
| 168 |
-
delta,
|
| 169 |
-
|
| 170 |
-
#
|
| 171 |
-
# 1.
|
| 172 |
-
if
|
| 173 |
-
#
|
| 174 |
-
if phase_cur == "thinking" and
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
}
|
| 181 |
-
yield f"data: {json.dumps(close_c)}\n\n"
|
| 182 |
-
think_open = False
|
| 183 |
-
phase_cur = phase
|
| 184 |
|
| 185 |
-
# 2. 處理內容
|
| 186 |
-
# 這個邏輯塊獨立於階段變化,確保當前 chunk 的內容總是被處理
|
| 187 |
text_to_yield = ""
|
| 188 |
if phase_cur == "thinking":
|
| 189 |
if settings.SHOW_THINK_TAGS:
|
| 190 |
-
# 如果 think 標籤還沒打開,就打開它
|
| 191 |
if not think_open:
|
| 192 |
-
|
| 193 |
-
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
|
| 194 |
-
"model": req.model,
|
| 195 |
-
"choices": [{"index": 0, "delta": {"content": "<think>"}, "finish_reason": None}],
|
| 196 |
-
}
|
| 197 |
-
yield f"data: {json.dumps(open_c)}\n\n"
|
| 198 |
think_open = True
|
| 199 |
text_to_yield = self._clean_thinking(delta)
|
| 200 |
-
|
| 201 |
elif phase_cur == "answer":
|
| 202 |
text_to_yield = self._clean_answer(delta)
|
| 203 |
|
| 204 |
-
# 3. 發送內容
|
| 205 |
-
# 只有在 text_to_yield 有實際內容時才發送,避免發送空 chunk
|
| 206 |
if text_to_yield:
|
| 207 |
out = {
|
| 208 |
-
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()),
|
| 209 |
-
"model": req.model,
|
| 210 |
"choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],
|
| 211 |
}
|
| 212 |
yield f"data: {json.dumps(out)}\n\n"
|
| 213 |
-
# --- 主要修復邏輯結束 ---
|
| 214 |
|
| 215 |
except httpx.RequestError as e:
|
|
|
|
| 216 |
logger.error(f"Request error: {e}")
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
"created": int(time.time()), "model": req.model,
|
| 220 |
-
"choices": [{"index": 0, "delta": {"content": f"Connection error: {e}"}, "finish_reason": "stop"}],
|
| 221 |
-
}
|
| 222 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
|
| 223 |
except Exception as e:
|
| 224 |
-
logger.exception(f"Unexpected error in stream_proxy_response
|
| 225 |
-
err = {
|
| 226 |
-
"id": f"chatcmpl-{uuid.uuid4().hex[:29]}", "object": "chat.completion.chunk",
|
| 227 |
-
"created": int(time.time()), "model": req.model,
|
| 228 |
-
"choices": [{"index": 0, "delta": {"content": f"Internal server error."}, "finish_reason": "stop"}],
|
| 229 |
-
}
|
| 230 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
|
| 231 |
|
| 232 |
# ---------- non-stream ----------
|
| 233 |
async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
|
| 234 |
-
|
| 235 |
-
# 此處的邏輯已是最佳實踐,無需大改。
|
| 236 |
-
ck = None # 在 try 外部定義,以便 finally 中可以訪問
|
| 237 |
try:
|
| 238 |
body, headers, ck = await self._prep_upstream(req)
|
| 239 |
think_buf, answer_buf = [], []
|
| 240 |
|
| 241 |
-
#
|
| 242 |
-
|
|
|
|
|
|
|
| 243 |
if resp.status_code != 200:
|
| 244 |
await cookie_manager.mark_cookie_invalid(ck)
|
| 245 |
error_detail = await resp.text()
|
| 246 |
-
logger.error(f"Upstream error {resp.status_code}: {error_detail}")
|
| 247 |
raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
|
| 248 |
|
| 249 |
async for raw in resp.aiter_text():
|
| 250 |
if not raw or raw.isspace(): continue
|
| 251 |
for line in raw.split('\n'):
|
| 252 |
line = line.strip()
|
| 253 |
-
if not line or
|
| 254 |
payload = line[6:]
|
| 255 |
if payload == '[DONE]': break
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
except json.JSONDecodeError:
|
| 259 |
-
|
| 260 |
dat = parsed.get("data", {})
|
| 261 |
-
delta,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
if not delta: continue
|
| 264 |
|
| 265 |
-
|
|
|
|
| 266 |
think_buf.append(delta)
|
| 267 |
-
elif
|
| 268 |
answer_buf.append(delta)
|
| 269 |
-
#
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
# 合併內容後再進行清理
|
| 274 |
raw_answer = ''.join(answer_buf)
|
| 275 |
ans_text = self._clean_answer(raw_answer)
|
| 276 |
|
|
@@ -278,31 +225,23 @@ class ProxyHandler:
|
|
| 278 |
if settings.SHOW_THINK_TAGS and think_buf:
|
| 279 |
raw_thinking = ''.join(think_buf)
|
| 280 |
think_text = self._clean_thinking(raw_thinking)
|
| 281 |
-
# 確保 thinking 內容不為空時才添加標籤和換行
|
| 282 |
if think_text:
|
| 283 |
final_content = f"<think>{think_text}</think>\n{ans_text}"
|
| 284 |
|
| 285 |
return ChatCompletionResponse(
|
| 286 |
-
id=f"chatcmpl-{uuid.uuid4().hex[:29]}",
|
| 287 |
-
created=int(time.time()),
|
| 288 |
-
model=req.model,
|
| 289 |
choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": "stop"}],
|
| 290 |
-
# 可以在此添加 usage 信息,如果 API 返回的話
|
| 291 |
)
|
| 292 |
except httpx.RequestError as e:
|
| 293 |
-
logger.error(f"Non-stream request error: {e}")
|
| 294 |
if ck: await cookie_manager.mark_cookie_invalid(ck)
|
| 295 |
-
|
|
|
|
| 296 |
except Exception as e:
|
| 297 |
-
logger.exception(f"Non-stream unexpected error
|
| 298 |
raise HTTPException(500, "Internal server error")
|
| 299 |
|
| 300 |
-
|
| 301 |
# ---------- FastAPI entry ----------
|
| 302 |
async def handle_chat_completion(self, req: ChatCompletionRequest):
|
| 303 |
-
# 移除對 self.client 的重複創建,改用 __aenter__ 和 __aexit__
|
| 304 |
-
# 在 FastAPI 中,通常使用 Depends 來管理依賴項的生命週期
|
| 305 |
-
# 但這裡 ProxyHandler 作為一個普通類,這樣的寫法也是可以的
|
| 306 |
stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
|
| 307 |
if stream:
|
| 308 |
return StreamingResponse(
|
|
|
|
| 8 |
from fastapi import HTTPException
|
| 9 |
from fastapi.responses import StreamingResponse
|
| 10 |
|
| 11 |
+
# 確保這些導入與您的項目結構匹配
|
| 12 |
from config import settings
|
| 13 |
from cookie_manager import cookie_manager
|
| 14 |
from models import ChatCompletionRequest, ChatCompletionResponse
|
|
|
|
| 24 |
http2=True,
|
| 25 |
)
|
| 26 |
|
| 27 |
+
# FIX: 移除 __aenter__ 和 __aexit__,改用顯式的 aclose 方法
|
| 28 |
+
# aenter/aexit 模式不適用於需要跨越請求生命週期的流式響應
|
| 29 |
+
async def aclose(self):
|
| 30 |
+
"""Closes the underlying httpx client."""
|
| 31 |
+
if not self.client.is_closed:
|
| 32 |
+
await self.client.aclose()
|
| 33 |
|
| 34 |
# ---------- text utilities ----------
|
| 35 |
+
# _balance_think_tag, _clean_thinking, _clean_answer, _serialize_msgs 方法保持不變
|
| 36 |
def _balance_think_tag(self, txt: str) -> str:
|
|
|
|
| 37 |
opens = len(re.findall(r"<think>", txt))
|
| 38 |
closes = len(re.findall(r"</think>", txt))
|
| 39 |
if opens > closes:
|
|
|
|
| 44 |
return txt
|
| 45 |
|
| 46 |
def _clean_thinking(self, s: str) -> str:
|
|
|
|
| 47 |
if not s: return s
|
|
|
|
| 48 |
s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
|
| 49 |
s = re.sub(r'<summary[^>]*>.*?</summary>', '', s, flags=re.DOTALL)
|
|
|
|
| 50 |
s = re.sub(r'<[^>]+>', '', s)
|
|
|
|
| 51 |
s = re.sub(r'^\s*>\s*', '', s, flags=re.MULTILINE)
|
| 52 |
return s.strip()
|
| 53 |
|
| 54 |
def _clean_answer(self, s: str) -> str:
|
|
|
|
| 55 |
if not s: return s
|
| 56 |
return re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
|
| 57 |
|
| 58 |
def _serialize_msgs(self, msgs) -> list:
|
|
|
|
| 59 |
out = []
|
| 60 |
for m in msgs:
|
| 61 |
if hasattr(m, "dict"): out.append(m.dict())
|
|
|
|
| 66 |
|
| 67 |
# ---------- upstream ----------
|
| 68 |
async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
|
| 69 |
+
# 此方法保持不變
|
| 70 |
ck = await cookie_manager.get_next_cookie()
|
| 71 |
if not ck: raise HTTPException(503, "No available cookies")
|
| 72 |
|
| 73 |
model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
|
| 74 |
body = {
|
| 75 |
+
"stream": True, "model": model, "messages": self._serialize_msgs(req.messages),
|
| 76 |
+
"background_tasks": {"title_generation": True, "tags_generation": True}, "chat_id": str(uuid.uuid4()),
|
| 77 |
+
"features": {"image_generation": False, "code_interpreter": False, "web_search": False, "auto_web_search": False, "enable_thinking": True,},
|
| 78 |
+
"id": str(uuid.uuid4()), "mcp_servers": ["deep-web-search"],
|
| 79 |
+
"model_item": {"id": model, "name": "GLM-4.5", "owned_by": "openai"}, "params": {}, "tool_servers": [],
|
| 80 |
+
"variables": {"{{USER_NAME}}": "User", "{{USER_LOCATION}}": "Unknown", "{{CURRENT_DATETIME}}": time.strftime("%Y-%m-%d %H:%M:%S"),},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
}
|
| 82 |
headers = {
|
| 83 |
+
"Content-Type": "application/json", "Authorization": f"Bearer {ck}",
|
| 84 |
+
"User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"),
|
| 85 |
+
"Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
"sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
|
| 87 |
+
"sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53",
|
| 88 |
+
"Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",
|
|
|
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
return body, headers, ck
|
| 91 |
|
| 92 |
# ---------- stream ----------
|
| 93 |
async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
|
| 94 |
+
ck = None
|
| 95 |
try:
|
| 96 |
body, headers, ck = await self._prep_upstream(req)
|
| 97 |
comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
| 98 |
+
think_open = False
|
| 99 |
+
|
| 100 |
+
# FIX: 維護一個持久的 phase 狀態
|
| 101 |
+
phase_cur = None
|
| 102 |
|
| 103 |
+
async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
|
| 104 |
if resp.status_code != 200:
|
| 105 |
await cookie_manager.mark_cookie_invalid(ck)
|
| 106 |
+
err_body = await resp.aread()
|
| 107 |
+
err_msg = f"Error: {resp.status_code} - {err_body.decode(errors='ignore')}"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
err = {
|
| 109 |
+
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model,
|
|
|
|
| 110 |
"choices": [{"index": 0, "delta": {"content": err_msg}, "finish_reason": "stop"}],
|
| 111 |
}
|
| 112 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"; return
|
|
|
|
| 115 |
if not raw or raw.isspace(): continue
|
| 116 |
for line in raw.split('\n'):
|
| 117 |
line = line.strip()
|
| 118 |
+
if not line or not line.startswith('data: '): continue
|
| 119 |
|
| 120 |
payload = line[6:]
|
| 121 |
if payload == '[DONE]':
|
| 122 |
+
if think_open:
|
| 123 |
+
yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
|
| 124 |
+
yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}]})}\n\n"
|
| 125 |
+
yield "data: [DONE]\n\n"; return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
|
| 127 |
try:
|
| 128 |
parsed = json.loads(payload)
|
| 129 |
except json.JSONDecodeError:
|
|
|
|
| 130 |
continue
|
| 131 |
|
| 132 |
dat = parsed.get("data", {})
|
| 133 |
+
delta, new_phase = dat.get("delta_content", ""), dat.get("phase")
|
| 134 |
+
|
| 135 |
+
# FIX: 正確的狀態管理邏輯
|
| 136 |
+
# 1. 如果收到了新的 phase,更新當前 phase
|
| 137 |
+
if new_phase and new_phase != phase_cur:
|
| 138 |
+
# 處理從 thinking 到 answer 的過渡
|
| 139 |
+
if phase_cur == "thinking" and new_phase == "answer" and think_open and settings.SHOW_THINK_TAGS:
|
| 140 |
+
yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>\n'}, 'finish_reason': None}]})}\n\n"
|
| 141 |
+
think_open = False
|
| 142 |
+
phase_cur = new_phase
|
| 143 |
+
|
| 144 |
+
if not delta: continue # 如果沒有內容,則跳過
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
+
# 2. 根據當前的 phase_cur 處理內容
|
|
|
|
| 147 |
text_to_yield = ""
|
| 148 |
if phase_cur == "thinking":
|
| 149 |
if settings.SHOW_THINK_TAGS:
|
|
|
|
| 150 |
if not think_open:
|
| 151 |
+
yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
think_open = True
|
| 153 |
text_to_yield = self._clean_thinking(delta)
|
|
|
|
| 154 |
elif phase_cur == "answer":
|
| 155 |
text_to_yield = self._clean_answer(delta)
|
| 156 |
|
| 157 |
+
# 3. 發送處理後的內容
|
|
|
|
| 158 |
if text_to_yield:
|
| 159 |
out = {
|
| 160 |
+
"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model,
|
|
|
|
| 161 |
"choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],
|
| 162 |
}
|
| 163 |
yield f"data: {json.dumps(out)}\n\n"
|
|
|
|
| 164 |
|
| 165 |
except httpx.RequestError as e:
|
| 166 |
+
if ck: await cookie_manager.mark_cookie_invalid(ck)
|
| 167 |
logger.error(f"Request error: {e}")
|
| 168 |
+
err_msg = f"Connection error: {e}"
|
| 169 |
+
err = {"choices": [{"delta": {"content": err_msg}, "finish_reason": "stop"}]}
|
|
|
|
|
|
|
|
|
|
| 170 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
|
| 171 |
except Exception as e:
|
| 172 |
+
logger.exception(f"Unexpected error in stream_proxy_response")
|
| 173 |
+
err = {"choices": [{"delta": {"content": f"Internal error in stream"}, "finish_reason": "stop"}]}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
yield f"data: {json.dumps(err)}\n\n"; yield "data: [DONE]\n\n"
|
| 175 |
|
| 176 |
# ---------- non-stream ----------
|
| 177 |
async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
|
| 178 |
+
ck = None
|
|
|
|
|
|
|
| 179 |
try:
|
| 180 |
body, headers, ck = await self._prep_upstream(req)
|
| 181 |
think_buf, answer_buf = [], []
|
| 182 |
|
| 183 |
+
# FIX: 維護一個持久的 phase 狀態
|
| 184 |
+
phase_cur = None
|
| 185 |
+
|
| 186 |
+
async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
|
| 187 |
if resp.status_code != 200:
|
| 188 |
await cookie_manager.mark_cookie_invalid(ck)
|
| 189 |
error_detail = await resp.text()
|
|
|
|
| 190 |
raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
|
| 191 |
|
| 192 |
async for raw in resp.aiter_text():
|
| 193 |
if not raw or raw.isspace(): continue
|
| 194 |
for line in raw.split('\n'):
|
| 195 |
line = line.strip()
|
| 196 |
+
if not line or not line.startswith('data: '): continue
|
| 197 |
payload = line[6:]
|
| 198 |
if payload == '[DONE]': break
|
| 199 |
+
|
| 200 |
+
try: parsed = json.loads(payload)
|
| 201 |
+
except json.JSONDecodeError: continue
|
| 202 |
+
|
| 203 |
dat = parsed.get("data", {})
|
| 204 |
+
delta, new_phase = dat.get("delta_content", ""), dat.get("phase")
|
| 205 |
+
|
| 206 |
+
# FIX: 正確的狀態管理邏輯
|
| 207 |
+
# 1. 更新當前 phase
|
| 208 |
+
if new_phase:
|
| 209 |
+
phase_cur = new_phase
|
| 210 |
|
| 211 |
if not delta: continue
|
| 212 |
|
| 213 |
+
# 2. 根據當前的 phase_cur 存儲內容
|
| 214 |
+
if phase_cur == "thinking":
|
| 215 |
think_buf.append(delta)
|
| 216 |
+
elif phase_cur == "answer":
|
| 217 |
answer_buf.append(delta)
|
| 218 |
+
else: # for-else, will be executed if loop finishes without break
|
| 219 |
+
pass
|
| 220 |
+
|
|
|
|
|
|
|
| 221 |
raw_answer = ''.join(answer_buf)
|
| 222 |
ans_text = self._clean_answer(raw_answer)
|
| 223 |
|
|
|
|
| 225 |
if settings.SHOW_THINK_TAGS and think_buf:
|
| 226 |
raw_thinking = ''.join(think_buf)
|
| 227 |
think_text = self._clean_thinking(raw_thinking)
|
|
|
|
| 228 |
if think_text:
|
| 229 |
final_content = f"<think>{think_text}</think>\n{ans_text}"
|
| 230 |
|
| 231 |
return ChatCompletionResponse(
|
| 232 |
+
id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
|
|
|
|
|
|
|
| 233 |
choices=[{"index": 0, "message": {"role": "assistant", "content": final_content}, "finish_reason": "stop"}],
|
|
|
|
| 234 |
)
|
| 235 |
except httpx.RequestError as e:
|
|
|
|
| 236 |
if ck: await cookie_manager.mark_cookie_invalid(ck)
|
| 237 |
+
logger.error(f"Non-stream request error: {e}")
|
| 238 |
+
raise HTTPException(502, f"Connection error: {e}")
|
| 239 |
except Exception as e:
|
| 240 |
+
logger.exception(f"Non-stream unexpected error")
|
| 241 |
raise HTTPException(500, "Internal server error")
|
| 242 |
|
|
|
|
| 243 |
# ---------- FastAPI entry ----------
|
| 244 |
async def handle_chat_completion(self, req: ChatCompletionRequest):
|
|
|
|
|
|
|
|
|
|
| 245 |
stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
|
| 246 |
if stream:
|
| 247 |
return StreamingResponse(
|