"""
Proxy handler for Z.AI API requests
"""
import json
import logging
import re
import time
import uuid
from typing import Any, AsyncGenerator, Dict, Optional

import httpx
from fastapi import HTTPException
from fastapi.responses import StreamingResponse

from config import settings
from cookie_manager import cookie_manager
from models import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse
logger = logging.getLogger(__name__)
class ProxyHandler:
    """Proxies OpenAI-style chat completion requests to the Z.AI upstream API."""

    def __init__(self):
        # One shared async client for all upstream calls; 60 s accommodates
        # slow streaming replies from Z.AI.
        self.client = httpx.AsyncClient(timeout=60.0)

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Release pooled connections when used as an async context manager.
        await self.client.aclose()
def transform_content(self, content: str) -> str:
"""Transform content by replacing HTML tags and optionally removing think tags"""
if not content:
return content
logger.debug(f"SHOW_THINK_TAGS setting: {settings.SHOW_THINK_TAGS}")
# Optionally remove thinking content based on configuration
if not settings.SHOW_THINK_TAGS:
logger.debug("Removing thinking content from response")
original_length = len(content)
# Remove <details> blocks (thinking content) - handle both closed and unclosed tags
# First try to remove complete <details>...</details> blocks
content = re.sub(r'<details[^>]*>.*?</details>', '', content, flags=re.DOTALL)
# Then remove any remaining <details> opening tags and everything after them until we hit answer content
# Look for pattern: <details...><summary>...</summary>...content... and remove the thinking part
content = re.sub(r'<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)', '', content, flags=re.DOTALL)
content = content.strip()
logger.debug(f"Content length after removing thinking content: {original_length} -> {len(content)}")
else:
logger.debug("Keeping thinking content, converting to <think> tags")
# Replace <details> with <think>
content = re.sub(r'<details[^>]*>', '<think>', content)
content = content.replace('</details>', '</think>')
# Remove <summary> tags and their content
content = re.sub(r'<summary>.*?</summary>', '', content, flags=re.DOTALL)
# If there's no closing </think>, add it at the end of thinking content
if '<think>' in content and '</think>' not in content:
# Find where thinking ends and answer begins
think_start = content.find('<think>')
if think_start != -1:
# Look for the start of the actual answer (usually starts with a capital letter or number)
answer_match = re.search(r'\n\s*[A-Z0-9]', content[think_start:])
if answer_match:
insert_pos = think_start + answer_match.start()
content = content[:insert_pos] + '</think>\n' + content[insert_pos:]
else:
content += '</think>'
return content.strip()
async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
"""Proxy request to Z.AI API"""
cookie = await cookie_manager.get_next_cookie()
if not cookie:
raise HTTPException(status_code=503, detail="No available cookies")
# Transform model name
target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
# Determine if this should be a streaming response
is_streaming = request.stream if request.stream is not None else settings.DEFAULT_STREAM
# Validate parameter compatibility
if is_streaming and not settings.SHOW_THINK_TAGS:
logger.warning("SHOW_THINK_TAGS=false is ignored for streaming responses")
# Prepare request data
request_data = request.model_dump(exclude_none=True)
request_data["model"] = target_model
# Build request data based on actual Z.AI format from zai-messages.md
import uuid
request_data = {
"stream": True, # Always request streaming from Z.AI for processing
"model": target_model,
"messages": request_data["messages"],
"background_tasks": {
"title_generation": True,
"tags_generation": True
},
"chat_id": str(uuid.uuid4()),
"features": {
"image_generation": False,
"code_interpreter": False,
"web_search": False,
"auto_web_search": False
},
"id": str(uuid.uuid4()),
"mcp_servers": ["deep-web-search"],
"model_item": {
"id": target_model,
"name": "GLM-4.5",
"owned_by": "openai"
},
"params": {},
"tool_servers": [],
"variables": {
"{{USER_NAME}}": "User",
"{{USER_LOCATION}}": "Unknown",
"{{CURRENT_DATETIME}}": "2025-08-04 16:46:56"
}
}
logger.debug(f"Sending request data: {request_data}")
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {cookie}",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36",
"Accept": "application/json, text/event-stream",
"Accept-Language": "zh-CN",
"sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"',
"sec-ch-ua-mobile": "?0",
"sec-ch-ua-platform": '"macOS"',
"x-fe-version": "prod-fe-1.0.53",
"Origin": "https://chat.z.ai",
"Referer": "https://chat.z.ai/c/069723d5-060b-404f-992c-4705f1554c4c"
}
try:
response = await self.client.post(
settings.UPSTREAM_URL,
json=request_data,
headers=headers
)
if response.status_code == 401:
await cookie_manager.mark_cookie_failed(cookie)
raise HTTPException(status_code=401, detail="Invalid authentication")
if response.status_code != 200:
raise HTTPException(status_code=response.status_code, detail=f"Upstream error: {response.text}")
await cookie_manager.mark_cookie_success(cookie)
return {"response": response, "cookie": cookie}
except httpx.RequestError as e:
logger.error(f"Request error: {e}")
await cookie_manager.mark_cookie_failed(cookie)
raise HTTPException(status_code=503, detail="Upstream service unavailable")
    async def process_streaming_response(self, response: httpx.Response) -> AsyncGenerator[Dict[str, Any], None]:
        """Process streaming response from Z.AI.

        Parses the upstream SSE stream line by line, yielding each parsed
        JSON event dict until the "[DONE]" sentinel.

        Raises:
            HTTPException: 400 when the upstream reports an error event.
        """
        # Accumulate partial SSE text across chunks; only complete
        # newline-terminated lines are parsed.
        buffer = ""
        async for chunk in response.aiter_text():
            buffer += chunk
            lines = buffer.split('\n')
            buffer = lines[-1]  # Keep incomplete line in buffer
            for line in lines[:-1]:
                line = line.strip()
                # SSE data lines are prefixed "data: "; ignore everything else
                # (comments, event names, blank keep-alives).
                if not line.startswith("data: "):
                    continue
                payload = line[6:].strip()
                if payload == "[DONE]":
                    return
                try:
                    parsed = json.loads(payload)
                    # Check for errors first — the error may sit at the top
                    # level or nested under "data".
                    if parsed.get("error") or (parsed.get("data", {}).get("error")):
                        error_detail = (parsed.get("error", {}).get("detail") or
                                        parsed.get("data", {}).get("error", {}).get("detail") or
                                        "Unknown error from upstream")
                        logger.error(f"Upstream error: {error_detail}")
                        raise HTTPException(status_code=400, detail=f"Upstream error: {error_detail}")
                    # Transform the response
                    if parsed.get("data"):
                        # Remove unwanted fields
                        parsed["data"].pop("edit_index", None)
                        parsed["data"].pop("edit_content", None)
                    # Note: We don't transform delta_content here because <think> tags
                    # might span multiple chunks. We'll transform the final aggregated content.
                    # NOTE(review): every parsed event is yielded, including ones
                    # without a "data" key — confirm downstream expects that.
                    yield parsed
                except json.JSONDecodeError:
                    continue  # Skip non-JSON lines
async def handle_chat_completion(self, request: ChatCompletionRequest):
"""Handle chat completion request"""
proxy_result = await self.proxy_request(request)
response = proxy_result["response"]
# Determine final streaming mode
is_streaming = request.stream if request.stream is not None else settings.DEFAULT_STREAM
if is_streaming:
# For streaming responses, SHOW_THINK_TAGS setting is ignored
return StreamingResponse(
self.stream_response(response, request.model),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
}
)
else:
# For non-streaming responses, SHOW_THINK_TAGS setting applies
return await self.non_stream_response(response, request.model)
async def stream_response(self, response: httpx.Response, model: str) -> AsyncGenerator[str, None]:
"""Generate streaming response"""
async for parsed in self.process_streaming_response(response):
yield f"data: {json.dumps(parsed)}\n\n"
yield "data: [DONE]\n\n"
async def non_stream_response(self, response: httpx.Response, model: str) -> ChatCompletionResponse:
"""Generate non-streaming response"""
chunks = []
async for parsed in self.process_streaming_response(response):
chunks.append(parsed)
logger.debug(f"Received chunk: {parsed}") # Debug log
if not chunks:
raise HTTPException(status_code=500, detail="No response from upstream")
logger.info(f"Total chunks received: {len(chunks)}")
logger.debug(f"First chunk structure: {chunks[0] if chunks else 'None'}")
# Aggregate content based on SHOW_THINK_TAGS setting
if settings.SHOW_THINK_TAGS:
# Include all content
full_content = "".join(
chunk.get("data", {}).get("delta_content", "") for chunk in chunks
)
else:
# Only include answer phase content
full_content = "".join(
chunk.get("data", {}).get("delta_content", "")
for chunk in chunks
if chunk.get("data", {}).get("phase") == "answer"
)
logger.info(f"Aggregated content length: {len(full_content)}")
logger.debug(f"Full aggregated content: {full_content}") # Show full content for debugging
# Apply content transformation (including think tag filtering)
transformed_content = self.transform_content(full_content)
logger.info(f"Transformed content length: {len(transformed_content)}")
logger.debug(f"Transformed content: {transformed_content[:200]}...")
# Create OpenAI-compatible response
return ChatCompletionResponse(
id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown"),
created=int(time.time()),
model=model,
choices=[{
"index": 0,
"message": {
"role": "assistant",
"content": transformed_content
},
"finish_reason": "stop"
}]
)