Spaces:

bluewinliang
/

zai2api

Paused

App Files Community

bluewinliang committed on Aug 6, 2025

Commit

16171fa

verified ·

1 Parent(s): bf3f212

Upload proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +155 -115

proxy_handler.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """
 Proxy handler for Z.AI API requests
 """
 import json
 import logging
 import re
@@ -12,20 +13,25 @@ from fastapi.responses import StreamingResponse
 from config import settings
 from cookie_manager import cookie_manager
-from models import ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamResponse
 logger = logging.getLogger(__name__)
 class ProxyHandler:
     def __init__(self):
         self.client = httpx.AsyncClient(timeout=60.0)
     async def __aenter__(self):
         return self
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         await self.client.aclose()
     def transform_content(self, content: str) -> str:
         """Transform content by replacing HTML tags and optionally removing think tags"""
         if not content:
@@ -39,79 +45,95 @@ class ProxyHandler:
             original_length = len(content)
             # Remove <details> blocks (thinking content) - handle both closed and unclosed tags
-            content = re.sub(r'<details[^>]*>.*?</details>', '', content, flags=re.DOTALL)
-            content = re.sub(r'<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)', '', content, flags=re.DOTALL)
             content = content.strip()
-            logger.debug(f"Content length after removing thinking content: {original_length} -> {len(content)}")
         else:
             logger.debug("Keeping thinking content, converting to <think> tags")
             # Replace <details> with <think>
-            content = re.sub(r'<details[^>]*>', '<think>', content)
-            content = content.replace('</details>', '</think>')
             # Remove <summary> tags and their content
-            content = re.sub(r'<summary>.*?</summary>', '', content, flags=re.DOTALL)
             # If there's no closing </think>, add it at the end of thinking content
-            if '<think>' in content and '</think>' not in content:
-                think_start = content.find('<think>')
                 if think_start != -1:
-                    answer_match = re.search(r'\n\s*[A-Z0-9]', content[think_start:])
                     if answer_match:
                         insert_pos = think_start + answer_match.start()
-                        content = content[:insert_pos] + '</think>\n' + content[insert_pos:]
                     else:
-                        content += '</think>'
         return content.strip()
     def transform_delta_content(self, content: str) -> str:
         """Transform delta content for streaming"""
         if not content:
             return content
         # Convert <details> to <think> and remove summary tags
-        content = re.sub(r'<details[^>]*>', '<think>', content)
-        content = content.replace('</details>', '</think>')
-        content = re.sub(r'<summary>.*?</summary>', '', content, flags=re.DOTALL)
         return content
     async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
         """Proxy request to Z.AI API"""
     cookie = await cookie_manager.get_next_cookie()
     if not cookie:
         raise HTTPException(status_code=503, detail="No available cookies")
     # Transform model name
-    target_model = settings.UPSTREAM_MODEL if request.model == settings.MODEL_NAME else request.model
     # Build request data based on the actual Z.AI API format
     import uuid
     from datetime import datetime
     current_time = datetime.now()
     # Generate unique IDs for the request
     chat_id = str(uuid.uuid4())
     request_id = str(uuid.uuid4())
     # Transform messages to include message_id
     messages_with_ids = []
     for msg in request.model_dump()["messages"]:
         message_with_id = {
             **msg,
-            "message_id": str(uuid.uuid4())  # Add message_id to each message
         }
         messages_with_ids.append(message_with_id)
     request_data = {
         "stream": True,
         "model": target_model,
         "messages": messages_with_ids,  # Use messages with IDs
         "chat_id": chat_id,  # Add chat_id
-        "id": request_id,    # Add request ID
         "params": {},
         "tool_servers": [],
         "features": {
@@ -124,9 +146,9 @@ class ProxyHandler:
             "features": [
                 {"type": "mcp", "server": "vibe-coding", "status": "hidden"},
                 {"type": "mcp", "server": "ppt-maker", "status": "hidden"},
-                {"type": "mcp", "server": "image-search", "status": "hidden"}
             ],
-            "enable_thinking": True
         },
         "variables": {
             "{{USER_NAME}}": "User",
@@ -136,7 +158,7 @@ class ProxyHandler:
             "{{CURRENT_TIME}}": current_time.strftime("%H:%M:%S"),
             "{{CURRENT_WEEKDAY}}": current_time.strftime("%A"),
             "{{CURRENT_TIMEZONE}}": "Asia/Taipei",
-            "{{USER_LANGUAGE}}": "zh-CN"
         },
         "model_item": {
             "id": target_model,
@@ -147,7 +169,7 @@ class ProxyHandler:
                 "name": target_model,
                 "owned_by": "openai",
                 "openai": {"id": target_model},
-                "urlIdx": 1
             },
             "urlIdx": 1,
             "info": {
@@ -155,11 +177,7 @@ class ProxyHandler:
                 "user_id": "7080a6c5-5fcc-4ea4-a85f-3b3fac905cf2",
                 "base_model_id": None,
                 "name": "GLM-4.5",
-                "params": {
-                    "top_p": 0.95,
-                    "temperature": 0.6,
-                    "max_tokens": 80000
-                },
                 "meta": {
                     "profile_image_url": "/static/favicon.png",
                     "description": "Most advanced model, proficient in coding and tool use",
@@ -174,16 +192,21 @@ class ProxyHandler:
                         "file_qa": True,
                         "returnFc": True,
                         "returnThink": True,
-                        "think": True
                     },
-                    "mcpServerIds": ["deep-web-search", "ppt-maker", "image-search", "vibe-coding"]
-                }
-            }
-        }
     }
     logger.debug(f"Sending request data: {json.dumps(request_data, indent=2)}")
     # Use the exact headers from your curl request
     headers = {
         "Accept": "*/*",
@@ -201,63 +224,70 @@ class ProxyHandler:
         "X-FE-Version": "prod-fe-1.0.57",
         "sec-ch-ua": '"Chromium";v="137", "Not/A)Brand";v="24"',
         "sec-ch-ua-mobile": "?1",
-        "sec-ch-ua-platform": '"Android"'
     }
     try:
         response = await self.client.post(
-            settings.UPSTREAM_URL,
-            json=request_data,
-            headers=headers
         )
         if response.status_code == 401:
             await cookie_manager.mark_cookie_failed(cookie)
             raise HTTPException(status_code=401, detail="Invalid authentication")
         if response.status_code != 200:
             logger.error(f"Upstream error: {response.status_code} - {response.text}")
-            raise HTTPException(status_code=response.status_code, detail=f"Upstream error: {response.text}")
         await cookie_manager.mark_cookie_success(cookie)
         return {"response": response, "cookie": cookie}
     except httpx.RequestError as e:
         logger.error(f"Request error: {e}")
         await cookie_manager.mark_cookie_failed(cookie)
         raise HTTPException(status_code=503, detail="Upstream service unavailable")
-    async def process_streaming_response_real_time(self, response: httpx.Response) -> AsyncGenerator[Dict[str, Any], None]:
         """Process streaming response in real time - truly streaming"""
         buffer = ""
         async for chunk in response.aiter_text():
             if not chunk:
                 continue
             buffer += chunk
-            lines = buffer.split('\n')
             buffer = lines[-1]  # Keep incomplete line in buffer
             for line in lines[:-1]:
                 line = line.strip()
                 if not line.startswith("data: "):
                     continue
                 payload = line[6:].strip()
                 if payload == "[DONE]":
                     return
                 try:
                     parsed = json.loads(payload)
                     # Check for errors first
                     if parsed.get("error") or (parsed.get("data", {}).get("error")):
-                        error_detail = (parsed.get("error", {}).get("detail") or
-                                      parsed.get("data", {}).get("error", {}).get("detail") or
-                                      "Unknown error from upstream")
                         logger.error(f"Upstream error: {error_detail}")
-                        raise HTTPException(status_code=400, detail=f"Upstream error: {error_detail}")
                     # Transform the response immediately
                     if parsed.get("data"):
@@ -275,16 +305,20 @@ class ProxyHandler:
                     yield parsed
                 except json.JSONDecodeError as e:
-                    logger.debug(f"JSON decode error for line: {line[:100]}... Error: {e}")
                     continue  # Skip non-JSON lines
     async def handle_chat_completion(self, request: ChatCompletionRequest):
         """Handle chat completion request"""
         proxy_result = await self.proxy_request(request)
         response = proxy_result["response"]
         # Determine final streaming mode
-        is_streaming = request.stream if request.stream is not None else settings.DEFAULT_STREAM
         if is_streaming:
             return StreamingResponse(
@@ -293,19 +327,21 @@ class ProxyHandler:
                 headers={
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
-                }
             )
         else:
             return await self.non_stream_response(response, request.model)
-    async def stream_response_real_time(self, response: httpx.Response, model: str) -> AsyncGenerator[str, None]:
         """Generate truly real-time streaming response in OpenAI format"""
         import uuid
         import time
         # Generate a unique completion ID
         completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
         try:
             # Process each chunk immediately as it arrives - true streaming!
             async for parsed in self.process_streaming_response_real_time(response):
@@ -313,12 +349,18 @@ class ProxyHandler:
                     data = parsed.get("data", {})
                     delta_content = data.get("delta_content", "")
                     phase = data.get("phase", "")
                     # For SHOW_THINK_TAGS=false, filter out non-answer content
-                    if not settings.SHOW_THINK_TAGS and phase != "answer" and delta_content:
-                        logger.debug(f"Skipping content in {phase} phase (SHOW_THINK_TAGS=false)")
                         continue
                     # Send content immediately if available
                     if delta_content:
                         openai_chunk = {
@@ -326,58 +368,53 @@ class ProxyHandler:
                             "object": "chat.completion.chunk",
                             "created": int(time.time()),
                             "model": model,
-                            "choices": [{
-                                "index": 0,
-                                "delta": {
-                                    "content": delta_content
-                                },
-                                "finish_reason": None
-                            }]
                         }
                         chunk_json = json.dumps(openai_chunk)
                         yield f"data: {chunk_json}\n\n"
                         logger.debug(f"Sent chunk: {chunk_json[:100]}...")
                 except Exception as e:
                     logger.error(f"Error processing streaming chunk: {e}")
                     continue
             # Send final completion chunk
             final_chunk = {
                 "id": completion_id,
-                "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model,
-                "choices": [{
-                    "index": 0,
-                    "delta": {},
-                    "finish_reason": "stop"
-                }]
             }
             yield f"data: {json.dumps(final_chunk)}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.error(f"Streaming error: {e}")
             # Send error in OpenAI format
-            error_chunk = {
-                "error": {
-                    "message": str(e),
-                    "type": "server_error"
-                }
-            }
             yield f"data: {json.dumps(error_chunk)}\n\n"
-    async def non_stream_response(self, response: httpx.Response, model: str) -> ChatCompletionResponse:
         """Generate non-streaming response by collecting all chunks"""
         chunks = []
         # For non-streaming, we still collect all chunks first
         async for parsed in self.process_streaming_response_real_time(response):
             chunks.append(parsed)
-            logger.debug(f"Collected chunk: {parsed.get('data', {}).get('delta_content', '')[:50]}...")
         if not chunks:
             raise HTTPException(status_code=500, detail="No response from upstream")
@@ -405,15 +442,18 @@ class ProxyHandler:
         # Create OpenAI-compatible response
         return ChatCompletionResponse(
-            id=chunks[0].get("data", {}).get("id", "chatcmpl-unknown") if chunks else "chatcmpl-unknown",
             created=int(time.time()),
             model=model,
-            choices=[{
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": transformed_content
-                },
-                "finish_reason": "stop"
-            }]
         )

 """
 Proxy handler for Z.AI API requests
 """
 import json
 import logging
 import re
 from config import settings
 from cookie_manager import cookie_manager
+from models import (
+    ChatCompletionRequest,
+    ChatCompletionResponse,
+    ChatCompletionStreamResponse,
+)
 logger = logging.getLogger(__name__)
 class ProxyHandler:
     def __init__(self):
         self.client = httpx.AsyncClient(timeout=60.0)
     async def __aenter__(self):
         return self
     async def __aexit__(self, exc_type, exc_val, exc_tb):
         await self.client.aclose()
     def transform_content(self, content: str) -> str:
         """Transform content by replacing HTML tags and optionally removing think tags"""
         if not content:
             original_length = len(content)
             # Remove <details> blocks (thinking content) - handle both closed and unclosed tags
+            content = re.sub(
+                r"<details[^>]*>.*?</details>", "", content, flags=re.DOTALL
+            )
+            content = re.sub(
+                r"<details[^>]*>.*?(?=\s*[A-Z]|\s*\d|\s*$)",
+                "",
+                content,
+                flags=re.DOTALL,
+            )
             content = content.strip()
+            logger.debug(
+                f"Content length after removing thinking content: {original_length} -> {len(content)}"
+            )
         else:
             logger.debug("Keeping thinking content, converting to <think> tags")
             # Replace <details> with <think>
+            content = re.sub(r"<details[^>]*>", "<think>", content)
+            content = content.replace("</details>", "</think>")
             # Remove <summary> tags and their content
+            content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
             # If there's no closing </think>, add it at the end of thinking content
+            if "<think>" in content and "</think>" not in content:
+                think_start = content.find("<think>")
                 if think_start != -1:
+                    answer_match = re.search(r"\n\s*[A-Z0-9]", content[think_start:])
                     if answer_match:
                         insert_pos = think_start + answer_match.start()
+                        content = (
+                            content[:insert_pos] + "</think>\n" + content[insert_pos:]
+                        )
                     else:
+                        content += "</think>"
         return content.strip()
     def transform_delta_content(self, content: str) -> str:
         """Transform delta content for streaming"""
         if not content:
             return content
         # Convert <details> to <think> and remove summary tags
+        content = re.sub(r"<details[^>]*>", "<think>", content)
+        content = content.replace("</details>", "</think>")
+        content = re.sub(r"<summary>.*?</summary>", "", content, flags=re.DOTALL)
         return content
     async def proxy_request(self, request: ChatCompletionRequest) -> Dict[str, Any]:
         """Proxy request to Z.AI API"""
     cookie = await cookie_manager.get_next_cookie()
     if not cookie:
         raise HTTPException(status_code=503, detail="No available cookies")
     # Transform model name
+    target_model = (
+        settings.UPSTREAM_MODEL
+        if request.model == settings.MODEL_NAME
+        else request.model
+    )
     # Build request data based on the actual Z.AI API format
     import uuid
     from datetime import datetime
     current_time = datetime.now()
     # Generate unique IDs for the request
     chat_id = str(uuid.uuid4())
     request_id = str(uuid.uuid4())
     # Transform messages to include message_id
     messages_with_ids = []
     for msg in request.model_dump()["messages"]:
         message_with_id = {
             **msg,
+            "message_id": str(uuid.uuid4()),  # Add message_id to each message
         }
         messages_with_ids.append(message_with_id)
     request_data = {
         "stream": True,
         "model": target_model,
         "messages": messages_with_ids,  # Use messages with IDs
         "chat_id": chat_id,  # Add chat_id
+        "id": request_id,  # Add request ID
         "params": {},
         "tool_servers": [],
         "features": {
             "features": [
                 {"type": "mcp", "server": "vibe-coding", "status": "hidden"},
                 {"type": "mcp", "server": "ppt-maker", "status": "hidden"},
+                {"type": "mcp", "server": "image-search", "status": "hidden"},
             ],
+            "enable_thinking": True,
         },
         "variables": {
             "{{USER_NAME}}": "User",
             "{{CURRENT_TIME}}": current_time.strftime("%H:%M:%S"),
             "{{CURRENT_WEEKDAY}}": current_time.strftime("%A"),
             "{{CURRENT_TIMEZONE}}": "Asia/Taipei",
+            "{{USER_LANGUAGE}}": "zh-CN",
         },
         "model_item": {
             "id": target_model,
                 "name": target_model,
                 "owned_by": "openai",
                 "openai": {"id": target_model},
+                "urlIdx": 1,
             },
             "urlIdx": 1,
             "info": {
                 "user_id": "7080a6c5-5fcc-4ea4-a85f-3b3fac905cf2",
                 "base_model_id": None,
                 "name": "GLM-4.5",
+                "params": {"top_p": 0.95, "temperature": 0.6, "max_tokens": 80000},
                 "meta": {
                     "profile_image_url": "/static/favicon.png",
                     "description": "Most advanced model, proficient in coding and tool use",
                         "file_qa": True,
                         "returnFc": True,
                         "returnThink": True,
+                        "think": True,
                     },
+                    "mcpServerIds": [
+                        "deep-web-search",
+                        "ppt-maker",
+                        "image-search",
+                        "vibe-coding",
+                    ],
+                },
+            },
+        },
     }
     logger.debug(f"Sending request data: {json.dumps(request_data, indent=2)}")
     # Use the exact headers from your curl request
     headers = {
         "Accept": "*/*",
         "X-FE-Version": "prod-fe-1.0.57",
         "sec-ch-ua": '"Chromium";v="137", "Not/A)Brand";v="24"',
         "sec-ch-ua-mobile": "?1",
+        "sec-ch-ua-platform": '"Android"',
     }
     try:
         response = await self.client.post(
+            settings.UPSTREAM_URL, json=request_data, headers=headers
         )
         if response.status_code == 401:
             await cookie_manager.mark_cookie_failed(cookie)
             raise HTTPException(status_code=401, detail="Invalid authentication")
         if response.status_code != 200:
             logger.error(f"Upstream error: {response.status_code} - {response.text}")
+            raise HTTPException(
+                status_code=response.status_code,
+                detail=f"Upstream error: {response.text}",
+            )
         await cookie_manager.mark_cookie_success(cookie)
         return {"response": response, "cookie": cookie}
     except httpx.RequestError as e:
         logger.error(f"Request error: {e}")
         await cookie_manager.mark_cookie_failed(cookie)
         raise HTTPException(status_code=503, detail="Upstream service unavailable")
+    async def process_streaming_response_real_time(
+        self, response: httpx.Response
+    ) -> AsyncGenerator[Dict[str, Any], None]:
         """Process streaming response in real time - truly streaming"""
         buffer = ""
         async for chunk in response.aiter_text():
             if not chunk:
                 continue
             buffer += chunk
+            lines = buffer.split("\n")
             buffer = lines[-1]  # Keep incomplete line in buffer
             for line in lines[:-1]:
                 line = line.strip()
                 if not line.startswith("data: "):
                     continue
                 payload = line[6:].strip()
                 if payload == "[DONE]":
                     return
                 try:
                     parsed = json.loads(payload)
                     # Check for errors first
                     if parsed.get("error") or (parsed.get("data", {}).get("error")):
+                        error_detail = (
+                            parsed.get("error", {}).get("detail")
+                            or parsed.get("data", {}).get("error", {}).get("detail")
+                            or "Unknown error from upstream"
+                        )
                         logger.error(f"Upstream error: {error_detail}")
+                        raise HTTPException(
+                            status_code=400, detail=f"Upstream error: {error_detail}"
+                        )
                     # Transform the response immediately
                     if parsed.get("data"):
                     yield parsed
                 except json.JSONDecodeError as e:
+                    logger.debug(
+                        f"JSON decode error for line: {line[:100]}... Error: {e}"
+                    )
                     continue  # Skip non-JSON lines
     async def handle_chat_completion(self, request: ChatCompletionRequest):
         """Handle chat completion request"""
         proxy_result = await self.proxy_request(request)
         response = proxy_result["response"]
         # Determine final streaming mode
+        is_streaming = (
+            request.stream if request.stream is not None else settings.DEFAULT_STREAM
+        )
         if is_streaming:
             return StreamingResponse(
                 headers={
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
+                },
             )
         else:
             return await self.non_stream_response(response, request.model)
+    async def stream_response_real_time(
+        self, response: httpx.Response, model: str
+    ) -> AsyncGenerator[str, None]:
         """Generate truly real-time streaming response in OpenAI format"""
         import uuid
         import time
         # Generate a unique completion ID
         completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
         try:
             # Process each chunk immediately as it arrives - true streaming!
             async for parsed in self.process_streaming_response_real_time(response):
                     data = parsed.get("data", {})
                     delta_content = data.get("delta_content", "")
                     phase = data.get("phase", "")
                     # For SHOW_THINK_TAGS=false, filter out non-answer content
+                    if (
+                        not settings.SHOW_THINK_TAGS
+                        and phase != "answer"
+                        and delta_content
+                    ):
+                        logger.debug(
+                            f"Skipping content in {phase} phase (SHOW_THINK_TAGS=false)"
+                        )
                         continue
                     # Send content immediately if available
                     if delta_content:
                         openai_chunk = {
                             "object": "chat.completion.chunk",
                             "created": int(time.time()),
                             "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"content": delta_content},
+                                    "finish_reason": None,
+                                }
+                            ],
                         }
                         chunk_json = json.dumps(openai_chunk)
                         yield f"data: {chunk_json}\n\n"
                         logger.debug(f"Sent chunk: {chunk_json[:100]}...")
                 except Exception as e:
                     logger.error(f"Error processing streaming chunk: {e}")
                     continue
             # Send final completion chunk
             final_chunk = {
                 "id": completion_id,
+                "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model,
+                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
             }
             yield f"data: {json.dumps(final_chunk)}\n\n"
             yield "data: [DONE]\n\n"
         except Exception as e:
             logger.error(f"Streaming error: {e}")
             # Send error in OpenAI format
+            error_chunk = {"error": {"message": str(e), "type": "server_error"}}
             yield f"data: {json.dumps(error_chunk)}\n\n"
+    async def non_stream_response(
+        self, response: httpx.Response, model: str
+    ) -> ChatCompletionResponse:
         """Generate non-streaming response by collecting all chunks"""
         chunks = []
         # For non-streaming, we still collect all chunks first
         async for parsed in self.process_streaming_response_real_time(response):
             chunks.append(parsed)
+            logger.debug(
+                f"Collected chunk: {parsed.get('data', {}).get('delta_content', '')[:50]}..."
+            )
         if not chunks:
             raise HTTPException(status_code=500, detail="No response from upstream")
         # Create OpenAI-compatible response
         return ChatCompletionResponse(
+            id=(
+                chunks[0].get("data", {}).get("id", "chatcmpl-unknown")
+                if chunks
+                else "chatcmpl-unknown"
+            ),
             created=int(time.time()),
             model=model,
+            choices=[
+                {
+                    "index": 0,
+                    "message": {"role": "assistant", "content": transformed_content},
+                    "finish_reason": "stop",
+                }
+            ],
         )