Spaces:

bluewinliang
/

zai2api

Paused

App Files Files Community

bluewinliang committed on Oct 14, 2025

Commit

83eadcd

verified ·

1 Parent(s): 41a07df

Upload proxy_handler.py

Browse files

Files changed (1) hide show

proxy_handler.py +62 -34

proxy_handler.py CHANGED Viewed

@@ -22,6 +22,7 @@ class ProxyHandler:
             limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
             http2=True,
         )
         self.primary_secret = "junjie".encode('utf-8')
     async def aclose(self):
@@ -32,46 +33,54 @@ class ProxyHandler:
         return int(time.time() * 1000)
     def _parse_jwt_token(self, token: str) -> Dict[str, str]:
         try:
             parts = token.split('.')
             if len(parts) != 3: return {"user_id": ""}
             payload_b64 = parts[1]
-            payload_b64 += '=' * (-len(payload_b64) % 4)
             payload_json = base64.urlsafe_b64decode(payload_b64).decode('utf-8')
             payload = json.loads(payload_json)
             return {"user_id": payload.get("sub", "")}
         except Exception:
             return {"user_id": ""}
-    def _generate_signature(self, e_payload: str, t_payload: str, timestamp_ms: int) -> Dict[str, Any]:
         """
         Generates the signature based on the logic from the reference JS code.
-        This version corrects the HMAC chaining issue by using .digest() for the intermediate key.
         Args:
             e_payload (str): The simplified payload string (e.g., "requestId,...,timestamp,...").
             t_payload (str): The last message content.
-            timestamp_ms (int): The consistent timestamp generated for the request.
         Returns:
             A dictionary with 'signature' and 'timestamp'.
         """
-        b64_encoded_t = base64.b64encode(t_payload.encode("utf-8")).decode("utf-8")
-        message_string = f"{e_payload}|{b64_encoded_t}|{timestamp_ms}"
-        n = timestamp_ms // (5 * 60 * 1000)
-        # --- MODIFICATION START: Correct HMAC Chaining ---
-        # 1. First HMAC: Calculate intermediate key as RAW BYTES using .digest()
         msg1 = str(n).encode("utf-8")
-        intermediate_key_bytes = hmac.new(self.primary_secret, msg1, hashlib.sha256).digest()
-        # 2. Second HMAC: Use the raw bytes of the intermediate key directly.
-        #    The final result is converted to a hex string for the header.
         msg2 = message_string.encode("utf-8")
-        final_signature = hmac.new(intermediate_key_bytes, msg2, hashlib.sha256).hexdigest()
-        # --- MODIFICATION END ---
         return {"signature": final_signature, "timestamp": timestamp_ms}
@@ -86,14 +95,13 @@ class ProxyHandler:
     def _clean_answer_content(self, text: str) -> str:
         if not text: return ""
-        cleaned_text = re.sub(r'<details[^>]*>.*?</details>', '', text, flags=re.DOTALL)
-        cleaned_text = re.sub(r'<glm_block.*?</glm_block>|<summary>.*?</summary>', '', text, flags=re.DOTALL)
-        cleaned_text = re.sub(r'\s*duration="\d+"[^>]*>', '', cleaned_text)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
         out = []
         for m in msgs:
             if hasattr(m, "dict"): out.append(m.dict())
             elif hasattr(m, "model_dump"): out.append(m.model_dump())
             elif isinstance(m, dict): out.append(m)
@@ -101,30 +109,39 @@ class ProxyHandler:
         return out
     async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
         ck = await cookie_manager.get_next_cookie()
         if not ck: raise HTTPException(503, "No available cookies")
         model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
-        timestamp_ms = self._get_timestamp_millis()
-        payload_timestamp = str(timestamp_ms)
         payload_user_id = str(uuid.uuid4())
         payload_request_id = str(uuid.uuid4())
         e_payload = f"requestId,{payload_request_id},timestamp,{payload_timestamp},user_id,{payload_user_id}"
         t_payload = ""
         if req.messages:
             last_message = req.messages[-1]
             if isinstance(last_message.content, str):
                 t_payload = last_message.content
-        signature_data = self._generate_signature(e_payload, t_payload, timestamp_ms)
         signature = signature_data["signature"]
         signature_timestamp = signature_data["timestamp"]
         url_params = {
             "requestId": payload_request_id,
             "timestamp": payload_timestamp,
@@ -132,14 +149,16 @@ class ProxyHandler:
             "signature_timestamp": str(signature_timestamp)
         }
         final_url = httpx.URL(settings.UPSTREAM_URL).copy_with(params=url_params)
         body = {
             "stream": True,
             "model": model,
             "messages": self._serialize_msgs(req.messages),
-            "chat_id": str(uuid.uuid4()),
-            "id": str(uuid.uuid4()),
             "features": {
                 "image_generation": False,
                 "web_search": False,
@@ -209,6 +228,7 @@ class ProxyHandler:
                         line = line.strip()
                         if not line.startswith('data: '): continue
                         payload_str = line[6:]
                         if payload_str == '[DONE]':
                             if think_open:
                                 yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
@@ -222,7 +242,11 @@ class ProxyHandler:
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                         if not content_chunk:
-                            continue
                         if phase == "thinking":
                             current_raw_thinking = content_chunk if dat.get("edit_content") is not None else current_raw_thinking + content_chunk
@@ -231,12 +255,10 @@ class ProxyHandler:
                         elif phase == "answer":
                             content_to_process = content_chunk
                             if is_first_answer_chunk:
-                                last_bracket_pos = content_to_process.rfind('>')
-                                if last_bracket_pos != -1:
-                                    content_to_process = content_to_process[last_bracket_pos + 1:]
-                                content_to_process = content_to_process.lstrip()
                                 is_first_answer_chunk = False
                             if content_to_process:
                                 async for item in yield_delta("answer", content_to_process):
                                     yield item
@@ -244,9 +266,12 @@ class ProxyHandler:
             logger.exception("Stream error"); raise
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
         ck = None
         try:
             body, headers, ck, url = await self._prep_upstream(req)
             body["stream"] = False
             async with self.client.post(url, json=body, headers=headers) as resp:
@@ -256,9 +281,13 @@ class ProxyHandler:
                     raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
                 await cookie_manager.mark_cookie_success(ck)
                 response_data = resp.json()
                 final_content = ""
-                finish_reason = "stop"
                 if "choices" in response_data and response_data["choices"]:
                     first_choice = response_data["choices"][0]
@@ -267,8 +296,6 @@ class ProxyHandler:
                     if "finish_reason" in first_choice:
                         finish_reason = first_choice["finish_reason"]
-                final_content = self._clean_answer_content(final_content)
                 return ChatCompletionResponse(
                     id=response_data.get("id", f"chatcmpl-{uuid.uuid4().hex[:29]}"),
                     created=int(time.time()),
@@ -279,6 +306,7 @@ class ProxyHandler:
             logger.exception("Non-stream processing failed"); raise
     async def handle_chat_completion(self, req: ChatCompletionRequest):
         stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
         if stream:
             return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",

             limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
             http2=True,
         )
+        # The primary secret key from the reference code.
         self.primary_secret = "junjie".encode('utf-8')
     async def aclose(self):
         return int(time.time() * 1000)
     def _parse_jwt_token(self, token: str) -> Dict[str, str]:
+        """A simple JWT payload decoder to get user ID ('sub' claim)."""
         try:
             parts = token.split('.')
             if len(parts) != 3: return {"user_id": ""}
             payload_b64 = parts[1]
+            payload_b64 += '=' * (-len(payload_b64) % 4) # Add padding if needed
             payload_json = base64.urlsafe_b64decode(payload_b64).decode('utf-8')
             payload = json.loads(payload_json)
             return {"user_id": payload.get("sub", "")}
         except Exception:
+            # It's okay if this fails; we'll proceed with an empty user_id.
             return {"user_id": ""}
+    def _generate_signature(self, e_payload: str, t_payload: str) -> Dict[str, Any]:
         """
         Generates the signature based on the logic from the reference JS code.
+        This is a two-level HMAC-SHA256 process.
         Args:
             e_payload (str): The simplified payload string (e.g., "requestId,...,timestamp,...").
             t_payload (str): The last message content.
         Returns:
             A dictionary with 'signature' and 'timestamp'.
         """
+        # NOTE: the available reference implementations disagree on key derivation.
+        # - The Python snippet from the prompt uses a 5-minute timestamp bucket:
+        #     `n = timestamp_ms // (5 * 60 * 1000)`
+        # - The JS snippet uses a 1-minute bucket:
+        #     `minuteBucket = Math.floor(timestampMs / 60000)`
+        # - The other reference code (`signature_generator.py`, alongside the new
+        #   `utils.py`) uses a different method again.
+        # The JS version looks more complete, but the Python snippet is the one
+        # that was provided, so it is tried first here.
+        # --- Python snippet logic from the prompt (5-minute bucket) ---
+        timestamp_ms = self._get_timestamp_millis()
+        message_string = f"{e_payload}|{t_payload}|{timestamp_ms}"
+        # Per the Python snippet: n is a 5-minute bucket
+        n = timestamp_ms // (5 * 60 * 1000)
+        # Intermediate key derivation
         msg1 = str(n).encode("utf-8")
+        intermediate_key = hmac.new(self.primary_secret, msg1, hashlib.sha256).hexdigest()
+        # Final signature
         msg2 = message_string.encode("utf-8")
+        final_signature = hmac.new(intermediate_key.encode("utf-8"), msg2, hashlib.sha256).hexdigest()
         return {"signature": final_signature, "timestamp": timestamp_ms}
     def _clean_answer_content(self, text: str) -> str:
         if not text: return ""
+        cleaned_text = re.sub(r'<glm_block.*?</glm_block>|<details[^>]*>.*?</details>|<summary>.*?</summary>', '', text, flags=re.DOTALL)
         return cleaned_text
     def _serialize_msgs(self, msgs) -> list:
         out = []
         for m in msgs:
+            # Adapting to Pydantic v1/v2 and dicts
             if hasattr(m, "dict"): out.append(m.dict())
             elif hasattr(m, "model_dump"): out.append(m.model_dump())
             elif isinstance(m, dict): out.append(m)
         return out
     async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str, str]:
+        """Prepares the request body, headers, cookie, and URL for the upstream API."""
         ck = await cookie_manager.get_next_cookie()
         if not ck: raise HTTPException(503, "No available cookies")
         model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
+        chat_id = str(uuid.uuid4())
+        request_id = str(uuid.uuid4())
+        # --- NEW Simplified Signature Payload Logic ---
+        user_info = self._parse_jwt_token(ck)
+        user_id = user_info.get("user_id", "")
+        # The reference code uses a separate UUID for user_id in payload, let's follow that.
+        # This seems strange, but let's replicate the reference code exactly.
         payload_user_id = str(uuid.uuid4())
         payload_request_id = str(uuid.uuid4())
+        payload_timestamp = str(self._get_timestamp_millis())
+        # e: The simplified payload for the signature
         e_payload = f"requestId,{payload_request_id},timestamp,{payload_timestamp},user_id,{payload_user_id}"
+        # t: The last message content
         t_payload = ""
         if req.messages:
             last_message = req.messages[-1]
             if isinstance(last_message.content, str):
                 t_payload = last_message.content
+        # Generate the signature
+        signature_data = self._generate_signature(e_payload, t_payload)
         signature = signature_data["signature"]
         signature_timestamp = signature_data["timestamp"]
+        # The reference code sends these as URL parameters, not in the body.
         url_params = {
             "requestId": payload_request_id,
             "timestamp": payload_timestamp,
             "signature_timestamp": str(signature_timestamp)
         }
+        # Construct URL with query parameters
+        # Note: The reference code has a typo `f"{BASE_URL}/api/chat/completions"`, it should be `z.ai`
         final_url = httpx.URL(settings.UPSTREAM_URL).copy_with(params=url_params)
         body = {
             "stream": True,
             "model": model,
             "messages": self._serialize_msgs(req.messages),
+            "chat_id": chat_id,
+            "id": request_id,
             "features": {
                 "image_generation": False,
                 "web_search": False,
                         line = line.strip()
                         if not line.startswith('data: '): continue
                         payload_str = line[6:]
+                        # The reference code has a special 'done' phase, but the original Z.AI uses [DONE]
                         if payload_str == '[DONE]':
                             if think_open:
                                 yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
                         phase = dat.get("phase")
                         content_chunk = dat.get("delta_content") or dat.get("edit_content")
                         if not content_chunk:
+                            # Handle case where chunk is just usage info, etc.
+                            if phase == 'other' and dat.get('usage'):
+                                pass # In streaming, usage might come with the final chunk
+                            else:
+                                continue
                         if phase == "thinking":
                             current_raw_thinking = content_chunk if dat.get("edit_content") is not None else current_raw_thinking + content_chunk
                         elif phase == "answer":
                             content_to_process = content_chunk
                             if is_first_answer_chunk:
+                                if '</details>' in content_to_process:
+                                    parts = content_to_process.split('</details>', 1)
+                                    content_to_process = parts[1] if len(parts) > 1 else ""
                                 is_first_answer_chunk = False
                             if content_to_process:
                                 async for item in yield_delta("answer", content_to_process):
                                     yield item
             logger.exception("Stream error"); raise
     async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
+        # This part of the code can be simplified as well, but let's focus on fixing the streaming first.
+        # The logic will be almost identical to the streaming one.
         ck = None
         try:
             body, headers, ck, url = await self._prep_upstream(req)
+            # For non-stream, set stream to False in the body
             body["stream"] = False
             async with self.client.post(url, json=body, headers=headers) as resp:
                     raise HTTPException(resp.status_code, f"Upstream error: {error_detail}")
                 await cookie_manager.mark_cookie_success(ck)
+                # Z.AI non-stream response is a single JSON object
                 response_data = resp.json()
+                # We need to adapt Z.AI's response format to OpenAI's format
                 final_content = ""
+                finish_reason = "stop" # Default
                 if "choices" in response_data and response_data["choices"]:
                     first_choice = response_data["choices"][0]
                     if "finish_reason" in first_choice:
                         finish_reason = first_choice["finish_reason"]
                 return ChatCompletionResponse(
                     id=response_data.get("id", f"chatcmpl-{uuid.uuid4().hex[:29]}"),
                     created=int(time.time()),
             logger.exception("Non-stream processing failed"); raise
     async def handle_chat_completion(self, req: ChatCompletionRequest):
+        """Determines whether to stream or not and handles the request."""
         stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
         if stream:
             return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",