Spaces:
Paused
Paused
Upload proxy_handler.py
Browse files - proxy_handler.py +16 -13
proxy_handler.py
CHANGED
|
@@ -27,7 +27,7 @@ class ProxyHandler:
|
|
| 27 |
if not self.client.is_closed:
|
| 28 |
await self.client.aclose()
|
| 29 |
|
| 30 |
-
# --- Text utilities
|
| 31 |
def _clean_thinking(self, s: str) -> str:
|
| 32 |
if not s: return ""
|
| 33 |
s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
|
|
@@ -37,21 +37,13 @@ class ProxyHandler:
|
|
| 37 |
return s
|
| 38 |
|
| 39 |
def _clean_answer(self, s: str, from_edit_content: bool = False) -> str:
|
| 40 |
-
"""
|
| 41 |
-
Cleans the answer string.
|
| 42 |
-
If from_edit_content is True, it extracts only the content after the last </details> tag.
|
| 43 |
-
"""
|
| 44 |
if not s: return ""
|
| 45 |
-
|
| 46 |
if from_edit_content:
|
| 47 |
last_details_pos = s.rfind('</details>')
|
| 48 |
if last_details_pos != -1:
|
| 49 |
s = s[last_details_pos + len('</details>'):]
|
| 50 |
-
|
| 51 |
s = re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
|
| 52 |
-
|
| 53 |
-
# FIX: Use a more specific lstrip to only remove leading newlines,
|
| 54 |
-
# but preserve leading spaces which are important for markdown.
|
| 55 |
return s.lstrip('\n\r')
|
| 56 |
|
| 57 |
# ... Other methods like _serialize_msgs, _prep_upstream remain the same ...
|
|
@@ -71,12 +63,15 @@ class ProxyHandler:
|
|
| 71 |
headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
|
| 72 |
return body, headers, ck
|
| 73 |
|
| 74 |
-
# ---------- stream ----------
|
| 75 |
async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
|
| 76 |
ck = None
|
| 77 |
try:
|
| 78 |
body, headers, ck = await self._prep_upstream(req)
|
| 79 |
comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"; think_open = False; phase_cur = None
|
|
|
|
|
|
|
|
|
|
| 80 |
async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
|
| 81 |
if resp.status_code != 200:
|
| 82 |
await cookie_manager.mark_cookie_failed(ck); err_body = await resp.aread()
|
|
@@ -117,18 +112,27 @@ class ProxyHandler:
|
|
| 117 |
if not think_open:
|
| 118 |
open_payload = {'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]}
|
| 119 |
yield f"data: {json.dumps(open_payload)}\n\n"
|
|
|
|
| 120 |
think_open = True
|
| 121 |
text_to_yield = self._clean_thinking(content)
|
| 122 |
elif current_content_phase == "answer":
|
| 123 |
text_to_yield = self._clean_answer(content, from_edit_content=is_edit)
|
| 124 |
|
| 125 |
-
|
|
|
|
|
|
|
|
|
|
| 126 |
content_payload = {"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model, "choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],}
|
| 127 |
yield f"data: {json.dumps(content_payload)}\n\n"
|
|
|
|
|
|
|
|
|
|
| 128 |
except Exception:
|
| 129 |
logger.exception("Stream error"); raise
|
| 130 |
|
| 131 |
# ---------- non-stream ----------
|
|
|
|
|
|
|
| 132 |
async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
|
| 133 |
ck = None
|
| 134 |
try:
|
|
@@ -171,7 +175,6 @@ class ProxyHandler:
|
|
| 171 |
if settings.SHOW_THINK_TAGS and think_buf:
|
| 172 |
think_text = ''.join(think_buf).strip()
|
| 173 |
if think_text:
|
| 174 |
-
# The newline is handled by the answer itself now, so we can just concatenate.
|
| 175 |
final_content = f"<think>{think_text}</think>{ans_text}"
|
| 176 |
|
| 177 |
return ChatCompletionResponse(
|
|
|
|
| 27 |
if not self.client.is_closed:
|
| 28 |
await self.client.aclose()
|
| 29 |
|
| 30 |
+
# --- Text utilities remain the same from the last version ---
|
| 31 |
def _clean_thinking(self, s: str) -> str:
|
| 32 |
if not s: return ""
|
| 33 |
s = re.sub(r'<details[^>]*>.*?</details>', '', s, flags=re.DOTALL)
|
|
|
|
| 37 |
return s
|
| 38 |
|
| 39 |
def _clean_answer(self, s: str, from_edit_content: bool = False) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
if not s: return ""
|
|
|
|
| 41 |
if from_edit_content:
|
| 42 |
last_details_pos = s.rfind('</details>')
|
| 43 |
if last_details_pos != -1:
|
| 44 |
s = s[last_details_pos + len('</details>'):]
|
|
|
|
| 45 |
s = re.sub(r"<details[^>]*>.*?</details>", "", s, flags=re.DOTALL)
|
| 46 |
+
# We keep the lstrip logic as it is, but we will handle yielding empty strings differently.
|
|
|
|
|
|
|
| 47 |
return s.lstrip('\n\r')
|
| 48 |
|
| 49 |
# ... Other methods like _serialize_msgs, _prep_upstream remain the same ...
|
|
|
|
| 63 |
headers = { "Content-Type": "application/json", "Authorization": f"Bearer {ck}", "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"), "Accept": "application/json, text/event-stream", "Accept-Language": "zh-CN", "sec-ch-ua": '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', "sec-ch-ua-mobile": "?0", "sec-ch-ua-platform": '"macOS"', "x-fe-version": "prod-fe-1.0.53", "Origin": "https://chat.z.ai", "Referer": "https://chat.z.ai/",}
|
| 64 |
return body, headers, ck
|
| 65 |
|
| 66 |
+
# ---------- stream (REVISED YIELD LOGIC) ----------
|
| 67 |
async def stream_proxy_response(self, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
|
| 68 |
ck = None
|
| 69 |
try:
|
| 70 |
body, headers, ck = await self._prep_upstream(req)
|
| 71 |
comp_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"; think_open = False; phase_cur = None
|
| 72 |
+
# FIX: New flag to track if we've started sending content.
|
| 73 |
+
has_yielded_content = False
|
| 74 |
+
|
| 75 |
async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
|
| 76 |
if resp.status_code != 200:
|
| 77 |
await cookie_manager.mark_cookie_failed(ck); err_body = await resp.aread()
|
|
|
|
| 112 |
if not think_open:
|
| 113 |
open_payload = {'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]}
|
| 114 |
yield f"data: {json.dumps(open_payload)}\n\n"
|
| 115 |
+
has_yielded_content = True
|
| 116 |
think_open = True
|
| 117 |
text_to_yield = self._clean_thinking(content)
|
| 118 |
elif current_content_phase == "answer":
|
| 119 |
text_to_yield = self._clean_answer(content, from_edit_content=is_edit)
|
| 120 |
|
| 121 |
+
# FIX: Revised yield logic.
|
| 122 |
+
# We yield if there's content, OR if we've already started yielding and the content is an empty string
|
| 123 |
+
# (e.g. a chunk consisting only of newlines that _clean_answer stripped; note the chunk is sent with empty content, the newline itself is not restored).
|
| 124 |
+
if text_to_yield or (has_yielded_content and content is not None and text_to_yield == ""):
|
| 125 |
content_payload = {"id": comp_id, "object": "chat.completion.chunk", "created": int(time.time()), "model": req.model, "choices": [{"index": 0, "delta": {"content": text_to_yield}, "finish_reason": None}],}
|
| 126 |
yield f"data: {json.dumps(content_payload)}\n\n"
|
| 127 |
+
if text_to_yield: # Only set the flag if we actually sent non-empty content.
|
| 128 |
+
has_yielded_content = True
|
| 129 |
+
|
| 130 |
except Exception:
|
| 131 |
logger.exception("Stream error"); raise
|
| 132 |
|
| 133 |
# ---------- non-stream ----------
|
| 134 |
+
# The non-stream version is less susceptible to this issue because it processes all content at once.
|
| 135 |
+
# The existing logic should be sufficient. No changes needed here.
|
| 136 |
async def non_stream_proxy_response(self, req: ChatCompletionRequest) -> ChatCompletionResponse:
|
| 137 |
ck = None
|
| 138 |
try:
|
|
|
|
| 175 |
if settings.SHOW_THINK_TAGS and think_buf:
|
| 176 |
think_text = ''.join(think_buf).strip()
|
| 177 |
if think_text:
|
|
|
|
| 178 |
final_content = f"<think>{think_text}</think>{ans_text}"
|
| 179 |
|
| 180 |
return ChatCompletionResponse(
|