bluewinliang committed on
Commit
8d3ffb0
·
verified ·
1 Parent(s): 6deff8b

Upload proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +45 -21
proxy_handler.py CHANGED
@@ -29,24 +29,37 @@ class ProxyHandler:
29
 
30
  def _clean_thinking_content(self, text: str) -> str:
31
  """
32
- A robust cleaner for the raw thinking content string.
 
33
  """
34
  if not text:
35
  return ""
36
- # 1. Remove tool call blocks first
37
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
38
- # 2. Remove all HTML-like tags. This gets rid of <details>, <summary>, etc.
39
  cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
40
- # 3. Remove specific known metadata patterns that are not standard HTML.
41
  cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
42
- # 4. Remove leading markdown quote symbols
43
  cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
44
- # 5. Remove any "Thinking..." headers.
45
  cleaned_text = cleaned_text.replace("Thinking…", "")
46
- # 6. Final strip to clean up any residual whitespace.
 
 
 
 
 
 
 
 
 
 
 
47
  return cleaned_text.strip()
48
 
49
  def _serialize_msgs(self, msgs) -> list:
 
50
  out = []
51
  for m in msgs:
52
  if hasattr(m, "dict"): out.append(m.dict())
@@ -56,6 +69,7 @@ class ProxyHandler:
56
  return out
57
 
58
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
 
59
  ck = await cookie_manager.get_next_cookie()
60
  if not ck: raise HTTPException(503, "No available cookies")
61
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
@@ -73,24 +87,28 @@ class ProxyHandler:
73
  nonlocal think_open
74
  if not text: return
75
 
 
 
 
 
 
 
 
 
 
76
  if content_type == "thinking" and settings.SHOW_THINK_TAGS:
77
  if not think_open:
78
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
79
  think_open = True
80
 
81
- # --- START OF FINAL FIX ---
82
- # Use the unified cleaning function for streaming content as well.
83
- # This ensures consistent output with non-streaming mode.
84
- cleaned_text = self._clean_thinking_content(text)
85
- # --- END OF FINAL FIX ---
86
-
87
- if cleaned_text:
88
- yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
89
  elif content_type == "answer":
90
  if think_open:
91
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
92
  think_open = False
93
- yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': text}, 'finish_reason': None}]})}\n\n"
 
94
 
95
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
96
  if resp.status_code != 200:
@@ -103,7 +121,8 @@ class ProxyHandler:
103
  async for raw in resp.aiter_text():
104
  for line in raw.strip().split('\n'):
105
  line = line.strip()
106
- if not line or not line.startswith('data: '): continue
 
107
  payload_str = line[6:]
108
  if payload_str == '[DONE]':
109
  if think_open: yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
@@ -143,7 +162,8 @@ class ProxyHandler:
143
  async for raw in resp.aiter_text():
144
  for line in raw.strip().split('\n'):
145
  line = line.strip()
146
- if not line or not line.startswith('data: '): continue
 
147
  payload_str = line[6:]
148
  if payload_str == '[DONE]': break
149
  try:
@@ -170,12 +190,15 @@ class ProxyHandler:
170
  else: continue
171
  break
172
 
173
- final_ans_text = ''.join(raw_answer_parts)
174
- final_content = final_ans_text
 
 
175
  if settings.SHOW_THINK_TAGS and raw_thinking_parts:
 
176
  cleaned_think_text = self._clean_thinking_content(''.join(raw_thinking_parts))
177
  if cleaned_think_text:
178
- final_content = f"<think>{cleaned_think_text}</think>{final_ans_text}"
179
 
180
  return ChatCompletionResponse(
181
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
@@ -185,6 +208,7 @@ class ProxyHandler:
185
  logger.exception("Non-stream processing failed"); raise
186
 
187
  async def handle_chat_completion(self, req: ChatCompletionRequest):
 
188
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
189
  if stream:
190
  return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",
 
29
 
30
  def _clean_thinking_content(self, text: str) -> str:
31
  """
32
+ Aggressively cleans raw thinking content strings.
33
+ Removes tool calls, HTML-like tags, and other metadata.
34
  """
35
  if not text:
36
  return ""
37
+ # Remove tool call blocks first
38
  cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
39
+ # Remove all other HTML-like tags
40
  cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
41
+ # Remove specific metadata patterns
42
  cleaned_text = re.sub(r'true" duration="\d+">\s*Thought for \d+ seconds', '', cleaned_text)
43
+ # Remove leading markdown quote symbols
44
  cleaned_text = re.sub(r'^\s*>\s*', '', cleaned_text, flags=re.MULTILINE)
45
+ # Remove "Thinking..." headers
46
  cleaned_text = cleaned_text.replace("Thinking…", "")
47
+ # Final strip to clean up residual whitespace
48
+ return cleaned_text.strip()
49
+
50
+ def _clean_answer_content(self, text: str) -> str:
51
+ """
52
+ Cleans only <glm_block> tags from the final answer content,
53
+ preserving other potential markdown or HTML formatting.
54
+ """
55
+ if not text:
56
+ return ""
57
+ # Remove only tool call blocks
58
+ cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
59
  return cleaned_text.strip()
60
 
61
  def _serialize_msgs(self, msgs) -> list:
62
+ """Converts message objects to a list of dictionaries."""
63
  out = []
64
  for m in msgs:
65
  if hasattr(m, "dict"): out.append(m.dict())
 
69
  return out
70
 
71
  async def _prep_upstream(self, req: ChatCompletionRequest) -> Tuple[Dict[str, Any], Dict[str, str], str]:
72
+ """Prepares the request body, headers, and cookie for the upstream API."""
73
  ck = await cookie_manager.get_next_cookie()
74
  if not ck: raise HTTPException(503, "No available cookies")
75
  model = settings.UPSTREAM_MODEL if req.model == settings.MODEL_NAME else req.model
 
87
  nonlocal think_open
88
  if not text: return
89
 
90
+ # Apply cleaning based on content type
91
+ cleaned_text = ""
92
+ if content_type == "thinking":
93
+ cleaned_text = self._clean_thinking_content(text)
94
+ elif content_type == "answer":
95
+ cleaned_text = self._clean_answer_content(text)
96
+
97
+ if not cleaned_text: return
98
+
99
  if content_type == "thinking" and settings.SHOW_THINK_TAGS:
100
  if not think_open:
101
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '<think>'}, 'finish_reason': None}]})}\n\n"
102
  think_open = True
103
 
104
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
105
+
 
 
 
 
 
 
106
  elif content_type == "answer":
107
  if think_open:
108
  yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
109
  think_open = False
110
+
111
+ yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': cleaned_text}, 'finish_reason': None}]})}\n\n"
112
 
113
  async with self.client.stream("POST", settings.UPSTREAM_URL, json=body, headers=headers) as resp:
114
  if resp.status_code != 200:
 
121
  async for raw in resp.aiter_text():
122
  for line in raw.strip().split('\n'):
123
  line = line.strip()
124
+ if not line.startswith('data: '): continue
125
+
126
  payload_str = line[6:]
127
  if payload_str == '[DONE]':
128
  if think_open: yield f"data: {json.dumps({'id': comp_id, 'object': 'chat.completion.chunk', 'created': int(time.time()), 'model': req.model, 'choices': [{'index': 0, 'delta': {'content': '</think>'}, 'finish_reason': None}]})}\n\n"
 
162
  async for raw in resp.aiter_text():
163
  for line in raw.strip().split('\n'):
164
  line = line.strip()
165
+ if not line.startswith('data: '): continue
166
+
167
  payload_str = line[6:]
168
  if payload_str == '[DONE]': break
169
  try:
 
190
  else: continue
191
  break
192
 
193
+ # Clean the final answer text, removing only <glm_block> tags.
194
+ cleaned_ans_text = self._clean_answer_content(''.join(raw_answer_parts))
195
+ final_content = cleaned_ans_text
196
+
197
  if settings.SHOW_THINK_TAGS and raw_thinking_parts:
198
+ # Aggressively clean the thinking part.
199
  cleaned_think_text = self._clean_thinking_content(''.join(raw_thinking_parts))
200
  if cleaned_think_text:
201
+ final_content = f"<think>{cleaned_think_text}</think>{cleaned_ans_text}"
202
 
203
  return ChatCompletionResponse(
204
  id=f"chatcmpl-{uuid.uuid4().hex[:29]}", created=int(time.time()), model=req.model,
 
208
  logger.exception("Non-stream processing failed"); raise
209
 
210
  async def handle_chat_completion(self, req: ChatCompletionRequest):
211
+ """Determines whether to stream or not and handles the request."""
212
  stream = bool(req.stream) if req.stream is not None else settings.DEFAULT_STREAM
213
  if stream:
214
  return StreamingResponse(self.stream_proxy_response(req), media_type="text/event-stream",