bluewinliang committed on
Commit
d58e410
·
verified ·
1 Parent(s): af689c4

Update proxy_handler.py

Browse files
Files changed (1) hide show
  1. proxy_handler.py +29 -12
proxy_handler.py CHANGED
@@ -28,23 +28,40 @@ class ProxyHandler:
28
 
29
  def _clean_thinking_content(self, text: str) -> str:
30
  """
31
- Aggressively cleans raw thinking content strings.
32
- Removes tool calls, HTML-like tags, and other metadata.
 
 
33
  """
34
  if not text:
35
  return ""
36
- # Remove tool call blocks
37
- cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', text, flags=re.DOTALL)
38
- # Remove all other HTML-like tags
39
- cleaned_text = re.sub(r'<[^>]+>', '', cleaned_text)
40
- # Remove specific metadata patterns
41
- # FIX: Made regex more flexible to handle additional attributes before the closing '>'
42
- cleaned_text = re.sub(r'true" duration="\d+"[^>]*>\s*Thought for \d+ seconds', '', cleaned_text)
43
- # Remove only first-level markdown quotes, preserving sub-quotes like '>>'
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
45
- # Remove "Thinking..." headers
 
46
  cleaned_text = cleaned_text.replace("Thinking…", "")
47
- # Final strip to clean up residual whitespace
 
48
  return cleaned_text.strip()
49
 
50
  def _clean_answer_content(self, text: str) -> str:
 
28
 
29
  def _clean_thinking_content(self, text: str) -> str:
30
  """
31
+ Aggressively cleans raw thinking content strings based on observed patterns
32
+ from the Z.AI API, inspired by a reference Cloudflare implementation.
33
+ Removes tool calls, specific HTML-like tags, and other metadata while preserving
34
+ the core thought process content.
35
  """
36
  if not text:
37
  return ""
38
+
39
+ cleaned_text = text
40
+
41
+ # 1. Remove entire blocks where the content is also unwanted metadata.
42
+ # e.g., <summary>Thinking...</summary> or <glm_block>...</glm_block>
43
+ cleaned_text = re.sub(r'<summary>.*?</summary>', '', cleaned_text, flags=re.DOTALL)
44
+ cleaned_text = re.sub(r'<glm_block.*?</glm_block>', '', cleaned_text, flags=re.DOTALL)
45
+
46
+ # 2. Remove specific structural tags, but keep the content between them.
47
+ # Inspired by the reference implementation's targeted replaces.
48
+ # e.g., <details> content </details> becomes just 'content'
49
+ cleaned_text = cleaned_text.replace("</thinking>", "")
50
+ cleaned_text = cleaned_text.replace("<Full>", "")
51
+ cleaned_text = cleaned_text.replace("</Full>", "")
52
+ # This regex handles <details>, <details open>, and </details>
53
+ cleaned_text = re.sub(r'</?details[^>]*>', '', cleaned_text)
54
+
55
+ # 3. Handle markdown blockquotes.
56
+ # The reference implementation removes "> " at the start of lines.
57
+ # The original Python implementation is more robust and correctly
58
+ # preserves multi-level quotes (e.g., '>>'). We'll keep it.
59
  cleaned_text = re.sub(r'^\s*>\s*(?!>)', '', cleaned_text, flags=re.MULTILINE)
60
+
61
+ # 4. Remove other known text artifacts.
62
  cleaned_text = cleaned_text.replace("Thinking…", "")
63
+
64
+ # 5. Final strip to clean up residual whitespace from removed elements.
65
  return cleaned_text.strip()
66
 
67
  def _clean_answer_content(self, text: str) -> str: