Spaces:

FaiziRBLX
/

NousAPI

Sleeping

App Files Files Community

FaiziRBLX committed on 23 days ago

Commit

0cc633b

verified ·

1 Parent(s): f040bfd

Update best.py

Browse files

Files changed (1) hide show

best.py +55 -20

best.py CHANGED Viewed

@@ -1136,7 +1136,14 @@ def generate_text(
             if generated_ids.size(1) >= model.config.max_position_embeddings:
                 break
-    return tokenizer.decode(generated_ids[0], skip_special_tokens=False)
 # ============================================================================
@@ -1146,17 +1153,21 @@ def generate_text(
 def _clean_response(response: str) -> str:
     import re
-    # Strip CoT block
     if "<cot>" in response and "</cot>" in response:
         response = response.split("</cot>", 1)[-1]
     elif "<cot>" in response:
-        response = response.split("<cot>", 1)[0]
-    # Strip XML-like tags
     response = re.sub(r'<[^>]+>', '', response)
-    # FIX: match role markers only at start of line (not mid-sentence Indonesian "user")
-    # Pattern: "User:" or "Assistant:" ONLY at line start
     response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
     # Strip meta-commentary (Indonesian-specific)
@@ -1164,30 +1175,43 @@ def _clean_response(response: str) -> str:
         if marker in response:
             response = response.split(marker)[0]
-    # Collapse double newlines
     response = re.sub(r'\n{2,}', '\n', response)
-    # Collapse multiple spaces
     response = re.sub(r' {2,}', ' ', response)
-    # Strip leading junk characters
-    response = re.sub(r'^[\s:!,.\-|\[\]]+', '', response)
     return response.strip()
 def _extract_thinking(raw: str) -> Tuple[str, str]:
     import re
-    raw = re.sub(r'<(?!cot|/cot)[^>]+>', '', raw)  # remove tags OTHER than cot
     if "</cot>" in raw:
         thinking_raw, answer_raw = raw.split("</cot>", 1)
-    else:
-        thinking_raw, answer_raw = raw, ""
-    thinking = re.sub(r'<[^>]+>', '', thinking_raw).strip()
-    # FIX: don't cut on "user" mid-word — require full word boundary + colon
-    thinking = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', thinking).strip()
-    answer = _clean_response(answer_raw)
     return thinking, answer
@@ -1247,9 +1271,20 @@ def interactive_chat(
             if show_thinking and thinking:
                 print(f"[Thinking: {thinking}]")
-            final = answer if len(answer) >= 3 else _clean_response(response)
-            if len(final) < 3:
-                final = "Maaf, saya tidak mengerti. Bisa diulang?"
             print(final)
         except KeyboardInterrupt:

             if generated_ids.size(1) >= model.config.max_position_embeddings:
                 break
+    # Decode full sequence but strip BERT-style tokens ([SEP],[CLS],[PAD])
+    # while keeping our custom tokens (<cot>, </cot>) for _extract_thinking.
+    # We cannot use skip_special_tokens=True because that also removes </cot>.
+    import re as _re
+    raw_text = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
+    # Remove BERT pad/sep/cls but keep <cot> </cot>
+    raw_text = _re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw_text)
+    return raw_text
 # ============================================================================
 def _clean_response(response: str) -> str:
     import re
+    # Strip CoT block — do this first before any other processing
     if "<cot>" in response and "</cot>" in response:
         response = response.split("</cot>", 1)[-1]
     elif "<cot>" in response:
+        # Model started CoT but never closed it — everything before <cot> is prompt leak,
+        # everything after is the partial reasoning. Discard both, use empty.
+        response = ""
+    # Strip BERT-style special tokens that appear when skip_special_tokens=False
+    response = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', response)
+    # Strip all remaining XML/special tags
     response = re.sub(r'<[^>]+>', '', response)
+    # Role markers only at line start
     response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
     # Strip meta-commentary (Indonesian-specific)
         if marker in response:
             response = response.split(marker)[0]
+    # Collapse whitespace
     response = re.sub(r'\n{2,}', '\n', response)
     response = re.sub(r' {2,}', ' ', response)
+    # Strip leading punctuation/whitespace junk — but NOT digits or letters
+    response = re.sub(r'^[\s:!,.\-|]+', '', response)
     return response.strip()
 def _extract_thinking(raw: str) -> Tuple[str, str]:
     import re
+    # Strip BERT special tokens first (they appear with skip_special_tokens=False)
+    raw = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw)
     if "</cot>" in raw:
+        # Normal case: model produced full CoT block
         thinking_raw, answer_raw = raw.split("</cot>", 1)
+        thinking = re.sub(r'<[^>]+>', '', thinking_raw).strip()
+        thinking = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', thinking).strip()
+        answer   = _clean_response(answer_raw)
+    elif "<cot>" in raw:
+        # Model started CoT but never finished — reasoning only, no answer yet.
+        # Extract whatever came before <cot> as a potential direct answer,
+        # or whatever came after as partial reasoning.
+        parts    = raw.split("<cot>", 1)
+        thinking = _clean_response(parts[1]) if len(parts) > 1 else ""
+        # Text before <cot> (if any) is used as the answer; otherwise return empty so the caller falls back
+        answer   = _clean_response(parts[0]) if parts[0].strip() else ""
+    else:
+        # No CoT tags at all — the whole output IS the answer (model skipped reasoning)
+        thinking = ""
+        answer   = _clean_response(raw)
     return thinking, answer
             if show_thinking and thinking:
                 print(f"[Thinking: {thinking}]")
+            # Use answer if non-empty; fall back to cleaned full response;
+            # last resort: use thinking itself (model reasoned but didn't emit answer).
+            # Never throw away a valid short answer like "1", "2", "ya".
+            if answer:
+                final = answer
+            else:
+                final = _clean_response(response)
+                if not final and thinking:
+                    # Model only produced reasoning, extract last sentence as answer
+                    sentences = [s.strip() for s in thinking.split('.') if s.strip()]
+                    final = sentences[-1] if sentences else thinking[:200]
+            if not final:
+                final = "..."
             print(final)
         except KeyboardInterrupt: