Update best.py
Browse files
best.py
CHANGED
|
@@ -1136,7 +1136,14 @@ def generate_text(
|
|
| 1136 |
if generated_ids.size(1) >= model.config.max_position_embeddings:
|
| 1137 |
break
|
| 1138 |
|
| 1139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1140 |
|
| 1141 |
|
| 1142 |
# ============================================================================
|
|
@@ -1146,17 +1153,21 @@ def generate_text(
|
|
| 1146 |
def _clean_response(response: str) -> str:
|
| 1147 |
import re
|
| 1148 |
|
| 1149 |
-
# Strip CoT block
|
| 1150 |
if "<cot>" in response and "</cot>" in response:
|
| 1151 |
response = response.split("</cot>", 1)[-1]
|
| 1152 |
elif "<cot>" in response:
|
| 1153 |
-
|
|
|
|
|
|
|
| 1154 |
|
| 1155 |
-
# Strip
|
|
|
|
|
|
|
|
|
|
| 1156 |
response = re.sub(r'<[^>]+>', '', response)
|
| 1157 |
|
| 1158 |
-
#
|
| 1159 |
-
# Pattern: "User:" or "Assistant:" ONLY at line start
|
| 1160 |
response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
|
| 1161 |
|
| 1162 |
# Strip meta-commentary (Indonesian-specific)
|
|
@@ -1164,30 +1175,43 @@ def _clean_response(response: str) -> str:
|
|
| 1164 |
if marker in response:
|
| 1165 |
response = response.split(marker)[0]
|
| 1166 |
|
| 1167 |
-
# Collapse
|
| 1168 |
response = re.sub(r'\n{2,}', '\n', response)
|
| 1169 |
-
# Collapse multiple spaces
|
| 1170 |
response = re.sub(r' {2,}', ' ', response)
|
| 1171 |
-
|
| 1172 |
-
|
|
|
|
| 1173 |
|
| 1174 |
return response.strip()
|
| 1175 |
|
| 1176 |
|
| 1177 |
def _extract_thinking(raw: str) -> Tuple[str, str]:
|
| 1178 |
import re
|
| 1179 |
-
|
|
|
|
|
|
|
| 1180 |
|
| 1181 |
if "</cot>" in raw:
|
|
|
|
| 1182 |
thinking_raw, answer_raw = raw.split("</cot>", 1)
|
| 1183 |
-
|
| 1184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1185 |
|
| 1186 |
-
|
| 1187 |
-
|
| 1188 |
-
|
|
|
|
| 1189 |
|
| 1190 |
-
answer = _clean_response(answer_raw)
|
| 1191 |
return thinking, answer
|
| 1192 |
|
| 1193 |
|
|
@@ -1247,9 +1271,20 @@ def interactive_chat(
|
|
| 1247 |
if show_thinking and thinking:
|
| 1248 |
print(f"[Thinking: {thinking}]")
|
| 1249 |
|
| 1250 |
-
|
| 1251 |
-
|
| 1252 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1253 |
print(final)
|
| 1254 |
|
| 1255 |
except KeyboardInterrupt:
|
|
|
|
| 1136 |
if generated_ids.size(1) >= model.config.max_position_embeddings:
|
| 1137 |
break
|
| 1138 |
|
| 1139 |
+
# Decode full sequence but strip BERT-style tokens ([SEP],[CLS],[PAD])
|
| 1140 |
+
# while keeping our custom tokens (<cot>, </cot>) for _extract_thinking.
|
| 1141 |
+
# We cannot use skip_special_tokens=True because that also removes </cot>.
|
| 1142 |
+
import re as _re
|
| 1143 |
+
raw_text = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
|
| 1144 |
+
# Remove BERT pad/sep/cls but keep <cot> </cot>
|
| 1145 |
+
raw_text = _re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw_text)
|
| 1146 |
+
return raw_text
|
| 1147 |
|
| 1148 |
|
| 1149 |
# ============================================================================
|
|
|
|
| 1153 |
def _clean_response(response: str) -> str:
|
| 1154 |
import re
|
| 1155 |
|
| 1156 |
+
# Strip CoT block — do this first before any other processing
|
| 1157 |
if "<cot>" in response and "</cot>" in response:
|
| 1158 |
response = response.split("</cot>", 1)[-1]
|
| 1159 |
elif "<cot>" in response:
|
| 1160 |
+
# Model started CoT but never closed it — everything before <cot> is prompt leak,
|
| 1161 |
+
# everything after is the partial reasoning. Discard both, use empty.
|
| 1162 |
+
response = ""
|
| 1163 |
|
| 1164 |
+
# Strip BERT-style special tokens that appear when skip_special_tokens=False
|
| 1165 |
+
response = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', response)
|
| 1166 |
+
|
| 1167 |
+
# Strip all remaining XML/special tags
|
| 1168 |
response = re.sub(r'<[^>]+>', '', response)
|
| 1169 |
|
| 1170 |
+
# Role markers only at line start
|
|
|
|
| 1171 |
response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
|
| 1172 |
|
| 1173 |
# Strip meta-commentary (Indonesian-specific)
|
|
|
|
| 1175 |
if marker in response:
|
| 1176 |
response = response.split(marker)[0]
|
| 1177 |
|
| 1178 |
+
# Collapse whitespace
|
| 1179 |
response = re.sub(r'\n{2,}', '\n', response)
|
|
|
|
| 1180 |
response = re.sub(r' {2,}', ' ', response)
|
| 1181 |
+
|
| 1182 |
+
# Strip leading punctuation/whitespace junk β but NOT digits or letters
|
| 1183 |
+
response = re.sub(r'^[\s:!,.\-|]+', '', response)
|
| 1184 |
|
| 1185 |
return response.strip()
|
| 1186 |
|
| 1187 |
|
| 1188 |
def _extract_thinking(raw: str) -> Tuple[str, str]:
    """Split raw decoded model output into ``(thinking, answer)``.

    Three cases are handled, keyed on the custom ``<cot>`` / ``</cot>`` tags:

    * ``</cot>`` present: the text between the last ``<cot>`` and the first
      ``</cot>`` is the reasoning; everything after ``</cot>`` is passed to
      ``_clean_response`` and returned as the answer.
    * only ``<cot>`` present (block never closed): the text after ``<cot>``
      is returned as partial reasoning and the text before it, if any, is
      cleaned and returned as the answer.
    * no tags: the reasoning is empty and the whole cleaned text is the
      answer.

    Args:
        raw: Decoded model output, possibly containing BERT-style special
            tokens and ``<cot>`` / ``</cot>`` tags.

    Returns:
        A ``(thinking, answer)`` tuple of strings; either may be empty.
    """
    import re

    # Strip BERT special tokens first (they appear with skip_special_tokens=False)
    raw = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw)

    if "</cot>" in raw:
        # Normal case: model produced a full CoT block.
        thinking_raw, answer_raw = raw.split("</cot>", 1)
        # Keep only what follows the last <cot>. Without this, any text that
        # precedes the block (e.g. a prompt) ends up in the reasoning, and the
        # line-anchored role-marker regex below can then delete real reasoning
        # that shares a line with a leading "User:" / "Assistant:" marker.
        # rpartition returns the whole string when <cot> is absent, so output
        # with a stray </cot> but no opening tag behaves as before.
        thinking_raw = thinking_raw.rpartition("<cot>")[2]
        thinking = re.sub(r'<[^>]+>', '', thinking_raw).strip()
        thinking = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', thinking).strip()
        answer = _clean_response(answer_raw)

    elif "<cot>" in raw:
        # Model started CoT but never finished. Text after <cot> is partial
        # reasoning; text before it (if any) is treated as a potential
        # direct answer.
        before, _, after = raw.partition("<cot>")
        thinking = _clean_response(after)
        # May be empty; the caller is then expected to fall back.
        answer = _clean_response(before) if before.strip() else ""

    else:
        # No CoT tags at all — the whole output IS the answer (model skipped reasoning)
        thinking = ""
        answer = _clean_response(raw)

    return thinking, answer
|
| 1216 |
|
| 1217 |
|
|
|
|
| 1271 |
if show_thinking and thinking:
|
| 1272 |
print(f"[Thinking: {thinking}]")
|
| 1273 |
|
| 1274 |
+
# Use answer if non-empty; fall back to cleaned full response;
|
| 1275 |
+
# last resort: use thinking itself (model reasoned but didn't emit answer).
|
| 1276 |
+
# Never throw away a valid short answer like "1", "2", "ya".
|
| 1277 |
+
if answer:
|
| 1278 |
+
final = answer
|
| 1279 |
+
else:
|
| 1280 |
+
final = _clean_response(response)
|
| 1281 |
+
if not final and thinking:
|
| 1282 |
+
# Model only produced reasoning, extract last sentence as answer
|
| 1283 |
+
sentences = [s.strip() for s in thinking.split('.') if s.strip()]
|
| 1284 |
+
final = sentences[-1] if sentences else thinking[:200]
|
| 1285 |
+
|
| 1286 |
+
if not final:
|
| 1287 |
+
final = "..."
|
| 1288 |
print(final)
|
| 1289 |
|
| 1290 |
except KeyboardInterrupt:
|