FaiziRBLX committed on
Commit
0cc633b
·
verified ·
1 Parent(s): f040bfd

Update best.py

Browse files
Files changed (1) hide show
  1. best.py +55 -20
best.py CHANGED
@@ -1136,7 +1136,14 @@ def generate_text(
1136
  if generated_ids.size(1) >= model.config.max_position_embeddings:
1137
  break
1138
 
1139
- return tokenizer.decode(generated_ids[0], skip_special_tokens=False)
 
 
 
 
 
 
 
1140
 
1141
 
1142
  # ============================================================================
@@ -1146,17 +1153,21 @@ def generate_text(
1146
  def _clean_response(response: str) -> str:
1147
  import re
1148
 
1149
- # Strip CoT block
1150
  if "<cot>" in response and "</cot>" in response:
1151
  response = response.split("</cot>", 1)[-1]
1152
  elif "<cot>" in response:
1153
- response = response.split("<cot>", 1)[0]
 
 
1154
 
1155
- # Strip XML-like tags
 
 
 
1156
  response = re.sub(r'<[^>]+>', '', response)
1157
 
1158
- # FIX: match role markers only at start of line (not mid-sentence Indonesian "user")
1159
- # Pattern: "User:" or "Assistant:" ONLY at line start
1160
  response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
1161
 
1162
  # Strip meta-commentary (Indonesian-specific)
@@ -1164,30 +1175,43 @@ def _clean_response(response: str) -> str:
1164
  if marker in response:
1165
  response = response.split(marker)[0]
1166
 
1167
- # Collapse double newlines
1168
  response = re.sub(r'\n{2,}', '\n', response)
1169
- # Collapse multiple spaces
1170
  response = re.sub(r' {2,}', ' ', response)
1171
- # Strip leading junk characters
1172
- response = re.sub(r'^[\s:!,.\-|\[\]]+', '', response)
 
1173
 
1174
  return response.strip()
1175
 
1176
 
1177
  def _extract_thinking(raw: str) -> Tuple[str, str]:
1178
  import re
1179
- raw = re.sub(r'<(?!cot|/cot)[^>]+>', '', raw) # remove tags OTHER than cot
 
 
1180
 
1181
  if "</cot>" in raw:
 
1182
  thinking_raw, answer_raw = raw.split("</cot>", 1)
1183
- else:
1184
- thinking_raw, answer_raw = raw, ""
 
 
 
 
 
 
 
 
 
 
1185
 
1186
- thinking = re.sub(r'<[^>]+>', '', thinking_raw).strip()
1187
- # FIX: don't cut on "user" mid-word — require full word boundary + colon
1188
- thinking = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', thinking).strip()
 
1189
 
1190
- answer = _clean_response(answer_raw)
1191
  return thinking, answer
1192
 
1193
 
@@ -1247,9 +1271,20 @@ def interactive_chat(
1247
  if show_thinking and thinking:
1248
  print(f"[Thinking: {thinking}]")
1249
 
1250
- final = answer if len(answer) >= 3 else _clean_response(response)
1251
- if len(final) < 3:
1252
- final = "Maaf, saya tidak mengerti. Bisa diulang?"
 
 
 
 
 
 
 
 
 
 
 
1253
  print(final)
1254
 
1255
  except KeyboardInterrupt:
 
1136
  if generated_ids.size(1) >= model.config.max_position_embeddings:
1137
  break
1138
 
1139
+ # Decode full sequence but strip BERT-style tokens ([SEP],[CLS],[PAD])
1140
+ # while keeping our custom tokens (<cot>, </cot>) for _extract_thinking.
1141
+ # We cannot use skip_special_tokens=True because that also removes </cot>.
1142
+ import re as _re
1143
+ raw_text = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
1144
+ # Remove BERT pad/sep/cls but keep <cot> </cot>
1145
+ raw_text = _re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw_text)
1146
+ return raw_text
1147
 
1148
 
1149
  # ============================================================================
 
1153
  def _clean_response(response: str) -> str:
1154
  import re
1155
 
1156
+ # Strip CoT block — do this first before any other processing
1157
  if "<cot>" in response and "</cot>" in response:
1158
  response = response.split("</cot>", 1)[-1]
1159
  elif "<cot>" in response:
1160
+ # Model started CoT but never closed it — everything before <cot> is prompt leak,
1161
+ # everything after is the partial reasoning. Discard both, use empty.
1162
+ response = ""
1163
 
1164
+ # Strip BERT-style special tokens that appear when skip_special_tokens=False
1165
+ response = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', response)
1166
+
1167
+ # Strip all remaining XML/special tags
1168
  response = re.sub(r'<[^>]+>', '', response)
1169
 
1170
+ # Role markers only at line start
 
1171
  response = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', response)
1172
 
1173
  # Strip meta-commentary (Indonesian-specific)
 
1175
  if marker in response:
1176
  response = response.split(marker)[0]
1177
 
1178
+ # Collapse whitespace
1179
  response = re.sub(r'\n{2,}', '\n', response)
 
1180
  response = re.sub(r' {2,}', ' ', response)
1181
+
1182
+ # Strip leading punctuation/whitespace junk — but NOT digits or letters
1183
+ response = re.sub(r'^[\s:!,.\-|]+', '', response)
1184
 
1185
  return response.strip()
1186
 
1187
 
1188
  def _extract_thinking(raw: str) -> Tuple[str, str]:
1189
  import re
1190
+
1191
+ # Strip BERT special tokens first (they appear with skip_special_tokens=False)
1192
+ raw = re.sub(r'\[(SEP|CLS|PAD|UNK|MASK)\]', '', raw)
1193
 
1194
  if "</cot>" in raw:
1195
+ # Normal case: model produced full CoT block
1196
  thinking_raw, answer_raw = raw.split("</cot>", 1)
1197
+ thinking = re.sub(r'<[^>]+>', '', thinking_raw).strip()
1198
+ thinking = re.sub(r'(?im)^\s*(user\s*:|assistant\s*:).*', '', thinking).strip()
1199
+ answer = _clean_response(answer_raw)
1200
+
1201
+ elif "<cot>" in raw:
1202
+ # Model started CoT but never finished — reasoning only, no answer yet.
1203
+ # Extract whatever came before <cot> as a potential direct answer,
1204
+ # or whatever came after as partial reasoning.
1205
+ parts = raw.split("<cot>", 1)
1206
+ thinking = _clean_response(parts[1]) if len(parts) > 1 else ""
1207
+ # No clean answer available — return empty, caller will fall back
1208
+ answer = _clean_response(parts[0]) if parts[0].strip() else ""
1209
 
1210
+ else:
1211
+ # No CoT tags at all — the whole output IS the answer (model skipped reasoning)
1212
+ thinking = ""
1213
+ answer = _clean_response(raw)
1214
 
 
1215
  return thinking, answer
1216
 
1217
 
 
1271
  if show_thinking and thinking:
1272
  print(f"[Thinking: {thinking}]")
1273
 
1274
+ # Use answer if non-empty; fall back to cleaned full response;
1275
+ # last resort: use thinking itself (model reasoned but didn't emit answer).
1276
+ # Never throw away a valid short answer like "1", "2", "ya".
1277
+ if answer:
1278
+ final = answer
1279
+ else:
1280
+ final = _clean_response(response)
1281
+ if not final and thinking:
1282
+ # Model only produced reasoning, extract last sentence as answer
1283
+ sentences = [s.strip() for s in thinking.split('.') if s.strip()]
1284
+ final = sentences[-1] if sentences else thinking[:200]
1285
+
1286
+ if not final:
1287
+ final = "..."
1288
  print(final)
1289
 
1290
  except KeyboardInterrupt: