Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -84,13 +84,23 @@ class ChatBot:
|
|
| 84 |
eos_token_id=[self.tokenizer.eos_token_id] + stop_token_ids,
|
| 85 |
)
|
| 86 |
|
| 87 |
-
# Decode and clean response
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
)
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
|
| 96 |
return history + [(message, response)]
|
|
|
|
| 84 |
eos_token_id=[self.tokenizer.eos_token_id] + stop_token_ids,
|
| 85 |
)
|
| 86 |
|
| 87 |
+
# Decode and clean the response, printing debug output at each stage
|
| 88 |
+
# Inspect token IDs
|
| 89 |
+
gen_ids = outputs.sequences[0][len(inputs.input_ids[0]):]
|
| 90 |
+
print("\n--- GENERATED TOKEN IDS ---\n", gen_ids.tolist())
|
| 91 |
+
|
| 92 |
+
# Decode without skipping special tokens, so they stay visible in the debug output
|
| 93 |
+
raw_output = self.tokenizer.decode(gen_ids, skip_special_tokens=False)
|
| 94 |
+
print("\n--- RAW DECODED OUTPUT ---\n", repr(raw_output))
|
| 95 |
+
|
| 96 |
+
# Show first generated token decoded individually
|
| 97 |
+
if len(gen_ids) > 0:
|
| 98 |
+
first_token = self.tokenizer.decode([gen_ids[0]])
|
| 99 |
+
print(f"\n--- FIRST TOKEN --- '{first_token}' ---")
|
| 100 |
+
|
| 101 |
+
# Clean as usual
|
| 102 |
+
response = THINK_TAG_PATTERN.sub("", raw_output).strip()
|
| 103 |
+
print("\n--- CLEANED RESPONSE ---\n", repr(response))
|
| 104 |
|
| 105 |
|
| 106 |
return history + [(message, response)]
|