Spaces:
Sleeping
Sleeping
Fix response clipping: use robust assistant header detection instead of prompt length
Browse files
gradio_app.py +13 -1
gradio_app.py
CHANGED
|
@@ -121,7 +121,19 @@ def chat_with_model(message, history, temperature):
|
|
| 121 |
|
| 122 |
# Decode the generated text and remove the input prompt
|
| 123 |
full_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 124 |
-
response = full_text[len(prompt):].strip()
| 125 |
|
| 126 |
if not response:
|
| 127 |
response = "I couldn't generate a response. Please try a different prompt."
|
|
|
|
| 121 |
|
| 122 |
# Decode the generated text and remove the input prompt
|
| 123 |
full_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 124 |
+
# Use a more robust method to extract the response
|
| 125 |
+
# Look for the assistant header end and extract everything after it
|
| 126 |
+
assistant_start = "<|start_header_id|>assistant<|end_header_id|>"
|
| 127 |
+
if assistant_start in full_text:
|
| 128 |
+
# Find the position after the assistant header
|
| 129 |
+
response_start = full_text.find(assistant_start) + len(assistant_start)
|
| 130 |
+
response = full_text[response_start:].strip()
|
| 131 |
+
else:
|
| 132 |
+
# Fallback: try to remove the original prompt
|
| 133 |
+
try:
|
| 134 |
+
response = full_text[len(prompt):].strip()
|
| 135 |
+
except:
|
| 136 |
+
response = full_text.strip()
|
| 137 |
|
| 138 |
if not response:
|
| 139 |
response = "I couldn't generate a response. Please try a different prompt."
|