david167 committed on
Commit
0d85e38
·
1 Parent(s): 625d819

Fix response clipping: use robust assistant header detection instead of prompt length

Browse files
Files changed (1) hide show
  1. gradio_app.py +13 -1
gradio_app.py CHANGED
@@ -121,7 +121,19 @@ def chat_with_model(message, history, temperature):
121
 
122
  # Decode the generated text and remove the input prompt
123
  full_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
124
- response = full_text[len(prompt):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  if not response:
127
  response = "I couldn't generate a response. Please try a different prompt."
 
121
 
122
  # Decode the generated text and remove the input prompt
123
  full_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
124
+ # Use a more robust method to extract the response
125
+ # Look for the assistant header end and extract everything after it
126
+ assistant_start = "<|start_header_id|>assistant<|end_header_id|>"
127
+ if assistant_start in full_text:
128
+ # Find the position after the assistant header
129
+ response_start = full_text.find(assistant_start) + len(assistant_start)
130
+ response = full_text[response_start:].strip()
131
+ else:
132
+ # Fallback: try to remove the original prompt
133
+ try:
134
+ response = full_text[len(prompt):].strip()
135
+ except:
136
+ response = full_text.strip()
137
 
138
  if not response:
139
  response = "I couldn't generate a response. Please try a different prompt."