david167 committed on
Commit
0c60639
·
1 Parent(s): fcef7cd

DEBUG: Show complete raw model output and prompt to identify clipping source

Browse files
Files changed (1) hide show
  1. gradio_app.py +9 -15
gradio_app.py CHANGED
@@ -115,22 +115,16 @@ def chat_response(message: str, history: List[List[str]], temperature: float) ->
115
  )
116
 
117
  # Decode response
118
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
119
-
120
- # Extract just the assistant's response (everything after the last assistant header)
121
- response_start = generated_text.rfind("<|start_header_id|>assistant<|end_header_id|>")
122
- if response_start != -1:
123
- response = generated_text[response_start + len("<|start_header_id|>assistant<|end_header_id|>"):].strip()
124
- else:
125
- # Fallback: remove the original prompt
126
- response = generated_text[len(prompt):].strip()
127
-
128
- # Clean up any remaining tokens
129
- response = response.replace("<|eot_id|>", "").strip()
130
-
131
- if not response:
132
- response = "I apologize, but I couldn't generate a response. Please try rephrasing your message."
133
 
 
134
  # Add to history
135
  history.append([message, response])
136
 
 
115
  )
116
 
117
  # Decode response
118
+ # TEMPORARY: Show complete raw output to debug clipping
119
+ response = f"""=== RAW MODEL OUTPUT ===
120
+ {generated_text}
121
+ === END RAW OUTPUT ===
122
+
123
+ === PROMPT USED ===
124
+ {prompt}
125
+ === END PROMPT ==="""
 
 
 
 
 
 
 
126
 
127
+ # Add to history
128
  # Add to history
129
  history.append([message, response])
130