david167 committed on
Commit
0c60639
·
1 Parent(s): fcef7cd

DEBUG: Show complete raw model output and prompt to identify clipping source

Browse files
Files changed (1) hide show
  1. gradio_app.py +9 -15
gradio_app.py CHANGED
@@ -115,22 +115,16 @@ def chat_response(message: str, history: List[List[str]], temperature: float) ->
115
  )
116
 
117
  # Decode response
118
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
119
-
120
- # Extract just the assistant's response (everything after the last assistant header)
121
- response_start = generated_text.rfind("<|start_header_id|>assistant<|end_header_id|>")
122
- if response_start != -1:
123
- response = generated_text[response_start + len("<|start_header_id|>assistant<|end_header_id|>"):].strip()
124
- else:
125
- # Fallback: remove the original prompt
126
- response = generated_text[len(prompt):].strip()
127
-
128
- # Clean up any remaining tokens
129
- response = response.replace("<|eot_id|>", "").strip()
130
-
131
- if not response:
132
- response = "I apologize, but I couldn't generate a response. Please try rephrasing your message."
133
 
 
134
  # Add to history
135
  history.append([message, response])
136
 
 
115
  )
116
 
117
  # Decode response
118
+ # TEMPORARY: Show complete raw output to debug clipping
119
+ response = f"""=== RAW MODEL OUTPUT ===
120
+ {generated_text}
121
+ === END RAW OUTPUT ===
122
+
123
+ === PROMPT USED ===
124
+ {prompt}
125
+ === END PROMPT ==="""
 
 
 
 
 
 
 
126
 
127
+ # Add to history
128
  # Add to history
129
  history.append([message, response])
130