Fix response extraction - prevent truncation at beginning of JSON responses
gradio_app.py  +26 -2
@@ -261,12 +261,36 @@ def chat_with_model(message, history, temperature, json_mode=False, json_templat
         # Decode response
         generated_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
 
+        # Debug logging
+        logger.info(f"Full generated text length: {len(generated_text)} characters")
+        logger.info(f"Generated text preview: {generated_text[:300]}...")
+        logger.info(f"Generated text ending: ...{generated_text[-300:]}")
+
         # Extract the response part (remove the prompt)
         if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
             response = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
         else:
-            #
-            response = generated_text
+            # Improved fallback: look for common JSON starting patterns
+            response = generated_text
+
+            # Try to find where the actual response starts
+            json_start_patterns = ['[', '{', '"']
+            for pattern in json_start_patterns:
+                if pattern in generated_text:
+                    # Find the first occurrence that looks like the start of JSON
+                    start_idx = generated_text.find(pattern)
+                    if start_idx > len(prompt) // 2:  # Make sure it's after the prompt
+                        response = generated_text[start_idx:].strip()
+                        break
+
+            # Ultimate fallback: use the last portion of the text
+            if response == generated_text:
+                # Split by common delimiters and take the largest chunk
+                chunks = generated_text.split('\n\n')
+                if len(chunks) > 1:
+                    response = chunks[-1].strip()
+                else:
+                    response = generated_text[len(prompt)//2:].strip()
 
         # Log response length for debugging
         logger.info(f"Generated response length: {len(response)} characters")
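For reference, the new fallback path can be exercised outside the app with a minimal standalone sketch. `extract_response`, `ASSISTANT_HEADER`, and the sample prompt below are hypothetical stand-ins for the inline logic and local variables (`generated_text`, `prompt`) in `chat_with_model`; they are not part of gradio_app.py.

# Minimal standalone sketch of the new extraction fallback (hypothetical
# helper, not part of gradio_app.py).
ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"

def extract_response(generated_text: str, prompt: str) -> str:
    # Preferred path: split on the assistant header when it survives decoding.
    if ASSISTANT_HEADER in generated_text:
        return generated_text.split(ASSISTANT_HEADER)[-1].strip()

    response = generated_text

    # Fallback: find a JSON-looking start ('[', '{', '"') past the prompt.
    for pattern in ('[', '{', '"'):
        if pattern in generated_text:
            start_idx = generated_text.find(pattern)
            # Require the match to sit past half the prompt length so a
            # bracket inside the prompt itself is not mistaken for output.
            if start_idx > len(prompt) // 2:
                response = generated_text[start_idx:].strip()
                break

    # Ultimate fallback: last double-newline chunk, else the tail of the text.
    if response == generated_text:
        chunks = generated_text.split('\n\n')
        response = chunks[-1].strip() if len(chunks) > 1 else generated_text[len(prompt) // 2:].strip()
    return response

# Example: a prompt echoed back without special tokens no longer swallows
# the JSON payload.
prompt = "Return the user record as JSON."
print(extract_response(prompt + '\n\n{"name": "Ada"}', prompt))  # -> {"name": "Ada"}

Note that the half-prompt heuristic compares character offsets in the decoded text, so a prompt that itself contains brackets or quotes early on can still defeat the first fallback; the sketch keeps that behavior to match the diff.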