Spaces:
Sleeping
Sleeping
ULTRA CONSERVATIVE EXTRACTION: Find JSON array boundaries properly, extensive logging, no aggressive cutting
Browse files - gradio_app.py +41 -24
gradio_app.py
CHANGED
|
@@ -137,37 +137,54 @@ def generate_response(prompt, temperature=0.8):
|
|
| 137 |
# Decode
|
| 138 |
full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 139 |
|
| 140 |
-
#
|
| 141 |
logger.info(f"Full generated text length: {len(full)} chars")
|
| 142 |
|
| 143 |
-
#
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
else:
|
| 151 |
-
|
|
|
|
| 152 |
else:
|
| 153 |
-
#
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
|
|
|
|
|
|
|
|
|
| 158 |
else:
|
| 159 |
-
#
|
| 160 |
response = full.strip()
|
| 161 |
-
logger.info(
|
| 162 |
-
|
| 163 |
-
# For CoT requests, the model should return the JSON directly
|
| 164 |
-
# Don't try to extract JSON - trust the model's output
|
| 165 |
-
if is_cot:
|
| 166 |
-
logger.info("CoT request - using response as-is (trusting model output)")
|
| 167 |
|
| 168 |
-
logger.info(f"
|
| 169 |
-
logger.info(f"
|
| 170 |
-
logger.info(f"
|
| 171 |
|
| 172 |
logger.info(f"Response generated: {len(response)} chars")
|
| 173 |
return response.strip()
|
|
|
|
| 137 |
# Decode
|
| 138 |
full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 139 |
|
| 140 |
+
# ULTRA CONSERVATIVE EXTRACTION - STOP ALL TRUNCATION!
|
| 141 |
logger.info(f"Full generated text length: {len(full)} chars")
|
| 142 |
|
| 143 |
+
# For debugging - log the full text boundaries
|
| 144 |
+
logger.info(f"Full text starts: {full[:200]}...")
|
| 145 |
+
logger.info(f"Full text ends: ...{full[-200:]}")
|
| 146 |
+
|
| 147 |
+
# Find the JSON array in the response - look for the actual content
|
| 148 |
+
# The model should generate the JSON array directly
|
| 149 |
+
if is_cot and '[' in full and ']' in full:
|
| 150 |
+
# Find the JSON array boundaries
|
| 151 |
+
start_idx = full.find('[')
|
| 152 |
+
end_idx = full.rfind(']')
|
| 153 |
+
|
| 154 |
+
if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
|
| 155 |
+
# Extract the complete JSON array
|
| 156 |
+
json_content = full[start_idx:end_idx+1]
|
| 157 |
+
logger.info(f"Found JSON array: {len(json_content)} chars")
|
| 158 |
+
logger.info(f"JSON starts: {json_content[:100]}...")
|
| 159 |
+
logger.info(f"JSON ends: ...{json_content[-100:]}")
|
| 160 |
+
|
| 161 |
+
# Validate it looks like proper JSON
|
| 162 |
+
if '"user"' in json_content and '"assistant"' in json_content:
|
| 163 |
+
response = json_content
|
| 164 |
+
logger.info("✅ Using extracted JSON array")
|
| 165 |
+
else:
|
| 166 |
+
logger.warning("❌ JSON validation failed, using full response")
|
| 167 |
+
response = full.strip()
|
| 168 |
else:
|
| 169 |
+
logger.warning("❌ Could not find JSON boundaries, using full response")
|
| 170 |
+
response = full.strip()
|
| 171 |
else:
|
| 172 |
+
# For non-CoT or if no JSON found, try to extract assistant response
|
| 173 |
+
if "<|start_header_id|>assistant<|end_header_id|>" in full:
|
| 174 |
+
parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
|
| 175 |
+
if len(parts) > 1:
|
| 176 |
+
response = parts[-1].strip()
|
| 177 |
+
logger.info(f"Extracted after assistant header: {len(response)} chars")
|
| 178 |
+
else:
|
| 179 |
+
response = full.strip()
|
| 180 |
else:
|
| 181 |
+
# ABSOLUTELY NO CUTTING - use everything
|
| 182 |
response = full.strip()
|
| 183 |
+
logger.info("Using complete full response - no cutting")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
|
| 185 |
+
logger.info(f"FINAL response length: {len(response)} chars")
|
| 186 |
+
logger.info(f"FINAL starts with: {response[:150]}...")
|
| 187 |
+
logger.info(f"FINAL ends with: ...{response[-150:]}")
|
| 188 |
|
| 189 |
logger.info(f"Response generated: {len(response)} chars")
|
| 190 |
return response.strip()
|