david167 committed on
Commit
d82dc35
·
1 Parent(s): f52c60e

ULTRA CONSERVATIVE EXTRACTION: Find JSON array boundaries properly, extensive logging, no aggressive cutting

Browse files
Files changed (1) hide show
  1. gradio_app.py +41 -24
gradio_app.py CHANGED
@@ -137,37 +137,54 @@ def generate_response(prompt, temperature=0.8):
137
  # Decode
138
  full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
139
 
140
- # FIXED RESPONSE EXTRACTION - No more truncation!
141
  logger.info(f"Full generated text length: {len(full)} chars")
142
 
143
- # Find the assistant response more reliably
144
- if "<|start_header_id|>assistant<|end_header_id|>" in full:
145
- # Split and take everything after the assistant header
146
- parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
147
- if len(parts) > 1:
148
- response = parts[-1].strip()
149
- logger.info(f"Extracted after assistant header: {len(response)} chars")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  else:
151
- response = full
 
152
  else:
153
- # Fallback - be more conservative about cutting
154
- # Only cut if we're absolutely sure where the prompt ends
155
- if len(full) > len(formatted) + 100: # Safety buffer
156
- response = full[len(formatted):].strip()
157
- logger.info(f"Extracted after prompt length: {len(response)} chars")
 
 
 
158
  else:
159
- # Don't cut anything if we're not sure
160
  response = full.strip()
161
- logger.info(f"Using full response: {len(response)} chars")
162
-
163
- # For CoT requests, the model should return the JSON directly
164
- # Don't try to extract JSON - trust the model's output
165
- if is_cot:
166
- logger.info("CoT request - using response as-is (trusting model output)")
167
 
168
- logger.info(f"Final response length: {len(response)} chars")
169
- logger.info(f"Response starts with: {response[:100]}...")
170
- logger.info(f"Response ends with: ...{response[-100:]}")
171
 
172
  logger.info(f"Response generated: {len(response)} chars")
173
  return response.strip()
 
137
  # Decode
138
  full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
139
 
140
+ # ULTRA CONSERVATIVE EXTRACTION - STOP ALL TRUNCATION!
141
  logger.info(f"Full generated text length: {len(full)} chars")
142
 
143
+ # For debugging - log the full text boundaries
144
+ logger.info(f"Full text starts: {full[:200]}...")
145
+ logger.info(f"Full text ends: ...{full[-200:]}")
146
+
147
+ # Find the JSON array in the response - look for the actual content
148
+ # The model should generate the JSON array directly
149
+ if is_cot and '[' in full and ']' in full:
150
+ # Find the JSON array boundaries
151
+ start_idx = full.find('[')
152
+ end_idx = full.rfind(']')
153
+
154
+ if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
155
+ # Extract the complete JSON array
156
+ json_content = full[start_idx:end_idx+1]
157
+ logger.info(f"Found JSON array: {len(json_content)} chars")
158
+ logger.info(f"JSON starts: {json_content[:100]}...")
159
+ logger.info(f"JSON ends: ...{json_content[-100:]}")
160
+
161
+ # Validate it looks like proper JSON
162
+ if '"user"' in json_content and '"assistant"' in json_content:
163
+ response = json_content
164
+ logger.info("✅ Using extracted JSON array")
165
+ else:
166
+ logger.warning("❌ JSON validation failed, using full response")
167
+ response = full.strip()
168
  else:
169
+ logger.warning("❌ Could not find JSON boundaries, using full response")
170
+ response = full.strip()
171
  else:
172
+ # For non-CoT or if no JSON found, try to extract assistant response
173
+ if "<|start_header_id|>assistant<|end_header_id|>" in full:
174
+ parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
175
+ if len(parts) > 1:
176
+ response = parts[-1].strip()
177
+ logger.info(f"Extracted after assistant header: {len(response)} chars")
178
+ else:
179
+ response = full.strip()
180
  else:
181
+ # ABSOLUTELY NO CUTTING - use everything
182
  response = full.strip()
183
+ logger.info("Using complete full response - no cutting")
 
 
 
 
 
184
 
185
+ logger.info(f"FINAL response length: {len(response)} chars")
186
+ logger.info(f"FINAL starts with: {response[:150]}...")
187
+ logger.info(f"FINAL ends with: ...{response[-150:]}")
188
 
189
  logger.info(f"Response generated: {len(response)} chars")
190
  return response.strip()