Spaces:

david167
/

question-generation-api

Sleeping

App Files Community

david167 committed on Aug 13, 2025

Commit

d82dc35

1 Parent(s): f52c60e

ULTRA CONSERVATIVE EXTRACTION: Find JSON array boundaries properly, extensive logging, no aggressive cutting

Browse files

Files changed (1) hide show

gradio_app.py +41 -24

gradio_app.py CHANGED Viewed

@@ -137,37 +137,54 @@ def generate_response(prompt, temperature=0.8):
         # Decode
         full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # FIXED RESPONSE EXTRACTION - No more truncation!
         logger.info(f"Full generated text length: {len(full)} chars")
-        # Find the assistant response more reliably
-        if "<|start_header_id|>assistant<|end_header_id|>" in full:
-            # Split and take everything after the assistant header
-            parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
-            if len(parts) > 1:
-                response = parts[-1].strip()
-                logger.info(f"Extracted after assistant header: {len(response)} chars")
             else:
-                response = full
         else:
-            # Fallback - be more conservative about cutting
-            # Only cut if we're absolutely sure where the prompt ends
-            if len(full) > len(formatted) + 100:  # Safety buffer
-                response = full[len(formatted):].strip()
-                logger.info(f"Extracted after prompt length: {len(response)} chars")
             else:
-                # Don't cut anything if we're not sure
                 response = full.strip()
-                logger.info(f"Using full response: {len(response)} chars")
-        # For CoT requests, the model should return the JSON directly
-        # Don't try to extract JSON - trust the model's output
-        if is_cot:
-            logger.info("CoT request - using response as-is (trusting model output)")
-        logger.info(f"Final response length: {len(response)} chars")
-        logger.info(f"Response starts with: {response[:100]}...")
-        logger.info(f"Response ends with: ...{response[-100:]}")
         logger.info(f"Response generated: {len(response)} chars")
         return response.strip()

         # Decode
         full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # ULTRA CONSERVATIVE EXTRACTION - STOP ALL TRUNCATION!
         logger.info(f"Full generated text length: {len(full)} chars")
+        # For debugging - log the full text boundaries
+        logger.info(f"Full text starts: {full[:200]}...")
+        logger.info(f"Full text ends: ...{full[-200:]}")
+        # Find the JSON array in the response - look for the actual content
+        # The model should generate the JSON array directly
+        if is_cot and '[' in full and ']' in full:
+            # Find the JSON array boundaries
+            start_idx = full.find('[')
+            end_idx = full.rfind(']')
+            if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
+                # Extract the complete JSON array
+                json_content = full[start_idx:end_idx+1]
+                logger.info(f"Found JSON array: {len(json_content)} chars")
+                logger.info(f"JSON starts: {json_content[:100]}...")
+                logger.info(f"JSON ends: ...{json_content[-100:]}")
+                # Validate it looks like proper JSON
+                if '"user"' in json_content and '"assistant"' in json_content:
+                    response = json_content
+                    logger.info("✅ Using extracted JSON array")
+                else:
+                    logger.warning("❌ JSON validation failed, using full response")
+                    response = full.strip()
             else:
+                logger.warning("❌ Could not find JSON boundaries, using full response")
+                response = full.strip()
         else:
+            # For non-CoT or if no JSON found, try to extract assistant response
+            if "<|start_header_id|>assistant<|end_header_id|>" in full:
+                parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
+                if len(parts) > 1:
+                    response = parts[-1].strip()
+                    logger.info(f"Extracted after assistant header: {len(response)} chars")
+                else:
+                    response = full.strip()
             else:
+                # ABSOLUTELY NO CUTTING - use everything
                 response = full.strip()
+                logger.info("Using complete full response - no cutting")
+        logger.info(f"FINAL response length: {len(response)} chars")
+        logger.info(f"FINAL starts with: {response[:150]}...")
+        logger.info(f"FINAL ends with: ...{response[-150:]}")
         logger.info(f"Response generated: {len(response)} chars")
         return response.strip()