Spaces:

david167
/

question-generation-api

Sleeping

App Files Files Community

david167 committed on Aug 13, 2025

Commit

f52c60e

1 Parent(s): 1644c5e

FIX TRUNCATION: Improved response extraction logic, conservative cutting, detailed logging - NO MORE TRUNCATION

Browse files

Files changed (2) hide show

gradio_app.py +28 -8
test_api.py +99 -199

gradio_app.py CHANGED Viewed

@@ -137,17 +137,37 @@ def generate_response(prompt, temperature=0.8):
         # Decode
         full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract response
         if "<|start_header_id|>assistant<|end_header_id|>" in full:
-            response = full.split("<|start_header_id|>assistant<|end_header_id|>", 1)[-1].strip()
         else:
-            response = full[len(formatted):].strip()
-        # For CoT, extract JSON
-        if is_cot and '[' in response and ']' in response:
-            match = re.search(r'\[.*\]', response, re.DOTALL)
-            if match and '"user"' in match.group(0) and '"assistant"' in match.group(0):
-                response = match.group(0)
         logger.info(f"Response generated: {len(response)} chars")
         return response.strip()

         # Decode
         full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # FIXED RESPONSE EXTRACTION - No more truncation!
+        logger.info(f"Full generated text length: {len(full)} chars")
+        # Find the assistant response more reliably
         if "<|start_header_id|>assistant<|end_header_id|>" in full:
+            # Split and take everything after the assistant header
+            parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
+            if len(parts) > 1:
+                response = parts[-1].strip()
+                logger.info(f"Extracted after assistant header: {len(response)} chars")
+            else:
+                response = full
         else:
+            # Fallback - be more conservative about cutting
+            # Only cut if we're absolutely sure where the prompt ends
+            if len(full) > len(formatted) + 100:  # Safety buffer
+                response = full[len(formatted):].strip()
+                logger.info(f"Extracted after prompt length: {len(response)} chars")
+            else:
+                # Don't cut anything if we're not sure
+                response = full.strip()
+                logger.info(f"Using full response: {len(response)} chars")
+        # For CoT requests, the model should return the JSON directly
+        # Don't try to extract JSON - trust the model's output
+        if is_cot:
+            logger.info("CoT request - using response as-is (trusting model output)")
+        logger.info(f"Final response length: {len(response)} chars")
+        logger.info(f"Response starts with: {response[:100]}...")
+        logger.info(f"Response ends with: ...{response[-100:]}")
         logger.info(f"Response generated: {len(response)} chars")
         return response.strip()

test_api.py CHANGED Viewed

@@ -1,215 +1,115 @@
 #!/usr/bin/env python3
-"""
-Test script for the Question Generation API
-Run this after your Space is deployed to test the API endpoints
-"""
 import requests
 import json
-import time
-# Replace with your actual Space URL
-BASE_URL = "https://your-space-name.hf.space"
-def test_health_endpoint():
-    """Test the health check endpoint"""
-    print("🔍 Testing health endpoint...")
     try:
-        response = requests.get(f"{BASE_URL}/health", timeout=30)
-        print(f"Status Code: {response.status_code}")
-        if response.status_code == 200:
-            data = response.json()
-            print(f"✅ Health Check Passed")
-            print(f"Model Loaded: {data['model_loaded']}")
-            print(f"Device: {data['device']}")
-            if data.get('memory_usage'):
-                memory = data['memory_usage']
-                print(f"VRAM Usage: {memory.get('allocated_gb', 0):.2f}GB / {memory.get('total_gb', 0):.2f}GB")
-            return True
         else:
-            print(f"❌ Health Check Failed: {response.text}")
-            return False
-    except requests.exceptions.RequestException as e:
-        print(f"❌ Health Check Error: {e}")
-        return False
-def test_question_generation():
-    """Test the question generation endpoint"""
-    print("\n🤔 Testing question generation...")
-    test_cases = [
-        {
-            "name": "Simple Statement",
-            "data": {
-                "statement": "Artificial intelligence is transforming healthcare by enabling more accurate diagnoses, personalized treatments, and efficient drug discovery processes.",
-                "num_questions": 3,
-                "difficulty_level": "medium"
-            }
-        },
-        {
-            "name": "Complex Statement",
-            "data": {
-                "statement": "Climate change represents one of the most significant challenges of the 21st century, involving complex interactions between atmospheric chemistry, ocean currents, biodiversity loss, and human economic systems. The greenhouse effect, primarily driven by carbon dioxide emissions from fossil fuel combustion, is causing global temperatures to rise at an unprecedented rate.",
-                "num_questions": 5,
-                "difficulty_level": "hard",
-                "temperature": 0.9
-            }
-        },
-        {
-            "name": "Short Statement",
-            "data": {
-                "statement": "Water boils at 100 degrees Celsius at sea level.",
-                "num_questions": 2,
-                "difficulty_level": "easy"
-            }
-        }
-    ]
-    for i, test_case in enumerate(test_cases, 1):
-        print(f"\n📝 Test Case {i}: {test_case['name']}")
-        print(f"Statement: {test_case['data']['statement'][:100]}...")
-        try:
-            response = requests.post(
-                f"{BASE_URL}/generate-questions",
-                json=test_case['data'],
-                timeout=60  # Increased timeout for model inference
-            )
-            print(f"Status Code: {response.status_code}")
-            if response.status_code == 200:
-                data = response.json()
-                questions = data['questions']
-                print(f"✅ Generated {len(questions)} questions:")
-                for j, question in enumerate(questions, 1):
-                    print(f"   {j}. {question}")
-                print(f"Metadata: {data['metadata']}")
-            else:
-                print(f"❌ Generation Failed: {response.text}")
-        except requests.exceptions.RequestException as e:
-            print(f"❌ Request Error: {e}")
-def test_error_handling():
-    """Test error handling"""
-    print("\n🚨 Testing error handling...")
-    # Test invalid parameters
-    invalid_tests = [
-        {
-            "name": "Missing statement",
-            "data": {"num_questions": 3}
-        },
-        {
-            "name": "Invalid num_questions",
-            "data": {
-                "statement": "Test statement",
-                "num_questions": 15  # Too high
-            }
-        },
-        {
-            "name": "Invalid temperature",
-            "data": {
-                "statement": "Test statement",
-                "temperature": 5.0  # Too high
-            }
-        }
-    ]
-    for test in invalid_tests:
-        print(f"\n🔍 Testing: {test['name']}")
-        try:
-            response = requests.post(
-                f"{BASE_URL}/generate-questions",
-                json=test['data'],
-                timeout=30
-            )
-            if response.status_code == 422:
-                print("✅ Correctly rejected invalid input")
-            else:
-                print(f"⚠️ Unexpected status code: {response.status_code}")
-        except requests.exceptions.RequestException as e:
-            print(f"❌ Request Error: {e}")
-def benchmark_performance():
-    """Simple performance benchmark"""
-    print("\n⚡ Performance Benchmark...")
-    statement = "Machine learning algorithms are becoming increasingly sophisticated, enabling computers to learn patterns from data without being explicitly programmed for every scenario."
-    times = []
-    for i in range(3):
-        print(f"Run {i+1}/3...", end=" ")
-        start_time = time.time()
-        try:
-            response = requests.post(
-                f"{BASE_URL}/generate-questions",
-                json={
-                    "statement": statement,
-                    "num_questions": 3,
-                    "difficulty_level": "medium"
-                },
-                timeout=60
-            )
-            end_time = time.time()
-            duration = end_time - start_time
-            times.append(duration)
-            if response.status_code == 200:
-                print(f"✅ {duration:.2f}s")
-            else:
-                print(f"❌ Failed ({response.status_code})")
-        except requests.exceptions.RequestException as e:
-            print(f"❌ Error: {e}")
-    if times:
-        avg_time = sum(times) / len(times)
-        print(f"\n📊 Average Response Time: {avg_time:.2f}s")
-        print(f"📊 Min: {min(times):.2f}s, Max: {max(times):.2f}s")
-def main():
-    """Run all tests"""
-    print("🚀 Starting API Tests")
-    print(f"Base URL: {BASE_URL}")
-    print("=" * 50)
-    # Test health first
-    if not test_health_endpoint():
-        print("\n❌ Health check failed. Make sure your Space is running and accessible.")
-        return
-    # Wait a moment for model to be ready
-    print("\n⏳ Waiting for model to be ready...")
-    time.sleep(5)
-    # Run tests
-    test_question_generation()
-    test_error_handling()
-    benchmark_performance()
-    print("\n" + "=" * 50)
-    print("✅ All tests completed!")
-    print("\n💡 Usage Examples:")
-    print(f"curl -X POST '{BASE_URL}/generate-questions' \\")
-    print("     -H 'Content-Type: application/json' \\")
-    print("     -d '{\"statement\": \"Your statement here\", \"num_questions\": 3}'")
 if __name__ == "__main__":
-    # Update this with your actual Space URL before running
-    if "your-space-name" in BASE_URL:
-        print("⚠️  Please update BASE_URL with your actual Space URL before running tests!")
-        print("Example: BASE_URL = 'https://username-question-generation-api.hf.space'")
-    else:
-        main()

 #!/usr/bin/env python3
+"""Test the API with the EXACT request from the logs to diagnose truncation"""
 import requests
 import json
+from gradio_client import Client
+def test_api():
+    """Test the API with the exact CoT request from the logs"""
+    # EXACT request from the logs
+    test_prompt = """Return exactly this JSON array (no other text):
+[
+  {"user": "who is going to win the ravens commanders game", "assistant": "thought: to make a prediction, i'm going to need to think through the following:\nRelative strength or power of the competing teams, which establishes strength differential on a neutral field. These relative power ratings will change over the season and from end of season to the beginning of the next season.\nEach team's previous performances.\nHome field advantage, which could vary by each home and away team.\nIndividual values for each player on each team.\nPresence of injuries or illnesses that affect a team's relative power. Note that injuries/illnesses will subtract from a team's relative power. Not only do you need to adjust based on players out for the week, but you also must adjust based on active players who are playing with an injury.\nGame factors such as weather, each team's previous schedule (e.g., byes, multiple away games in a row, etc.), travel distance/difficulty, stadium quirks, and turf types.\nMotivational factors such as revenge, rivalries, coaching changes, etc.\nSteps\nEvaluate the available information based on my thoughts.\nList out all the information we think is relevant for both teams.\nDiscuss any motivational factors - players or coaches who have a history as an example\nTalk about any weaknesses on either defense who the other team might take advantage of\nLastly make a prediction on the result and score of the game."},
+  {"user": "[new question based on: You are a broadcaster and an NFL expert.  You have years of experience coaching and playing in the N...]", "assistant": "[detailed answer consistent with system context]"},
+  {"user": "[another question based on the topic]", "assistant": "[another detailed answer consistent with system context]"}
+]
+Context for new questions:
+SYSTEM: You are a broadcaster and an NFL expert.  You have years of experience coaching and playing in the NFL.  When someone asks you how you think or to make a prediction about a game or a player, you are thoughtful and detailed thinking through each element of information you would need and judging how much each element will matter
+TOPIC: Based on the user/assistant exchange above
+Requirements:
+- First item MUST use the exact user and assistant prompts provided above
+- Items 2-3 should be NEW, diverse questions with informative responses
+- All responses should be consistent with the system context
+- Return ONLY the JSON array, no additional text"""
+    print("🧪 TESTING API WITH EXACT COT REQUEST")
+    print("=" * 60)
+    print(f"Request length: {len(test_prompt)} characters")
+    print(f"Request preview: {test_prompt[:200]}...")
+    print("=" * 60)
     try:
+        # Use Gradio Client like the actual application
+        print("📡 Connecting to Gradio API...")
+        client = Client("https://david167-question-generation-api.hf.space/")
+        print("📡 Sending request via Gradio Client...")
+        result = client.predict(
+            test_prompt,     # message
+            "[]",           # history_str
+            0.8,            # temperature
+            "",             # json_mode
+            "",             # template
+            api_name="/respond"
+        )
+        print("✅ API Response received!")
+        print(f"Result type: {type(result)}")
+        print(f"Result: {result}")
+        # Extract content based on result type
+        if isinstance(result, tuple):
+            content = result[0] if len(result) > 0 else ""
+            print("📦 Extracted from tuple")
+        elif isinstance(result, str):
+            content = result
+            print("📦 Direct string result")
         else:
+            content = str(result)
+            print("📦 Converted to string")
+        print(f"Response length: {len(content)} characters")
+        print("=" * 60)
+        print("RESPONSE CONTENT:")
+        print(content)
+        print("=" * 60)
+        # Check for truncation indicators
+        truncation_indicators = [
+            content.endswith('", \''),  # Incomplete tuple
+            'e following:' in content[:50],  # Truncated start
+            not content.strip().endswith(']'),  # Missing JSON close
+            len(content) < 500,  # Too short for complete CoT
+        ]
+        if any(truncation_indicators):
+            print("❌ TRUNCATION DETECTED!")
+            print("Issues found:")
+            if content.endswith('", \''):
+                print("  - Response ends with incomplete tuple")
+            if 'e following:' in content[:50]:
+                print("  - Response starts mid-sentence (truncated beginning)")
+            if not content.strip().endswith(']'):
+                print("  - JSON array not properly closed")
+            if len(content) < 500:
+                print("  - Response too short for complete CoT")
+        else:
+            print("✅ NO TRUNCATION DETECTED!")
+            # Try to parse as JSON
+            try:
+                if content.strip().startswith('[') and content.strip().endswith(']'):
+                    parsed = json.loads(content.strip())
+                    print(f"✅ VALID JSON: {len(parsed)} items")
+                    # Check first item for verbatim match
+                    if len(parsed) > 0 and isinstance(parsed[0], dict):
+                        first_user = parsed[0].get('user', '')
+                        if 'who is going to win the ravens commanders game' in first_user:
+                            print("✅ FIRST ITEM VERBATIM MATCH!")
+                        else:
+                            print("❌ First item not verbatim")
+                else:
+                    print("❌ Response not valid JSON array format")
+            except json.JSONDecodeError as e:
+                print(f"❌ JSON PARSE ERROR: {e}")
+    except Exception as e:
+        print(f"❌ Test failed: {e}")
 if __name__ == "__main__":
+    test_api()