Spaces:
Sleeping
Sleeping
FIX TRUNCATION: Improved response extraction logic, conservative cutting, detailed logging - NO MORE TRUNCATION && git push
Browse files- gradio_app.py +28 -8
- test_api.py +99 -199
gradio_app.py
CHANGED
|
@@ -137,17 +137,37 @@ def generate_response(prompt, temperature=0.8):
|
|
| 137 |
# Decode
|
| 138 |
full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 139 |
|
| 140 |
-
#
|
|
|
|
|
|
|
|
|
|
| 141 |
if "<|start_header_id|>assistant<|end_header_id|>" in full:
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
else:
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
if match and '"user"' in match.group(0) and '"assistant"' in match.group(0):
|
| 150 |
-
response = match.group(0)
|
| 151 |
|
| 152 |
logger.info(f"Response generated: {len(response)} chars")
|
| 153 |
return response.strip()
|
|
|
|
| 137 |
# Decode
|
| 138 |
full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 139 |
|
| 140 |
+
# FIXED RESPONSE EXTRACTION - No more truncation!
|
| 141 |
+
logger.info(f"Full generated text length: {len(full)} chars")
|
| 142 |
+
|
| 143 |
+
# Find the assistant response more reliably
|
| 144 |
if "<|start_header_id|>assistant<|end_header_id|>" in full:
|
| 145 |
+
# Split and take everything after the assistant header
|
| 146 |
+
parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
|
| 147 |
+
if len(parts) > 1:
|
| 148 |
+
response = parts[-1].strip()
|
| 149 |
+
logger.info(f"Extracted after assistant header: {len(response)} chars")
|
| 150 |
+
else:
|
| 151 |
+
response = full
|
| 152 |
else:
|
| 153 |
+
# Fallback - be more conservative about cutting
|
| 154 |
+
# Only cut if we're absolutely sure where the prompt ends
|
| 155 |
+
if len(full) > len(formatted) + 100: # Safety buffer
|
| 156 |
+
response = full[len(formatted):].strip()
|
| 157 |
+
logger.info(f"Extracted after prompt length: {len(response)} chars")
|
| 158 |
+
else:
|
| 159 |
+
# Don't cut anything if we're not sure
|
| 160 |
+
response = full.strip()
|
| 161 |
+
logger.info(f"Using full response: {len(response)} chars")
|
| 162 |
+
|
| 163 |
+
# For CoT requests, the model should return the JSON directly
|
| 164 |
+
# Don't try to extract JSON - trust the model's output
|
| 165 |
+
if is_cot:
|
| 166 |
+
logger.info("CoT request - using response as-is (trusting model output)")
|
| 167 |
|
| 168 |
+
logger.info(f"Final response length: {len(response)} chars")
|
| 169 |
+
logger.info(f"Response starts with: {response[:100]}...")
|
| 170 |
+
logger.info(f"Response ends with: ...{response[-100:]}")
|
|
|
|
|
|
|
| 171 |
|
| 172 |
logger.info(f"Response generated: {len(response)} chars")
|
| 173 |
return response.strip()
|
test_api.py
CHANGED
|
@@ -1,215 +1,115 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
Test script for the Question Generation API
|
| 4 |
-
Run this after your Space is deployed to test the API endpoints
|
| 5 |
-
"""
|
| 6 |
|
| 7 |
import requests
|
| 8 |
import json
|
| 9 |
-
import
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
try:
|
| 19 |
-
|
| 20 |
-
print(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
| 31 |
else:
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
except requests.exceptions.RequestException as e:
|
| 36 |
-
print(f"β Health Check Error: {e}")
|
| 37 |
-
return False
|
| 38 |
-
|
| 39 |
-
def test_question_generation():
|
| 40 |
-
"""Test the question generation endpoint"""
|
| 41 |
-
print("\nπ€ Testing question generation...")
|
| 42 |
-
|
| 43 |
-
test_cases = [
|
| 44 |
-
{
|
| 45 |
-
"name": "Simple Statement",
|
| 46 |
-
"data": {
|
| 47 |
-
"statement": "Artificial intelligence is transforming healthcare by enabling more accurate diagnoses, personalized treatments, and efficient drug discovery processes.",
|
| 48 |
-
"num_questions": 3,
|
| 49 |
-
"difficulty_level": "medium"
|
| 50 |
-
}
|
| 51 |
-
},
|
| 52 |
-
{
|
| 53 |
-
"name": "Complex Statement",
|
| 54 |
-
"data": {
|
| 55 |
-
"statement": "Climate change represents one of the most significant challenges of the 21st century, involving complex interactions between atmospheric chemistry, ocean currents, biodiversity loss, and human economic systems. The greenhouse effect, primarily driven by carbon dioxide emissions from fossil fuel combustion, is causing global temperatures to rise at an unprecedented rate.",
|
| 56 |
-
"num_questions": 5,
|
| 57 |
-
"difficulty_level": "hard",
|
| 58 |
-
"temperature": 0.9
|
| 59 |
-
}
|
| 60 |
-
},
|
| 61 |
-
{
|
| 62 |
-
"name": "Short Statement",
|
| 63 |
-
"data": {
|
| 64 |
-
"statement": "Water boils at 100 degrees Celsius at sea level.",
|
| 65 |
-
"num_questions": 2,
|
| 66 |
-
"difficulty_level": "easy"
|
| 67 |
-
}
|
| 68 |
-
}
|
| 69 |
-
]
|
| 70 |
-
|
| 71 |
-
for i, test_case in enumerate(test_cases, 1):
|
| 72 |
-
print(f"\nπ Test Case {i}: {test_case['name']}")
|
| 73 |
-
print(f"Statement: {test_case['data']['statement'][:100]}...")
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
print(f"β
Generated {len(questions)} questions:")
|
| 89 |
-
for j, question in enumerate(questions, 1):
|
| 90 |
-
print(f" {j}. {question}")
|
| 91 |
-
|
| 92 |
-
print(f"Metadata: {data['metadata']}")
|
| 93 |
-
|
| 94 |
-
else:
|
| 95 |
-
print(f"β Generation Failed: {response.text}")
|
| 96 |
-
|
| 97 |
-
except requests.exceptions.RequestException as e:
|
| 98 |
-
print(f"β Request Error: {e}")
|
| 99 |
-
|
| 100 |
-
def test_error_handling():
|
| 101 |
-
"""Test error handling"""
|
| 102 |
-
print("\nπ¨ Testing error handling...")
|
| 103 |
-
|
| 104 |
-
# Test invalid parameters
|
| 105 |
-
invalid_tests = [
|
| 106 |
-
{
|
| 107 |
-
"name": "Missing statement",
|
| 108 |
-
"data": {"num_questions": 3}
|
| 109 |
-
},
|
| 110 |
-
{
|
| 111 |
-
"name": "Invalid num_questions",
|
| 112 |
-
"data": {
|
| 113 |
-
"statement": "Test statement",
|
| 114 |
-
"num_questions": 15 # Too high
|
| 115 |
-
}
|
| 116 |
-
},
|
| 117 |
-
{
|
| 118 |
-
"name": "Invalid temperature",
|
| 119 |
-
"data": {
|
| 120 |
-
"statement": "Test statement",
|
| 121 |
-
"temperature": 5.0 # Too high
|
| 122 |
-
}
|
| 123 |
-
}
|
| 124 |
-
]
|
| 125 |
-
|
| 126 |
-
for test in invalid_tests:
|
| 127 |
-
print(f"\nπ Testing: {test['name']}")
|
| 128 |
-
try:
|
| 129 |
-
response = requests.post(
|
| 130 |
-
f"{BASE_URL}/generate-questions",
|
| 131 |
-
json=test['data'],
|
| 132 |
-
timeout=30
|
| 133 |
-
)
|
| 134 |
-
|
| 135 |
-
if response.status_code == 422:
|
| 136 |
-
print("β
Correctly rejected invalid input")
|
| 137 |
-
else:
|
| 138 |
-
print(f"β οΈ Unexpected status code: {response.status_code}")
|
| 139 |
-
|
| 140 |
-
except requests.exceptions.RequestException as e:
|
| 141 |
-
print(f"β Request Error: {e}")
|
| 142 |
-
|
| 143 |
-
def benchmark_performance():
|
| 144 |
-
"""Simple performance benchmark"""
|
| 145 |
-
print("\nβ‘ Performance Benchmark...")
|
| 146 |
-
|
| 147 |
-
statement = "Machine learning algorithms are becoming increasingly sophisticated, enabling computers to learn patterns from data without being explicitly programmed for every scenario."
|
| 148 |
-
|
| 149 |
-
times = []
|
| 150 |
-
for i in range(3):
|
| 151 |
-
print(f"Run {i+1}/3...", end=" ")
|
| 152 |
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
|
|
|
|
|
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
else:
|
| 172 |
-
print(f"β Failed ({response.status_code})")
|
| 173 |
-
|
| 174 |
-
except requests.exceptions.RequestException as e:
|
| 175 |
-
print(f"β Error: {e}")
|
| 176 |
-
|
| 177 |
-
if times:
|
| 178 |
-
avg_time = sum(times) / len(times)
|
| 179 |
-
print(f"\nπ Average Response Time: {avg_time:.2f}s")
|
| 180 |
-
print(f"π Min: {min(times):.2f}s, Max: {max(times):.2f}s")
|
| 181 |
-
|
| 182 |
-
def main():
|
| 183 |
-
"""Run all tests"""
|
| 184 |
-
print("π Starting API Tests")
|
| 185 |
-
print(f"Base URL: {BASE_URL}")
|
| 186 |
-
print("=" * 50)
|
| 187 |
-
|
| 188 |
-
# Test health first
|
| 189 |
-
if not test_health_endpoint():
|
| 190 |
-
print("\nβ Health check failed. Make sure your Space is running and accessible.")
|
| 191 |
-
return
|
| 192 |
-
|
| 193 |
-
# Wait a moment for model to be ready
|
| 194 |
-
print("\nβ³ Waiting for model to be ready...")
|
| 195 |
-
time.sleep(5)
|
| 196 |
-
|
| 197 |
-
# Run tests
|
| 198 |
-
test_question_generation()
|
| 199 |
-
test_error_handling()
|
| 200 |
-
benchmark_performance()
|
| 201 |
-
|
| 202 |
-
print("\n" + "=" * 50)
|
| 203 |
-
print("β
All tests completed!")
|
| 204 |
-
print("\nπ‘ Usage Examples:")
|
| 205 |
-
print(f"curl -X POST '{BASE_URL}/generate-questions' \\")
|
| 206 |
-
print(" -H 'Content-Type: application/json' \\")
|
| 207 |
-
print(" -d '{\"statement\": \"Your statement here\", \"num_questions\": 3}'")
|
| 208 |
|
| 209 |
if __name__ == "__main__":
|
| 210 |
-
|
| 211 |
-
if "your-space-name" in BASE_URL:
|
| 212 |
-
print("β οΈ Please update BASE_URL with your actual Space URL before running tests!")
|
| 213 |
-
print("Example: BASE_URL = 'https://username-question-generation-api.hf.space'")
|
| 214 |
-
else:
|
| 215 |
-
main()
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
+
"""Test the API with the EXACT request from the logs to diagnose truncation"""
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
import requests
|
| 5 |
import json
|
| 6 |
+
from gradio_client import Client
|
| 7 |
|
| 8 |
+
def test_api():
    """Send the exact CoT request from the logs and check the reply for truncation.

    Connects to the deployed Space through ``gradio_client.Client``, calls the
    ``/respond`` endpoint with the captured prompt, prints the raw result,
    reports every truncation heuristic that fires, and finally tries to parse
    the reply as a JSON array whose first item repeats the original user
    question.

    Everything is reported on stdout and nothing is returned. Failures while
    connecting, calling the API, or inspecting the result are printed rather
    than raised, because this is a manual diagnostic script.
    """

    # EXACT request from the logs.  The ``\n`` escapes below are intentional:
    # they become real newlines in the prompt that is sent.
    test_prompt = """Return exactly this JSON array (no other text):
[
{"user": "who is going to win the ravens commanders game", "assistant": "thought: to make a prediction, i'm going to need to think through the following:\nRelative strength or power of the competing teams, which establishes strength differential on a neutral field. These relative power ratings will change over the season and from end of season to the beginning of the next season.\nEach team's previous performances.\nHome field advantage, which could vary by each home and away team.\nIndividual values for each player on each team.\nPresence of injuries or illnesses that affect a team's relative power. Note that injuries/illnesses will subtract from a team's relative power. Not only do you need to adjust based on players out for the week, but you also must adjust based on active players who are playing with an injury.\nGame factors such as weather, each team's previous schedule (e.g., byes, multiple away games in a row, etc.), travel distance/difficulty, stadium quirks, and turf types.\nMotivational factors such as revenge, rivalries, coaching changes, etc.\nSteps\nEvaluate the available information based on my thoughts.\nList out all the information we think is relevant for both teams.\nDiscuss any motivational factors - players or coaches who have a history as an example\nTalk about any weaknesses on either defense who the other team might take advantage of\nLastly make a prediction on the result and score of the game."},
{"user": "[new question based on: You are a broadcaster and an NFL expert. You have years of experience coaching and playing in the N...]", "assistant": "[detailed answer consistent with system context]"},
{"user": "[another question based on the topic]", "assistant": "[another detailed answer consistent with system context]"}
]

Context for new questions:
SYSTEM: You are a broadcaster and an NFL expert. You have years of experience coaching and playing in the NFL. When someone asks you how you think or to make a prediction about a game or a player, you are thoughtful and detailed thinking through each element of information you would need and judging how much each element will matter
TOPIC: Based on the user/assistant exchange above

Requirements:
- First item MUST use the exact user and assistant prompts provided above
- Items 2-3 should be NEW, diverse questions with informative responses
- All responses should be consistent with the system context
- Return ONLY the JSON array, no additional text"""

    print("🧪 TESTING API WITH EXACT COT REQUEST")
    print("=" * 60)
    print(f"Request length: {len(test_prompt)} characters")
    print(f"Request preview: {test_prompt[:200]}...")
    print("=" * 60)

    try:
        # Use Gradio Client like the actual application
        print("📡 Connecting to Gradio API...")
        client = Client("https://david167-question-generation-api.hf.space/")

        print("📡 Sending request via Gradio Client...")
        result = client.predict(
            test_prompt,  # message
            "[]",         # history_str
            0.8,          # temperature
            "",           # json_mode
            "",           # template
            api_name="/respond"
        )

        print("✅ API Response received!")
        print(f"Result type: {type(result)}")
        print(f"Result: {result}")

        # Extract content based on result type
        if isinstance(result, tuple):
            content = result[0] if result else ""
            print("📦 Extracted from tuple")
        elif isinstance(result, str):
            content = result
            print("📦 Direct string result")
        else:
            content = str(result)
            print("📦 Converted to string")

        # A tuple's first element is not guaranteed to be text; the checks
        # below are all string operations, so normalise it here.
        if not isinstance(content, str):
            content = str(content)

        stripped = content.strip()

        print(f"Response length: {len(content)} characters")
        print("=" * 60)
        print("RESPONSE CONTENT:")
        print(content)
        print("=" * 60)

        # Check for truncation indicators.  Each heuristic is paired with the
        # message reported for it so the condition is written only once.
        truncation_checks = [
            (content.endswith('", \''),
             "Response ends with incomplete tuple"),
            ('e following:' in content[:50],
             "Response starts mid-sentence (truncated beginning)"),
            (not stripped.endswith(']'),
             "JSON array not properly closed"),
            (len(content) < 500,
             "Response too short for complete CoT"),
        ]
        issues = [message for triggered, message in truncation_checks if triggered]

        if issues:
            print("❌ TRUNCATION DETECTED!")
            print("Issues found:")
            for message in issues:
                print(f" - {message}")
        else:
            print("✅ NO TRUNCATION DETECTED!")

        # Try to parse as JSON
        try:
            if stripped.startswith('[') and stripped.endswith(']'):
                parsed = json.loads(stripped)
                print(f"✅ VALID JSON: {len(parsed)} items")

                # Check first item for verbatim match
                if len(parsed) > 0 and isinstance(parsed[0], dict):
                    first_user = parsed[0].get('user', '')
                    if 'who is going to win the ravens commanders game' in first_user:
                        print("✅ FIRST ITEM VERBATIM MATCH!")
                    else:
                        print("❌ First item not verbatim")
            else:
                print("❌ Response not valid JSON array format")
        except json.JSONDecodeError as e:
            print(f"❌ JSON PARSE ERROR: {e}")

    except Exception as e:
        # Top-level boundary of a manual diagnostic: report and carry on.
        print(f"❌ Test failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
if __name__ == "__main__":
|
| 115 |
+
test_api()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|