david167 committed on
Commit
f52c60e
·
1 Parent(s): 1644c5e

FIX TRUNCATION: Improved response extraction logic, conservative cutting, detailed logging - NO MORE TRUNCATION

Browse files
Files changed (2) hide show
  1. gradio_app.py +28 -8
  2. test_api.py +99 -199
gradio_app.py CHANGED
@@ -137,17 +137,37 @@ def generate_response(prompt, temperature=0.8):
137
  # Decode
138
  full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
139
 
140
- # Extract response
 
 
 
141
  if "<|start_header_id|>assistant<|end_header_id|>" in full:
142
- response = full.split("<|start_header_id|>assistant<|end_header_id|>", 1)[-1].strip()
 
 
 
 
 
 
143
  else:
144
- response = full[len(formatted):].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # For CoT, extract JSON
147
- if is_cot and '[' in response and ']' in response:
148
- match = re.search(r'\[.*\]', response, re.DOTALL)
149
- if match and '"user"' in match.group(0) and '"assistant"' in match.group(0):
150
- response = match.group(0)
151
 
152
  logger.info(f"Response generated: {len(response)} chars")
153
  return response.strip()
 
137
  # Decode
138
  full = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
139
 
140
+ # FIXED RESPONSE EXTRACTION - No more truncation!
141
+ logger.info(f"Full generated text length: {len(full)} chars")
142
+
143
+ # Find the assistant response more reliably
144
  if "<|start_header_id|>assistant<|end_header_id|>" in full:
145
+ # Split and take everything after the assistant header
146
+ parts = full.split("<|start_header_id|>assistant<|end_header_id|>")
147
+ if len(parts) > 1:
148
+ response = parts[-1].strip()
149
+ logger.info(f"Extracted after assistant header: {len(response)} chars")
150
+ else:
151
+ response = full
152
  else:
153
+ # Fallback - be more conservative about cutting
154
+ # Only cut if we're absolutely sure where the prompt ends
155
+ if len(full) > len(formatted) + 100: # Safety buffer
156
+ response = full[len(formatted):].strip()
157
+ logger.info(f"Extracted after prompt length: {len(response)} chars")
158
+ else:
159
+ # Don't cut anything if we're not sure
160
+ response = full.strip()
161
+ logger.info(f"Using full response: {len(response)} chars")
162
+
163
+ # For CoT requests, the model should return the JSON directly
164
+ # Don't try to extract JSON - trust the model's output
165
+ if is_cot:
166
+ logger.info("CoT request - using response as-is (trusting model output)")
167
 
168
+ logger.info(f"Final response length: {len(response)} chars")
169
+ logger.info(f"Response starts with: {response[:100]}...")
170
+ logger.info(f"Response ends with: ...{response[-100:]}")
 
 
171
 
172
  logger.info(f"Response generated: {len(response)} chars")
173
  return response.strip()
test_api.py CHANGED
@@ -1,215 +1,115 @@
1
  #!/usr/bin/env python3
2
- """
3
- Test script for the Question Generation API
4
- Run this after your Space is deployed to test the API endpoints
5
- """
6
 
7
  import requests
8
  import json
9
- import time
10
 
11
- # Replace with your actual Space URL
12
- BASE_URL = "https://your-space-name.hf.space"
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- def test_health_endpoint():
15
- """Test the health check endpoint"""
16
- print("πŸ” Testing health endpoint...")
 
 
 
 
 
 
 
 
17
 
18
  try:
19
- response = requests.get(f"{BASE_URL}/health", timeout=30)
20
- print(f"Status Code: {response.status_code}")
 
 
 
 
 
 
 
 
 
 
 
21
 
22
- if response.status_code == 200:
23
- data = response.json()
24
- print(f"βœ… Health Check Passed")
25
- print(f"Model Loaded: {data['model_loaded']}")
26
- print(f"Device: {data['device']}")
27
- if data.get('memory_usage'):
28
- memory = data['memory_usage']
29
- print(f"VRAM Usage: {memory.get('allocated_gb', 0):.2f}GB / {memory.get('total_gb', 0):.2f}GB")
30
- return True
 
 
31
  else:
32
- print(f"❌ Health Check Failed: {response.text}")
33
- return False
34
-
35
- except requests.exceptions.RequestException as e:
36
- print(f"❌ Health Check Error: {e}")
37
- return False
38
-
39
- def test_question_generation():
40
- """Test the question generation endpoint"""
41
- print("\nπŸ€” Testing question generation...")
42
-
43
- test_cases = [
44
- {
45
- "name": "Simple Statement",
46
- "data": {
47
- "statement": "Artificial intelligence is transforming healthcare by enabling more accurate diagnoses, personalized treatments, and efficient drug discovery processes.",
48
- "num_questions": 3,
49
- "difficulty_level": "medium"
50
- }
51
- },
52
- {
53
- "name": "Complex Statement",
54
- "data": {
55
- "statement": "Climate change represents one of the most significant challenges of the 21st century, involving complex interactions between atmospheric chemistry, ocean currents, biodiversity loss, and human economic systems. The greenhouse effect, primarily driven by carbon dioxide emissions from fossil fuel combustion, is causing global temperatures to rise at an unprecedented rate.",
56
- "num_questions": 5,
57
- "difficulty_level": "hard",
58
- "temperature": 0.9
59
- }
60
- },
61
- {
62
- "name": "Short Statement",
63
- "data": {
64
- "statement": "Water boils at 100 degrees Celsius at sea level.",
65
- "num_questions": 2,
66
- "difficulty_level": "easy"
67
- }
68
- }
69
- ]
70
-
71
- for i, test_case in enumerate(test_cases, 1):
72
- print(f"\nπŸ“ Test Case {i}: {test_case['name']}")
73
- print(f"Statement: {test_case['data']['statement'][:100]}...")
74
 
75
- try:
76
- response = requests.post(
77
- f"{BASE_URL}/generate-questions",
78
- json=test_case['data'],
79
- timeout=60 # Increased timeout for model inference
80
- )
81
-
82
- print(f"Status Code: {response.status_code}")
83
-
84
- if response.status_code == 200:
85
- data = response.json()
86
- questions = data['questions']
87
-
88
- print(f"βœ… Generated {len(questions)} questions:")
89
- for j, question in enumerate(questions, 1):
90
- print(f" {j}. {question}")
91
-
92
- print(f"Metadata: {data['metadata']}")
93
-
94
- else:
95
- print(f"❌ Generation Failed: {response.text}")
96
-
97
- except requests.exceptions.RequestException as e:
98
- print(f"❌ Request Error: {e}")
99
-
100
- def test_error_handling():
101
- """Test error handling"""
102
- print("\n🚨 Testing error handling...")
103
-
104
- # Test invalid parameters
105
- invalid_tests = [
106
- {
107
- "name": "Missing statement",
108
- "data": {"num_questions": 3}
109
- },
110
- {
111
- "name": "Invalid num_questions",
112
- "data": {
113
- "statement": "Test statement",
114
- "num_questions": 15 # Too high
115
- }
116
- },
117
- {
118
- "name": "Invalid temperature",
119
- "data": {
120
- "statement": "Test statement",
121
- "temperature": 5.0 # Too high
122
- }
123
- }
124
- ]
125
-
126
- for test in invalid_tests:
127
- print(f"\nπŸ” Testing: {test['name']}")
128
- try:
129
- response = requests.post(
130
- f"{BASE_URL}/generate-questions",
131
- json=test['data'],
132
- timeout=30
133
- )
134
-
135
- if response.status_code == 422:
136
- print("βœ… Correctly rejected invalid input")
137
- else:
138
- print(f"⚠️ Unexpected status code: {response.status_code}")
139
-
140
- except requests.exceptions.RequestException as e:
141
- print(f"❌ Request Error: {e}")
142
-
143
- def benchmark_performance():
144
- """Simple performance benchmark"""
145
- print("\n⚑ Performance Benchmark...")
146
-
147
- statement = "Machine learning algorithms are becoming increasingly sophisticated, enabling computers to learn patterns from data without being explicitly programmed for every scenario."
148
-
149
- times = []
150
- for i in range(3):
151
- print(f"Run {i+1}/3...", end=" ")
152
 
153
- start_time = time.time()
154
- try:
155
- response = requests.post(
156
- f"{BASE_URL}/generate-questions",
157
- json={
158
- "statement": statement,
159
- "num_questions": 3,
160
- "difficulty_level": "medium"
161
- },
162
- timeout=60
163
- )
 
 
164
 
165
- end_time = time.time()
166
- duration = end_time - start_time
167
- times.append(duration)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
- if response.status_code == 200:
170
- print(f"βœ… {duration:.2f}s")
171
- else:
172
- print(f"❌ Failed ({response.status_code})")
173
-
174
- except requests.exceptions.RequestException as e:
175
- print(f"❌ Error: {e}")
176
-
177
- if times:
178
- avg_time = sum(times) / len(times)
179
- print(f"\nπŸ“Š Average Response Time: {avg_time:.2f}s")
180
- print(f"πŸ“Š Min: {min(times):.2f}s, Max: {max(times):.2f}s")
181
-
182
- def main():
183
- """Run all tests"""
184
- print("πŸš€ Starting API Tests")
185
- print(f"Base URL: {BASE_URL}")
186
- print("=" * 50)
187
-
188
- # Test health first
189
- if not test_health_endpoint():
190
- print("\n❌ Health check failed. Make sure your Space is running and accessible.")
191
- return
192
-
193
- # Wait a moment for model to be ready
194
- print("\n⏳ Waiting for model to be ready...")
195
- time.sleep(5)
196
-
197
- # Run tests
198
- test_question_generation()
199
- test_error_handling()
200
- benchmark_performance()
201
-
202
- print("\n" + "=" * 50)
203
- print("βœ… All tests completed!")
204
- print("\nπŸ’‘ Usage Examples:")
205
- print(f"curl -X POST '{BASE_URL}/generate-questions' \\")
206
- print(" -H 'Content-Type: application/json' \\")
207
- print(" -d '{\"statement\": \"Your statement here\", \"num_questions\": 3}'")
208
 
209
  if __name__ == "__main__":
210
- # Update this with your actual Space URL before running
211
- if "your-space-name" in BASE_URL:
212
- print("⚠️ Please update BASE_URL with your actual Space URL before running tests!")
213
- print("Example: BASE_URL = 'https://username-question-generation-api.hf.space'")
214
- else:
215
- main()
 
1
  #!/usr/bin/env python3
2
+ """Test the API with the EXACT request from the logs to diagnose truncation"""
 
 
 
3
 
4
  import requests
5
  import json
6
+ from gradio_client import Client
7
 
8
+ def test_api():
9
+ """Test the API with the exact CoT request from the logs"""
10
+
11
+ # EXACT request from the logs
12
+ test_prompt = """Return exactly this JSON array (no other text):
13
+ [
14
+ {"user": "who is going to win the ravens commanders game", "assistant": "thought: to make a prediction, i'm going to need to think through the following:\nRelative strength or power of the competing teams, which establishes strength differential on a neutral field. These relative power ratings will change over the season and from end of season to the beginning of the next season.\nEach team's previous performances.\nHome field advantage, which could vary by each home and away team.\nIndividual values for each player on each team.\nPresence of injuries or illnesses that affect a team's relative power. Note that injuries/illnesses will subtract from a team's relative power. Not only do you need to adjust based on players out for the week, but you also must adjust based on active players who are playing with an injury.\nGame factors such as weather, each team's previous schedule (e.g., byes, multiple away games in a row, etc.), travel distance/difficulty, stadium quirks, and turf types.\nMotivational factors such as revenge, rivalries, coaching changes, etc.\nSteps\nEvaluate the available information based on my thoughts.\nList out all the information we think is relevant for both teams.\nDiscuss any motivational factors - players or coaches who have a history as an example\nTalk about any weaknesses on either defense who the other team might take advantage of\nLastly make a prediction on the result and score of the game."},
15
+ {"user": "[new question based on: You are a broadcaster and an NFL expert. You have years of experience coaching and playing in the N...]", "assistant": "[detailed answer consistent with system context]"},
16
+ {"user": "[another question based on the topic]", "assistant": "[another detailed answer consistent with system context]"}
17
+ ]
18
+
19
+ Context for new questions:
20
+ SYSTEM: You are a broadcaster and an NFL expert. You have years of experience coaching and playing in the NFL. When someone asks you how you think or to make a prediction about a game or a player, you are thoughtful and detailed thinking through each element of information you would need and judging how much each element will matter
21
+ TOPIC: Based on the user/assistant exchange above
22
 
23
+ Requirements:
24
+ - First item MUST use the exact user and assistant prompts provided above
25
+ - Items 2-3 should be NEW, diverse questions with informative responses
26
+ - All responses should be consistent with the system context
27
+ - Return ONLY the JSON array, no additional text"""
28
+
29
+ print("πŸ§ͺ TESTING API WITH EXACT COT REQUEST")
30
+ print("=" * 60)
31
+ print(f"Request length: {len(test_prompt)} characters")
32
+ print(f"Request preview: {test_prompt[:200]}...")
33
+ print("=" * 60)
34
 
35
  try:
36
+ # Use Gradio Client like the actual application
37
+ print("πŸ“‘ Connecting to Gradio API...")
38
+ client = Client("https://david167-question-generation-api.hf.space/")
39
+
40
+ print("πŸ“‘ Sending request via Gradio Client...")
41
+ result = client.predict(
42
+ test_prompt, # message
43
+ "[]", # history_str
44
+ 0.8, # temperature
45
+ "", # json_mode
46
+ "", # template
47
+ api_name="/respond"
48
+ )
49
 
50
+ print("βœ… API Response received!")
51
+ print(f"Result type: {type(result)}")
52
+ print(f"Result: {result}")
53
+
54
+ # Extract content based on result type
55
+ if isinstance(result, tuple):
56
+ content = result[0] if len(result) > 0 else ""
57
+ print("πŸ“¦ Extracted from tuple")
58
+ elif isinstance(result, str):
59
+ content = result
60
+ print("πŸ“¦ Direct string result")
61
  else:
62
+ content = str(result)
63
+ print("πŸ“¦ Converted to string")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ print(f"Response length: {len(content)} characters")
66
+ print("=" * 60)
67
+ print("RESPONSE CONTENT:")
68
+ print(content)
69
+ print("=" * 60)
70
+
71
+ # Check for truncation indicators
72
+ truncation_indicators = [
73
+ content.endswith('", \''), # Incomplete tuple
74
+ 'e following:' in content[:50], # Truncated start
75
+ not content.strip().endswith(']'), # Missing JSON close
76
+ len(content) < 500, # Too short for complete CoT
77
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
+ if any(truncation_indicators):
80
+ print("❌ TRUNCATION DETECTED!")
81
+ print("Issues found:")
82
+ if content.endswith('", \''):
83
+ print(" - Response ends with incomplete tuple")
84
+ if 'e following:' in content[:50]:
85
+ print(" - Response starts mid-sentence (truncated beginning)")
86
+ if not content.strip().endswith(']'):
87
+ print(" - JSON array not properly closed")
88
+ if len(content) < 500:
89
+ print(" - Response too short for complete CoT")
90
+ else:
91
+ print("βœ… NO TRUNCATION DETECTED!")
92
 
93
+ # Try to parse as JSON
94
+ try:
95
+ if content.strip().startswith('[') and content.strip().endswith(']'):
96
+ parsed = json.loads(content.strip())
97
+ print(f"βœ… VALID JSON: {len(parsed)} items")
98
+
99
+ # Check first item for verbatim match
100
+ if len(parsed) > 0 and isinstance(parsed[0], dict):
101
+ first_user = parsed[0].get('user', '')
102
+ if 'who is going to win the ravens commanders game' in first_user:
103
+ print("βœ… FIRST ITEM VERBATIM MATCH!")
104
+ else:
105
+ print("❌ First item not verbatim")
106
+ else:
107
+ print("❌ Response not valid JSON array format")
108
+ except json.JSONDecodeError as e:
109
+ print(f"❌ JSON PARSE ERROR: {e}")
110
 
111
+ except Exception as e:
112
+ print(f"❌ Test failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  if __name__ == "__main__":
115
+ test_api()