Spaces:
Running
Running
File size: 9,359 Bytes
8a74c03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
#!/usr/bin/env python3
"""
Test Yes/No Person Detector on multiple videos for accuracy verification
"""
import sys
import os
from io import BytesIO
import glob
# Put this script's own directory (not the current working directory) at the
# front of sys.path so the `local_models` and `app` imports performed inside
# test_multiple_videos() resolve no matter where the script is launched from.
# NOTE(review): presumably both modules live next to this file - confirm.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def _to_text(value, default="N/A"):
    """Return *value* as a string, substituting *default* for ``None``.

    Detector payloads are not guaranteed to be strings; coercing them here
    keeps later slicing, ``len()`` and width formatting from raising.
    """
    if value is None:
        return default
    return value if isinstance(value, str) else str(value)


def _to_number(value):
    """Return *value* unchanged when it is an int/float, else a float or 0.

    Anything that cannot be converted (``None``, arbitrary text) becomes 0 so
    the ``.0%`` formatting and the confidence average never raise.
    """
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0


def _shorten(text, limit):
    """Return *text* cut to *limit* characters, with "..." appended if cut."""
    return text[:limit] + "..." if len(text) > limit else text


def _analyze_frame(frame_data, frame_num, process_image_locally, local_manager):
    """Run the detector on one frame, print the outcome and return a record.

    Args:
        frame_data: Mapping read for its ``'timestamp'`` and ``'frame'`` keys.
        frame_num: 1-based position of the frame within the tested sample.
        process_image_locally: Callable imported from ``app``; its result is
            treated as a mapping holding either an ``'error'`` key or a
            ``'yes_no_detection'`` key.
        local_manager: Model manager forwarded unchanged to the callable.

    Returns:
        A dict with ``frame``, ``timestamp``, ``answer``, ``confidence`` and
        ``raw_response`` (plus ``person_detected`` for successful detections).
        ``answer`` and ``raw_response`` are always strings and ``confidence``
        is always numeric, so the report can format them safely.

    Raises:
        Whatever reading or formatting the timestamp raises; that happens
        before the per-frame ``try`` and is handled by the caller per video.
    """
    timestamp = frame_data['timestamp']
    print(f"\n Frame {frame_num} ({timestamp:.1f}s):")
    print(f" {'-' * 30}")

    record = {'frame': frame_num, 'timestamp': timestamp}
    try:
        result = process_image_locally(
            frame_data['frame'],
            "Is there a person in this image?",
            'Yes/No Person Detector',
            local_manager,
        )

        if 'error' in result:
            print(f" ERROR: {result['error']}")
            record.update(
                answer='ERROR',
                confidence=0,
                # The error payload may be an exception object or other
                # non-string; store text so the summary table can slice it.
                raw_response=_to_text(result['error']),
            )
        elif 'yes_no_detection' in result:
            detection = result['yes_no_detection']
            answer = _to_text(detection.get('answer', 'UNKNOWN'), 'UNKNOWN')
            person_detected = detection.get('person_detected', False)
            confidence = _to_number(detection.get('confidence', 0))
            raw_response = _to_text(detection.get('raw_response', 'N/A'))

            print(f" Answer: {answer}")
            print(f" Person Detected: {person_detected}")
            print(f" Confidence: {confidence:.0%}")
            print(f" Raw Response: '{_shorten(raw_response, 50)}'")

            record.update(
                answer=answer,
                person_detected=person_detected,
                confidence=confidence,
                raw_response=raw_response,
            )
        else:
            print(f" Unexpected result format: {result}")
            record.update(answer='UNKNOWN', confidence=0, raw_response=str(result))
    except Exception as e:
        # Deliberate best-effort boundary: one bad frame must not abort the
        # run, so the failure is reported and recorded as an ERROR row.
        print(f" ERROR: {e}")
        record = {
            'frame': frame_num,
            'timestamp': timestamp,
            'answer': 'ERROR',
            'confidence': 0,
            'raw_response': str(e),
        }
    return record


def _print_report(all_results):
    """Print the summary table, overall statistics, bias check and per-video counts.

    Args:
        all_results: Mapping of video path to the list of frame records
            produced by ``_analyze_frame``.
    """
    wide_rule = "=" * 80

    print("\n" + wide_rule)
    print("COMPREHENSIVE RESULTS ANALYSIS")
    print(wide_rule)

    print("\nRESULTS SUMMARY BY VIDEO:")
    print("-" * 80)
    print(f"{'Video':<20} {'Frame':<8} {'Time':<8} {'Answer':<8} {'Confidence':<12} {'Raw Response':<25}")
    print("-" * 80)

    total_frames = 0
    yes_count = 0
    no_count = 0
    error_count = 0
    unclear_count = 0
    confidence_sum = 0

    for video_name, records in all_results.items():
        for record in records:
            frame = record['frame']
            timestamp = record['timestamp']
            answer = record['answer']
            confidence = record['confidence']
            raw_response = _shorten(record['raw_response'], 20)
            print(f"{video_name:<20} {frame:<8} {timestamp:<8.1f} {answer:<8} {confidence:<12.0%} {raw_response:<25}")

            total_frames += 1
            confidence_sum += confidence
            if answer == 'YES':
                yes_count += 1
            elif answer == 'NO':
                no_count += 1
            elif answer == 'ERROR':
                error_count += 1
            else:
                unclear_count += 1

    print("\n" + wide_rule)
    print("OVERALL STATISTICS")
    print(wide_rule)
    print(f"Total frames tested: {total_frames}")
    print(f"Videos tested: {len(all_results)}")
    print(f"YES answers: {yes_count}")
    print(f"NO answers: {no_count}")
    print(f"ERROR responses: {error_count}")
    print(f"UNCLEAR responses: {unclear_count}")
    if total_frames > 0:
        # Guarded so an empty run does not divide by zero.
        success_rate = (yes_count + no_count) / total_frames * 100
        avg_confidence = confidence_sum / total_frames
        print(f"Success rate: {success_rate:.1f}%")
        print(f"Average confidence: {avg_confidence:.0%}")

    print("\n" + wide_rule)
    print("ACCURACY ASSESSMENT")
    print(wide_rule)
    # A uniform answer is only flagged once more than 3 frames were tested,
    # so a single short video cannot trigger the warning on its own.
    if yes_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only YES answers!")
        print("This suggests the model may be:")
        print("- Overconfident or biased toward detecting people")
        print("- Not properly processing different image content")
        print("- The prompt may need adjustment")
        print("\nRECOMMENDED FIXES:")
        print("1. Test with images that definitely contain no people")
        print("2. Adjust the prompt to be more specific")
        print("3. Try different confidence thresholds")
        print("4. Consider using a different base model")
    elif no_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only NO answers!")
        print("This suggests the model may be:")
        print("- Too conservative in person detection")
        print("- Having trouble detecting people in the images")
        print("- The prompt may be too restrictive")
    elif yes_count > 0 and no_count > 0:
        print("GOOD: Model is giving varied responses (both YES and NO)")
        print("This suggests the model is:")
        print("+ Properly analyzing different image content")
        print("+ Responding appropriately to image variations")
        print("+ Working as expected")
    else:
        print("INSUFFICIENT DATA: Need more diverse test cases")

    print("\nPER-VIDEO BREAKDOWN:")
    print("-" * 50)
    for video_name, records in all_results.items():
        video_yes = sum(1 for r in records if r['answer'] == 'YES')
        video_no = sum(1 for r in records if r['answer'] == 'NO')
        video_total = len(records)
        print(f"{video_name}: {video_yes} YES, {video_no} NO (out of {video_total} frames)")


def test_multiple_videos():
    """Test Yes/No Person Detector on multiple videos.

    Finds every ``*.mp4`` in the current working directory, extracts frames
    from each via ``app.extract_frames_from_video`` (called with ``fps=0.3``),
    runs the detector on the first 3 frames of each video and prints a report.

    Returns:
        A dict mapping each processed video path to its list of frame
        records, or ``None`` when the project modules cannot be imported, no
        MP4 files are found, or the model manager fails to initialise.
    """
    print("TESTING YES/NO PERSON DETECTOR - MULTIPLE VIDEOS")
    print("=" * 60)
    print("Verifying model accuracy across different video content")
    print()

    # Imported lazily so a missing project module is reported as a message
    # instead of failing at import time of this script.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Components loaded successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return None

    # glob returns matches in arbitrary order; sort so the video numbering
    # and the report are reproducible between runs.
    video_files = sorted(glob.glob("*.mp4"))
    if not video_files:
        print("- No MP4 files found")
        return None
    print(f"+ Found {len(video_files)} video files: {video_files}")

    try:
        local_manager = get_local_model_manager()
        print("+ Yes/No Person Detector ready")
    except Exception as e:
        print(f"- Model initialization error: {e}")
        return None

    all_results = {}
    for video_idx, video_path in enumerate(video_files, start=1):
        print("\n" + "=" * 60)
        print(f"TESTING VIDEO {video_idx}: {video_path}")
        print("=" * 60)
        try:
            # The extractor is handed an in-memory file object rather than a path.
            with open(video_path, 'rb') as f:
                video_file = BytesIO(f.read())
            frames = extract_frames_from_video(video_file, fps=0.3)
            if not frames:
                print(f"- No frames extracted from {video_path}")
                continue
            print(f"+ Extracted {len(frames)} frames from {video_path}")

            # Only the first 3 frames of each video are sampled.
            video_results = []
            for frame_num, frame_data in enumerate(frames[:3], start=1):
                video_results.append(
                    _analyze_frame(frame_data, frame_num, process_image_locally, local_manager)
                )
            all_results[video_path] = video_results
        except Exception as e:
            # Best-effort per video: report the failure and move on.
            print(f"- Failed to process {video_path}: {e}")
            continue

    _print_report(all_results)
    return all_results
if __name__ == "__main__":
    # Run the multi-video check only when executed as a script, not on import.
    test_multiple_videos()