File size: 9,359 Bytes
8a74c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/bin/env python3
"""
Test Yes/No Person Detector on multiple videos for accuracy verification
"""
import sys
import os
from io import BytesIO
import glob

# Put the directory containing this script (not the current working
# directory) at the front of the import path. Presumably this is so the
# project-local modules imported inside test_multiple_videos (local_models,
# app) resolve when the script is launched from elsewhere -- confirm they
# live next to this file.
# NOTE(review): the "*.mp4" search in test_multiple_videos is still relative
# to the current working directory, not to this script's directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def _shorten(text, limit):
    """Return *text* cut to *limit* characters, adding '...' when it was cut."""
    return f"{text[:limit]}..." if len(text) > limit else text


def _fallback_record(frame_num, timestamp, answer, raw_response):
    """Build the record stored for a frame that yielded no usable detection."""
    return {
        'frame': frame_num,
        'timestamp': timestamp,
        'answer': answer,
        'confidence': 0,
        'raw_response': raw_response,
    }


def _check_frame(frame_data, frame_num, timestamp, detect, local_manager):
    """Run the detector on one frame, print the outcome and return its record.

    *detect* is called as ``detect(frame, question, model_name, manager)`` and
    is expected to return a dict holding either an ``'error'`` entry or a
    ``'yes_no_detection'`` dict. Any exception raised while calling it or while
    reporting its result is turned into an ``'ERROR'`` record, so one bad
    frame never aborts the run.
    """
    try:
        result = detect(
            frame_data['frame'],
            "Is there a person in this image?",
            'Yes/No Person Detector',
            local_manager,
        )

        if 'error' in result:
            print(f"  ERROR: {result['error']}")
            return _fallback_record(frame_num, timestamp, 'ERROR', result['error'])

        if 'yes_no_detection' not in result:
            print(f"  Unexpected result format: {result}")
            return _fallback_record(frame_num, timestamp, 'UNKNOWN', str(result))

        detection = result['yes_no_detection']
        answer = detection.get('answer', 'UNKNOWN')
        person_detected = detection.get('person_detected', False)
        confidence = detection.get('confidence', 0)
        raw_response = detection.get('raw_response', 'N/A')

        print(f"  Answer: {answer}")
        print(f"  Person Detected: {person_detected}")
        # A non-numeric confidence raises here and is recorded as an ERROR
        # below, so every stored confidence can be summed and %-formatted.
        print(f"  Confidence: {confidence:.0%}")
        # str() keeps a non-string raw response from discarding a valid answer.
        print(f"  Raw Response: '{_shorten(str(raw_response), 50)}'")

        return {
            'frame': frame_num,
            'timestamp': timestamp,
            'answer': answer,
            'person_detected': person_detected,
            'confidence': confidence,
            'raw_response': raw_response,
        }
    except Exception as e:
        print(f"  ERROR: {e}")
        return _fallback_record(frame_num, timestamp, 'ERROR', str(e))


def _run_video(video_path, extract_frames, detect, local_manager):
    """Test the first three extracted frames of one video.

    Returns the list of per-frame records, or ``None`` when no frames could be
    extracted. Exceptions from reading the file or extracting frames propagate
    to the caller.
    """
    with open(video_path, 'rb') as f:
        video_file = BytesIO(f.read())

    # fps=0.3 presumably samples about one frame every 3+ seconds; the exact
    # meaning is defined by the extractor -- TODO confirm.
    frames = extract_frames(video_file, fps=0.3)
    if not frames:
        print(f"- No frames extracted from {video_path}")
        return None

    print(f"+ Extracted {len(frames)} frames from {video_path}")

    video_results = []
    # Only the first 3 frames of each video are tested.
    for frame_num, frame_data in enumerate(frames[:3], start=1):
        timestamp = frame_data['timestamp']

        print(f"\n  Frame {frame_num} ({timestamp:.1f}s):")
        print(f"  {'-' * 30}")

        video_results.append(
            _check_frame(frame_data, frame_num, timestamp, detect, local_manager)
        )
    return video_results


def _print_summary_table(all_results):
    """Print one table row per tested frame and return the aggregate counters.

    The returned dict has the keys ``total``, ``yes``, ``no``, ``error``,
    ``unclear`` and ``confidence_sum``. Frames recorded as ERROR/UNKNOWN carry
    a confidence of 0 and are included in ``confidence_sum``.
    """
    print("\nRESULTS SUMMARY BY VIDEO:")
    print("-" * 80)
    print(f"{'Video':<20} {'Frame':<8} {'Time':<8} {'Answer':<8} {'Confidence':<12} {'Raw Response':<25}")
    print("-" * 80)

    stats = {
        'total': 0,
        'yes': 0,
        'no': 0,
        'error': 0,
        'unclear': 0,
        'confidence_sum': 0,
    }

    for video_name, results in all_results.items():
        for result in results:
            frame = result['frame']
            timestamp = result['timestamp']
            answer = result['answer']
            confidence = result['confidence']
            # Coerce to str for display only: the detector may hand back a
            # non-string answer/raw response, which would otherwise raise
            # here and lose the whole report.
            answer_text = str(answer)
            raw_response = _shorten(str(result['raw_response']), 20)

            print(f"{video_name:<20} {frame:<8} {timestamp:<8.1f} {answer_text:<8} {confidence:<12.0%} {raw_response:<25}")

            stats['total'] += 1
            stats['confidence_sum'] += confidence

            if answer == 'YES':
                stats['yes'] += 1
            elif answer == 'NO':
                stats['no'] += 1
            elif answer == 'ERROR':
                stats['error'] += 1
            else:
                stats['unclear'] += 1

    return stats


def _print_overall_statistics(stats, video_count):
    """Print the aggregate counters plus success rate and average confidence."""
    print("\n" + "=" * 80)
    print("OVERALL STATISTICS")
    print("=" * 80)

    total_frames = stats['total']
    print(f"Total frames tested: {total_frames}")
    print(f"Videos tested: {video_count}")
    print(f"YES answers: {stats['yes']}")
    print(f"NO answers: {stats['no']}")
    print(f"ERROR responses: {stats['error']}")
    print(f"UNCLEAR responses: {stats['unclear']}")

    # Guard against division by zero when no frame was tested at all.
    if total_frames > 0:
        success_rate = (stats['yes'] + stats['no']) / total_frames * 100
        avg_confidence = stats['confidence_sum'] / total_frames
        print(f"Success rate: {success_rate:.1f}%")
        print(f"Average confidence: {avg_confidence:.0%}")


def _print_accuracy_assessment(stats):
    """Print a heuristic verdict on whether the answers vary across frames."""
    print("\n" + "=" * 80)
    print("ACCURACY ASSESSMENT")
    print("=" * 80)

    total_frames = stats['total']
    yes_count = stats['yes']
    no_count = stats['no']

    # A uniform answer is only flagged once more than 3 frames were tested.
    if yes_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only YES answers!")
        print("This suggests the model may be:")
        print("- Overconfident or biased toward detecting people")
        print("- Not properly processing different image content")
        print("- The prompt may need adjustment")
        print("\nRECOMMENDED FIXES:")
        print("1. Test with images that definitely contain no people")
        print("2. Adjust the prompt to be more specific")
        print("3. Try different confidence thresholds")
        print("4. Consider using a different base model")

    elif no_count == total_frames and total_frames > 3:
        print("WARNING: Model appears to be giving only NO answers!")
        print("This suggests the model may be:")
        print("- Too conservative in person detection")
        print("- Having trouble detecting people in the images")
        print("- The prompt may be too restrictive")

    elif yes_count > 0 and no_count > 0:
        print("GOOD: Model is giving varied responses (both YES and NO)")
        print("This suggests the model is:")
        print("+ Properly analyzing different image content")
        print("+ Responding appropriately to image variations")
        print("+ Working as expected")

    else:
        print("INSUFFICIENT DATA: Need more diverse test cases")


def _print_per_video_breakdown(all_results):
    """Print the YES/NO counts for each tested video."""
    print("\nPER-VIDEO BREAKDOWN:")
    print("-" * 50)

    for video_name, results in all_results.items():
        video_yes = sum(1 for r in results if r['answer'] == 'YES')
        video_no = sum(1 for r in results if r['answer'] == 'NO')
        video_total = len(results)

        print(f"{video_name}: {video_yes} YES, {video_no} NO (out of {video_total} frames)")


def test_multiple_videos():
    """Test Yes/No Person Detector on multiple videos.

    Runs the detector on the first three extracted frames of every ``*.mp4``
    file in the current working directory and prints a per-frame table,
    overall statistics, a heuristic assessment and a per-video breakdown.

    Returns:
        A dict mapping each processed video path to its list of per-frame
        record dicts (keys: ``frame``, ``timestamp``, ``answer``,
        ``confidence``, ``raw_response`` and, for successful detections,
        ``person_detected``). Videos that yield no frames or fail to process
        are omitted. Returns ``None`` when the project modules cannot be
        imported, no MP4 file is found, or the model manager fails to
        initialize.
    """
    print("TESTING YES/NO PERSON DETECTOR - MULTIPLE VIDEOS")
    print("=" * 60)
    print("Verifying model accuracy across different video content")
    print()

    # Imported lazily so a missing project module is reported instead of
    # failing at module import time.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Components loaded successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return None

    # glob returns matches in arbitrary order; sort so the video numbering and
    # the report are reproducible between runs.
    video_files = sorted(glob.glob("*.mp4"))
    if not video_files:
        print("- No MP4 files found")
        return None

    print(f"+ Found {len(video_files)} video files: {video_files}")

    # Initialize models
    try:
        local_manager = get_local_model_manager()
        print("+ Yes/No Person Detector ready")
    except Exception as e:
        print(f"- Model initialization error: {e}")
        return None

    all_results = {}

    # Test each video; a failure in one video must not stop the others.
    for video_number, video_path in enumerate(video_files, start=1):
        print("\n" + "=" * 60)
        print(f"TESTING VIDEO {video_number}: {video_path}")
        print("=" * 60)

        try:
            video_results = _run_video(
                video_path,
                extract_frames_from_video,
                process_image_locally,
                local_manager,
            )
        except Exception as e:
            print(f"- Failed to process {video_path}: {e}")
            continue

        if video_results is not None:
            all_results[video_path] = video_results

    # Comprehensive analysis
    print("\n" + "=" * 80)
    print("COMPREHENSIVE RESULTS ANALYSIS")
    print("=" * 80)

    stats = _print_summary_table(all_results)
    _print_overall_statistics(stats, len(all_results))
    _print_accuracy_assessment(stats)
    _print_per_video_breakdown(all_results)

    return all_results

# Run the multi-video check only when this file is executed as a script; the
# value returned by test_multiple_videos() is not used here.
if __name__ == "__main__":
    test_multiple_videos()