File size: 6,976 Bytes
8a74c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
#!/usr/bin/env python3
"""
Detect if a person is on train tracks using the best model and prompt
"""
import sys
import os
from io import BytesIO
import re

# Add current directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def analyze_person_on_tracks():
    """Analyze video frames to detect whether a person is on train tracks.

    Loads the first ``.mp4`` file found in the current directory, samples
    frames at 0.5 fps (one every 2 seconds), captions each frame with the
    local ViT-GPT2 transformer model via ``process_image_locally``, and
    classifies each caption with ``detect_person_on_track_from_text``.

    Prints a per-frame report, a detection summary, and a keyword-based
    confidence assessment.

    Returns:
        list[dict] | None: One dict per frame with keys ``frame``,
        ``timestamp``, ``description`` and ``person_on_track``; ``None``
        if setup (imports, video discovery, model init, frame extraction)
        fails.
    """
    print("PERSON ON TRACKS DETECTION")
    print("=" * 40)
    print("Using: Transformer (ViT-GPT2) - Best performing model")
    print()

    # Project-local dependencies; bail out early with a readable message
    # rather than a traceback if the environment is incomplete.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Components loaded")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find video: first .mp4 in the working directory wins.
    video_files = [f for f in os.listdir('.') if f.endswith('.mp4')]
    if not video_files:
        print("- No video files found")
        return

    video_path = video_files[0]
    print(f"+ Video: {video_path}")

    # Initialize model
    try:
        local_manager = get_local_model_manager()
        print("+ Transformer model ready")
    except Exception as e:
        print(f"- Model error: {e}")
        return

    # Extract frames
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()

        video_file = BytesIO(video_data)
        frames = extract_frames_from_video(video_file, fps=0.5)  # Every 2 seconds

        if not frames:
            print("- No frames extracted")
            return

        print(f"+ Extracted {len(frames)} frames for analysis")
        print()

    except Exception as e:
        print(f"- Frame extraction error: {e}")
        return

    # Optimized prompt for person detection on tracks
    optimal_prompt = "Describe the scene focusing on people and train tracks"

    print("ANALYSIS RESULTS:")
    print("=" * 50)

    person_detected_frames = []
    results = []

    for i, frame_data in enumerate(frames):
        frame_num = i + 1
        timestamp = frame_data['timestamp']

        try:
            # Use the best model (Transformer) with optimal prompt
            result = process_image_locally(
                frame_data['frame'],
                optimal_prompt,
                'Transformer (ViT-GPT2)',
                local_manager
            )

            if 'error' in result:
                response = f"Error: {result['error']}"
                person_on_track = False
            else:
                response = result.get('generated_text', 'No response')

                # Analyze response for person-on-track indicators
                person_on_track = detect_person_on_track_from_text(response)

            # Store result
            results.append({
                'frame': frame_num,
                'timestamp': timestamp,
                'description': response,
                'person_on_track': person_on_track
            })

            if person_on_track:
                person_detected_frames.append(frame_num)

            # Display result
            status = "🚨 PERSON ON TRACK" if person_on_track else "βœ“ Clear"
            print(f"Frame {frame_num:2d} ({timestamp:4.1f}s): {status}")
            print(f"    Description: {response}")
            print()

        except Exception as e:
            # Record the failure so frame indices in `results` stay aligned
            # with frame numbers (the summary indexes results[f-1]).
            print(f"Frame {frame_num:2d} ({timestamp:4.1f}s): ERROR - {e}")
            results.append({
                'frame': frame_num,
                'timestamp': timestamp,
                'description': f"Error: {e}",
                'person_on_track': False
            })
            print()

    # Summary analysis
    print("=" * 60)
    print("DETECTION SUMMARY")
    print("=" * 60)

    total_frames = len(frames)  # guaranteed >= 1 by the extraction check above
    person_frames = len(person_detected_frames)

    print(f"Total frames analyzed: {total_frames}")
    print(f"Frames with person on tracks: {person_frames}")
    print(f"Percentage: {100 * person_frames / total_frames:.1f}%")

    if person_detected_frames:
        print(f"\nPerson detected in frames: {', '.join(map(str, person_detected_frames))}")

        # Find time ranges
        timestamps = [results[f - 1]['timestamp'] for f in person_detected_frames]
        print(f"Time periods: {min(timestamps):.1f}s - {max(timestamps):.1f}s")
    else:
        print("\nNo person clearly detected on train tracks")

    print(f"\nπŸ“Š CONFIDENCE ASSESSMENT:")

    def _mentions_any(text, words):
        # Whole-word match only: a bare substring test would let 'on' match
        # inside 'person'/'front' and 'man' inside 'woman', inflating scores.
        return any(re.search(rf"\b{re.escape(w)}\b", text) for w in words)

    confidence_scores = []
    for r in results:
        if r['person_on_track']:
            # Assess confidence based on description keywords
            desc = r['description'].lower()
            confidence = 0.5  # Base confidence

            if _mentions_any(desc, ['person', 'man', 'boy', 'woman', 'people']):
                confidence += 0.3
            if _mentions_any(desc, ['standing', 'walking', 'on', 'track', 'rail']):
                confidence += 0.2

            confidence_scores.append(min(confidence, 1.0))

    if confidence_scores:
        avg_confidence = sum(confidence_scores) / len(confidence_scores)
        print(f"Average detection confidence: {avg_confidence:.1f}/1.0")
    else:
        print("No confident detections")

    # Save results
    print(f"\n+ Analysis complete!")
    return results

def detect_person_on_track_from_text(description):
    """Heuristically decide whether a caption describes a person on train tracks.

    The check is two-tiered:
      1. Strong phrase/regex indicators ("standing on", "person ... track")
         immediately classify as positive.
      2. Otherwise, the caption must contain a person word AND a track word
         AND a positioning word.

    All keyword checks use whole-word matching. The previous substring
    approach made 'on' match inside 'person'/'front' and 'man' inside
    'woman'/'many', so any caption mentioning both a person and tracks was
    classified positive even when it said the person was far away.

    Args:
        description: Free-text scene description (may be None or empty).

    Returns:
        bool: True if the text suggests a person is on the tracks.
    """
    if not description:
        return False

    desc_lower = description.lower()

    def _has_word(words):
        # \b guards on both sides of each (escaped) keyword enforce
        # whole-word matching.
        return any(re.search(rf"\b{re.escape(w)}\b", desc_lower) for w in words)

    # Keywords indicating person presence
    person_keywords = ['person', 'man', 'boy', 'woman', 'girl', 'people', 'someone']

    # Keywords indicating track/rail location
    track_keywords = ['track', 'tracks', 'rail', 'rails', 'railway']

    # Positioning keywords
    position_keywords = ['on', 'standing', 'walking', 'sitting', 'near', 'beside', 'next to']

    # Phrases/regexes that strongly suggest a person is ON the tracks.
    # The subject words are word-bounded so e.g. 'man' no longer fires
    # inside 'woman'; the trailing 'track'/'rail' deliberately has no
    # closing \b so 'tracks'/'rails'/'railway' also match.
    strong_indicators = [
        r'\bstanding on\b', r'\bwalking on\b',
        r'\bon the track', r'\bon track', r'\bon rail',
        r'\bperson\b.*\btrack', r'\bman\b.*\btrack', r'\bboy\b.*\btrack',
    ]

    if any(re.search(pattern, desc_lower) for pattern in strong_indicators):
        return True

    # Weaker evidence: person + track + positioning word all present.
    return _has_word(person_keywords) and _has_word(track_keywords) and _has_word(position_keywords)

# Script entry point: run the full analysis when executed directly
# (no-op when this module is imported).
if __name__ == "__main__":
    analyze_person_on_tracks()