#!/usr/bin/env python3
"""
Clean report of person on tracks detection results
"""
import sys
import os
from io import BytesIO
import re

# Add current directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def create_detection_report():
    """Create clean detection report"""
    print("PERSON ON TRACKS DETECTION REPORT")
    print("=" * 50)
    
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
    except ImportError as e:
        print(f"Import error: {e}")
        return
    
    # Find video
    video_files = [f for f in os.listdir('.') if f.lower().endswith('.mp4')]
    if not video_files:
        print("No video files found")
        return
    
    video_path = video_files[0]
    print(f"Video: {video_path}")
    print("Model: Transformer (ViT-GPT2)")
    print("Prompt: 'Describe the scene focusing on people and train tracks'")
    print()
    
    # Get model
    try:
        local_manager = get_local_model_manager()
    except Exception as e:
        print(f"Model error: {e}")
        return
    
    # Extract frames
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()
        
        video_file = BytesIO(video_data)
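        # fps=0.5 presumably samples about one frame every two seconds of
        # video; each returned entry is expected to carry a 'frame' image
        # and a 'timestamp' in seconds (both keys are used below)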
        frames = extract_frames_from_video(video_file, fps=0.5)
        
        if not frames:
            print("No frames extracted")
            return
        
        print(f"Analyzing {len(frames)} frames...")
        print()
        
    except Exception as e:
        print(f"Frame extraction error: {e}")
        return
    
    # Analyze each frame
    results = []
    person_frames = []
    
    for i, frame_data in enumerate(frames):
        frame_num = i + 1
        timestamp = frame_data['timestamp']
        
        try:
            result = process_image_locally(
                frame_data['frame'],
                "Describe the scene focusing on people and train tracks",
                'Transformer (ViT-GPT2)',
                local_manager
            )
            
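            # result is expected to be a dict holding either an 'error'
            # message or the generated caption under 'generated_text'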
            if 'error' in result:
                description = f"Error: {result['error']}"
                person_detected = False
            else:
                description = result.get('generated_text', 'No response')
                person_detected = detect_person_on_track(description)
            
            results.append({
                'frame': frame_num,
                'time': timestamp,
                'description': description,
                'person_on_track': person_detected
            })
            
            if person_detected:
                person_frames.append(frame_num)
            
            status = "[PERSON ON TRACK]" if person_detected else "[CLEAR]"
            print(f"Frame {frame_num:2d} ({timestamp:4.1f}s): {status}")
            print(f"    {description}")
            print()
            
        except Exception as e:
            # Record failures too so results stays index-aligned with frame numbers
            results.append({'frame': frame_num, 'time': timestamp,
                            'description': f"Error: {e}", 'person_on_track': False})
            print(f"Frame {frame_num:2d} ({timestamp:4.1f}s): ERROR - {e}")
            print()
    
    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    
    total = len(frames)
    detected = len(person_frames)
    
    print(f"Total frames: {total}")
    print(f"Person detected on tracks: {detected}")
    print(f"Detection rate: {100 * detected / total:.1f}%")
    
    if person_frames:
        print(f"Frames with person: {', '.join(map(str, person_frames))}")
        timestamps = [results[f-1]['time'] for f in person_frames]
        print(f"Time range: {min(timestamps):.1f}s - {max(timestamps):.1f}s")
        
        print(f"\nDETAILED DETECTIONS:")
        for frame_num in person_frames:
            frame_data = results[frame_num-1]
            print(f"  Frame {frame_num} ({frame_data['time']:.1f}s): {frame_data['description']}")
    else:
        print("No clear person detections on tracks")
    
    print(f"\nRELIABILITY ASSESSMENT:")
    print("- Model designed for image description, not object detection")
    print("- Results based on text analysis of descriptions")
    print("- Best used as preliminary screening, not definitive detection")
    
    return results

def detect_person_on_track(description):
    """Heuristic detection based on the description text.

    Matches on word boundaries so short words such as 'on' or 'man' do
    not fire inside longer words like 'person' or 'woman'.
    """
    if not description:
        return False

    desc = description.lower()

    def has_any(phrases):
        return any(re.search(r'\b' + re.escape(p) + r'\b', desc) for p in phrases)

    # Person indicators
    has_person = has_any(['person', 'man', 'boy', 'woman', 'girl', 'people'])

    # Track indicators
    has_track = has_any(['track', 'tracks', 'rail', 'rails'])

    # Position indicators
    has_position = has_any(['on', 'standing', 'walking'])

    # Strong indicators: explicit phrases are accepted on their own
    has_strong = has_any(['standing on', 'walking on', 'on the track', 'on track'])

    return has_strong or (has_person and has_track and has_position)
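
# Illustrative behavior of the heuristic (hypothetical captions, not
# actual model output):
#   detect_person_on_track("a man standing on the train tracks")  -> True
#   detect_person_on_track("an empty railway track in the woods") -> False
#   detect_person_on_track("a person waiting near the platform")  -> False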

if __name__ == "__main__":
    create_detection_report()
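
# Usage sketch (assumes app.py, local_models.py and at least one .mp4 file
# sit alongside this script):
#   python3 <this_script>.py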