bahngleis-detektor / test_person_on_track_comprehensive.py
Migjomatic's picture
Remove HF token; use env var
8a74c03
raw
history blame
13.7 kB
#!/usr/bin/env python3
"""
Comprehensive test of all videos in test folder to create best person-on-track implementation
"""
import sys
import os
from io import BytesIO
import glob
# Add current directory to path
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_all_videos_person_on_track():
"""Test all videos in test folder for person-on-track scenarios"""
print("COMPREHENSIVE PERSON-ON-TRACK DETECTION ANALYSIS")
print("=" * 70)
print("Testing all videos in test folder to find best implementation")
print()
try:
from local_models import get_local_model_manager
from app import extract_frames_from_video, process_image_locally
print("+ Components loaded successfully")
except ImportError as e:
print(f"- Import error: {e}")
return
# Find all videos in test folder
test_videos = glob.glob("test\\*.mp4")
if not test_videos:
print("- No MP4 files found in test folder")
return
print(f"+ Found {len(test_videos)} test videos: {[os.path.basename(v) for v in test_videos]}")
# Initialize models
try:
local_manager = get_local_model_manager()
print("+ All models ready for testing")
except Exception as e:
print(f"- Model initialization error: {e}")
return
# Test different approaches
approaches = {
"Approach 1 - People Counter": {
"model": "People Counter",
"prompt": "Track Safety Analysis"
},
"Approach 2 - Direct CNN": {
"model": "CNN (BLIP)",
"prompt": "Is there a person standing on train tracks? Answer yes or no."
},
"Approach 3 - Detailed Transformer": {
"model": "Transformer (ViT-GPT2)",
"prompt": "Describe people and train tracks in this image"
},
"Approach 4 - Safety Focus": {
"model": "CNN (BLIP)",
"prompt": "Describe any safety concerns with people near train tracks"
}
}
all_results = {}
# Test each video with each approach
for video_idx, video_path in enumerate(test_videos):
video_name = os.path.basename(video_path)
print(f"\n" + "=" * 70)
print(f"TESTING VIDEO {video_idx + 1}: {video_name}")
print("=" * 70)
try:
# Extract frames
with open(video_path, 'rb') as f:
video_data = f.read()
video_file = BytesIO(video_data)
frames = extract_frames_from_video(video_file, fps=0.5) # Every 2 seconds
if not frames:
print(f"- No frames extracted from {video_name}")
continue
print(f"+ Extracted {len(frames)} frames from {video_name}")
# Test 2-3 frames per video to get representative sample
test_frames = frames[:min(3, len(frames))]
video_results = {}
# Test each approach on this video
for approach_name, config in approaches.items():
print(f"\n Testing {approach_name}:")
print(f" {'-' * 40}")
approach_results = []
for frame_idx, frame_data in enumerate(test_frames):
frame_num = frame_idx + 1
timestamp = frame_data['timestamp']
try:
result = process_image_locally(
frame_data['frame'],
config["prompt"],
config["model"],
local_manager
)
# Analyze result for person-on-track
person_on_track_analysis = analyze_for_person_on_track(result, config["model"])
approach_results.append({
'frame': frame_num,
'timestamp': timestamp,
'raw_result': result,
'person_on_track': person_on_track_analysis['on_track'],
'confidence': person_on_track_analysis['confidence'],
'reasoning': person_on_track_analysis['reasoning']
})
status = "ON TRACK" if person_on_track_analysis['on_track'] else "SAFE"
print(f" Frame {frame_num} ({timestamp:.1f}s): {status} - {person_on_track_analysis['confidence']:.0%} confidence")
print(f" Reasoning: {person_on_track_analysis['reasoning'][:80]}...")
except Exception as e:
approach_results.append({
'frame': frame_num,
'timestamp': timestamp,
'raw_result': {'error': str(e)},
'person_on_track': False,
'confidence': 0,
'reasoning': f"Error: {str(e)}"
})
print(f" Frame {frame_num} ({timestamp:.1f}s): ERROR - {str(e)}")
video_results[approach_name] = approach_results
all_results[video_name] = video_results
except Exception as e:
print(f"- Failed to process {video_name}: {e}")
continue
# Comprehensive analysis and recommendation
analyze_all_approaches(all_results, approaches)
return all_results
def analyze_for_person_on_track(result, model_type):
"""Analyze model result to determine if person is on train tracks"""
if 'error' in result:
return {
'on_track': False,
'confidence': 0,
'reasoning': f"Error in processing: {result['error']}"
}
# Handle different result types
if 'people_analysis' in result:
# People Counter result
analysis = result['people_analysis']
on_track = analysis.get('on_tracks', False) or analysis.get('safety_risk', False)
confidence = analysis.get('confidence', 0)
reasoning = analysis.get('analysis_summary', 'People Counter analysis')
return {
'on_track': on_track,
'confidence': confidence,
'reasoning': reasoning
}
elif 'yes_no_detection' in result:
# Yes/No detector result
detection = result['yes_no_detection']
# For track detection, we need more than just person presence
return {
'on_track': False, # Yes/No detector doesn't check tracks specifically
'confidence': 0.3,
'reasoning': "Yes/No detector not suitable for track-specific detection"
}
elif 'generated_text' in result:
# Text analysis result
text = result['generated_text'].lower()
# Keywords for person on tracks
person_keywords = ['person', 'people', 'man', 'woman', 'human', 'individual']
track_keywords = ['track', 'tracks', 'rail', 'rails', 'railway']
position_keywords = ['on', 'standing', 'walking', 'sitting', 'crossing']
danger_keywords = ['danger', 'unsafe', 'risk', 'hazard', 'warning']
# Strong indicators
strong_patterns = [
'person on track', 'man on track', 'woman on track',
'standing on track', 'walking on track', 'person crossing',
'on the tracks', 'on train tracks', 'on railway'
]
# Count indicators
person_mentions = sum(1 for kw in person_keywords if kw in text)
track_mentions = sum(1 for kw in track_keywords if kw in text)
position_mentions = sum(1 for kw in position_keywords if kw in text)
danger_mentions = sum(1 for kw in danger_keywords if kw in text)
strong_indicators = sum(1 for pattern in strong_patterns if pattern in text)
# Decision logic
if strong_indicators > 0:
on_track = True
confidence = min(0.8 + strong_indicators * 0.1, 1.0)
reasoning = f"Strong indicators: {strong_indicators} pattern matches"
elif person_mentions > 0 and track_mentions > 0 and position_mentions > 0:
on_track = True
confidence = 0.6 + min(person_mentions + track_mentions + position_mentions, 3) * 0.1
reasoning = f"Person + track + position keywords: {person_mentions}+{track_mentions}+{position_mentions}"
elif danger_mentions > 0 and (person_mentions > 0 or track_mentions > 0):
on_track = True
confidence = 0.5 + danger_mentions * 0.1
reasoning = f"Safety concern mentioned with people/tracks: {danger_mentions} danger keywords"
else:
on_track = False
confidence = 0.7 if person_mentions == 0 else 0.4
reasoning = f"No clear person-on-track indicators. Person:{person_mentions}, Track:{track_mentions}"
return {
'on_track': on_track,
'confidence': confidence,
'reasoning': reasoning
}
else:
return {
'on_track': False,
'confidence': 0,
'reasoning': "Unknown result format"
}
def analyze_all_approaches(all_results, approaches):
"""Analyze all approaches and provide recommendations"""
print(f"\n" + "=" * 80)
print("COMPREHENSIVE ANALYSIS OF ALL APPROACHES")
print("=" * 80)
# Calculate performance metrics for each approach
approach_metrics = {}
for approach_name in approaches.keys():
total_frames = 0
on_track_detections = 0
avg_confidence = 0
error_count = 0
for video_name, video_results in all_results.items():
if approach_name in video_results:
for frame_result in video_results[approach_name]:
total_frames += 1
if frame_result['person_on_track']:
on_track_detections += 1
avg_confidence += frame_result['confidence']
if 'error' in frame_result.get('raw_result', {}):
error_count += 1
if total_frames > 0:
avg_confidence = avg_confidence / total_frames
detection_rate = on_track_detections / total_frames * 100
error_rate = error_count / total_frames * 100
else:
avg_confidence = 0
detection_rate = 0
error_rate = 100
approach_metrics[approach_name] = {
'total_frames': total_frames,
'on_track_detections': on_track_detections,
'detection_rate': detection_rate,
'avg_confidence': avg_confidence,
'error_rate': error_rate
}
# Display results table
print(f"\nAPPROACH PERFORMANCE COMPARISON:")
print("-" * 80)
print(f"{'Approach':<25} {'Frames':<8} {'On-Track':<10} {'Rate':<8} {'Confidence':<12} {'Errors':<8}")
print("-" * 80)
for approach, metrics in approach_metrics.items():
print(f"{approach:<25} {metrics['total_frames']:<8} {metrics['on_track_detections']:<10} "
f"{metrics['detection_rate']:<8.1f}% {metrics['avg_confidence']:<12.0%} {metrics['error_rate']:<8.1f}%")
# Find best approach
best_approach = max(approach_metrics.items(),
key=lambda x: x[1]['avg_confidence'] * (100 - x[1]['error_rate']) / 100)
print(f"\n" + "=" * 80)
print("RECOMMENDATIONS")
print("=" * 80)
print(f"BEST APPROACH: {best_approach[0]}")
print(f" - Average Confidence: {best_approach[1]['avg_confidence']:.0%}")
print(f" - Detection Rate: {best_approach[1]['detection_rate']:.1f}%")
print(f" - Error Rate: {best_approach[1]['error_rate']:.1f}%")
print(f" - Total Frames Tested: {best_approach[1]['total_frames']}")
# Detailed recommendations
print(f"\nDETAILED ANALYSIS:")
if best_approach[0] == "Approach 1 - People Counter":
print("+ People Counter is most effective for track safety")
print("+ Uses specialized multi-prompt analysis")
print("+ Provides detailed safety risk assessment")
elif "CNN" in best_approach[0]:
print("+ CNN model provides good balance of speed and accuracy")
print("+ Direct prompting works well for specific scenarios")
print("+ Consider using for real-time applications")
elif "Transformer" in best_approach[0]:
print("+ Transformer model provides detailed scene understanding")
print("+ Better for complex scene analysis")
print("+ Higher computational cost but more accurate descriptions")
# Video-by-video breakdown
print(f"\nPER-VIDEO ANALYSIS:")
print("-" * 50)
for video_name, video_results in all_results.items():
print(f"\n{video_name}:")
for approach_name, results in video_results.items():
on_track_frames = sum(1 for r in results if r['person_on_track'])
total_frames = len(results)
print(f" {approach_name}: {on_track_frames}/{total_frames} frames with person on track")
if __name__ == "__main__":
test_all_videos_person_on_track()