Spaces:

Migjomatic
/

bahngleis-detektor

Running

App Files Files Community

bahngleis-detektor / test_person_on_track_comprehensive.py

Migjomatic

Remove HF token; use env var

8a74c03 3 months ago

raw

history blame

13.7 kB

	#!/usr/bin/env python3
	"""
	Comprehensive test of all videos in test folder to create best person-on-track implementation
	"""
	import sys
	import os
	from io import BytesIO
	import glob

	# Add current directory to path
	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

	def test_all_videos_person_on_track():
	"""Test all videos in test folder for person-on-track scenarios"""
	print("COMPREHENSIVE PERSON-ON-TRACK DETECTION ANALYSIS")
	print("=" * 70)
	print("Testing all videos in test folder to find best implementation")
	print()

	try:
	from local_models import get_local_model_manager
	from app import extract_frames_from_video, process_image_locally
	print("+ Components loaded successfully")
	except ImportError as e:
	print(f"- Import error: {e}")
	return

	# Find all videos in test folder
	test_videos = glob.glob("test\\*.mp4")
	if not test_videos:
	print("- No MP4 files found in test folder")
	return

	print(f"+ Found {len(test_videos)} test videos: {[os.path.basename(v) for v in test_videos]}")

	# Initialize models
	try:
	local_manager = get_local_model_manager()
	print("+ All models ready for testing")
	except Exception as e:
	print(f"- Model initialization error: {e}")
	return

	# Test different approaches
	approaches = {
	"Approach 1 - People Counter": {
	"model": "People Counter",
	"prompt": "Track Safety Analysis"
	},
	"Approach 2 - Direct CNN": {
	"model": "CNN (BLIP)",
	"prompt": "Is there a person standing on train tracks? Answer yes or no."
	},
	"Approach 3 - Detailed Transformer": {
	"model": "Transformer (ViT-GPT2)",
	"prompt": "Describe people and train tracks in this image"
	},
	"Approach 4 - Safety Focus": {
	"model": "CNN (BLIP)",
	"prompt": "Describe any safety concerns with people near train tracks"
	}
	}

	all_results = {}

	# Test each video with each approach
	for video_idx, video_path in enumerate(test_videos):
	video_name = os.path.basename(video_path)
	print(f"\n" + "=" * 70)
	print(f"TESTING VIDEO {video_idx + 1}: {video_name}")
	print("=" * 70)

	try:
	# Extract frames
	with open(video_path, 'rb') as f:
	video_data = f.read()

	video_file = BytesIO(video_data)
	frames = extract_frames_from_video(video_file, fps=0.5) # Every 2 seconds

	if not frames:
	print(f"- No frames extracted from {video_name}")
	continue

	print(f"+ Extracted {len(frames)} frames from {video_name}")

	# Test 2-3 frames per video to get representative sample
	test_frames = frames[:min(3, len(frames))]
	video_results = {}

	# Test each approach on this video
	for approach_name, config in approaches.items():
	print(f"\n Testing {approach_name}:")
	print(f" {'-' * 40}")

	approach_results = []

	for frame_idx, frame_data in enumerate(test_frames):
	frame_num = frame_idx + 1
	timestamp = frame_data['timestamp']

	try:
	result = process_image_locally(
	frame_data['frame'],
	config["prompt"],
	config["model"],
	local_manager
	)

	# Analyze result for person-on-track
	person_on_track_analysis = analyze_for_person_on_track(result, config["model"])

	approach_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'raw_result': result,
	'person_on_track': person_on_track_analysis['on_track'],
	'confidence': person_on_track_analysis['confidence'],
	'reasoning': person_on_track_analysis['reasoning']
	})

	status = "ON TRACK" if person_on_track_analysis['on_track'] else "SAFE"
	print(f" Frame {frame_num} ({timestamp:.1f}s): {status} - {person_on_track_analysis['confidence']:.0%} confidence")
	print(f" Reasoning: {person_on_track_analysis['reasoning'][:80]}...")

	except Exception as e:
	approach_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'raw_result': {'error': str(e)},
	'person_on_track': False,
	'confidence': 0,
	'reasoning': f"Error: {str(e)}"
	})
	print(f" Frame {frame_num} ({timestamp:.1f}s): ERROR - {str(e)}")

	video_results[approach_name] = approach_results

	all_results[video_name] = video_results

	except Exception as e:
	print(f"- Failed to process {video_name}: {e}")
	continue

	# Comprehensive analysis and recommendation
	analyze_all_approaches(all_results, approaches)

	return all_results

	def analyze_for_person_on_track(result, model_type):
	"""Analyze model result to determine if person is on train tracks"""

	if 'error' in result:
	return {
	'on_track': False,
	'confidence': 0,
	'reasoning': f"Error in processing: {result['error']}"
	}

	# Handle different result types
	if 'people_analysis' in result:
	# People Counter result
	analysis = result['people_analysis']
	on_track = analysis.get('on_tracks', False) or analysis.get('safety_risk', False)
	confidence = analysis.get('confidence', 0)
	reasoning = analysis.get('analysis_summary', 'People Counter analysis')

	return {
	'on_track': on_track,
	'confidence': confidence,
	'reasoning': reasoning
	}

	elif 'yes_no_detection' in result:
	# Yes/No detector result
	detection = result['yes_no_detection']
	# For track detection, we need more than just person presence
	return {
	'on_track': False, # Yes/No detector doesn't check tracks specifically
	'confidence': 0.3,
	'reasoning': "Yes/No detector not suitable for track-specific detection"
	}

	elif 'generated_text' in result:
	# Text analysis result
	text = result['generated_text'].lower()

	# Keywords for person on tracks
	person_keywords = ['person', 'people', 'man', 'woman', 'human', 'individual']
	track_keywords = ['track', 'tracks', 'rail', 'rails', 'railway']
	position_keywords = ['on', 'standing', 'walking', 'sitting', 'crossing']
	danger_keywords = ['danger', 'unsafe', 'risk', 'hazard', 'warning']

	# Strong indicators
	strong_patterns = [
	'person on track', 'man on track', 'woman on track',
	'standing on track', 'walking on track', 'person crossing',
	'on the tracks', 'on train tracks', 'on railway'
	]

	# Count indicators
	person_mentions = sum(1 for kw in person_keywords if kw in text)
	track_mentions = sum(1 for kw in track_keywords if kw in text)
	position_mentions = sum(1 for kw in position_keywords if kw in text)
	danger_mentions = sum(1 for kw in danger_keywords if kw in text)
	strong_indicators = sum(1 for pattern in strong_patterns if pattern in text)

	# Decision logic
	if strong_indicators > 0:
	on_track = True
	confidence = min(0.8 + strong_indicators * 0.1, 1.0)
	reasoning = f"Strong indicators: {strong_indicators} pattern matches"

	elif person_mentions > 0 and track_mentions > 0 and position_mentions > 0:
	on_track = True
	confidence = 0.6 + min(person_mentions + track_mentions + position_mentions, 3) * 0.1
	reasoning = f"Person + track + position keywords: {person_mentions}+{track_mentions}+{position_mentions}"

	elif danger_mentions > 0 and (person_mentions > 0 or track_mentions > 0):
	on_track = True
	confidence = 0.5 + danger_mentions * 0.1
	reasoning = f"Safety concern mentioned with people/tracks: {danger_mentions} danger keywords"

	else:
	on_track = False
	confidence = 0.7 if person_mentions == 0 else 0.4
	reasoning = f"No clear person-on-track indicators. Person:{person_mentions}, Track:{track_mentions}"

	return {
	'on_track': on_track,
	'confidence': confidence,
	'reasoning': reasoning
	}

	else:
	return {
	'on_track': False,
	'confidence': 0,
	'reasoning': "Unknown result format"
	}

	def analyze_all_approaches(all_results, approaches):
	"""Analyze all approaches and provide recommendations"""

	print(f"\n" + "=" * 80)
	print("COMPREHENSIVE ANALYSIS OF ALL APPROACHES")
	print("=" * 80)

	# Calculate performance metrics for each approach
	approach_metrics = {}

	for approach_name in approaches.keys():
	total_frames = 0
	on_track_detections = 0
	avg_confidence = 0
	error_count = 0

	for video_name, video_results in all_results.items():
	if approach_name in video_results:
	for frame_result in video_results[approach_name]:
	total_frames += 1
	if frame_result['person_on_track']:
	on_track_detections += 1
	avg_confidence += frame_result['confidence']
	if 'error' in frame_result.get('raw_result', {}):
	error_count += 1

	if total_frames > 0:
	avg_confidence = avg_confidence / total_frames
	detection_rate = on_track_detections / total_frames * 100
	error_rate = error_count / total_frames * 100
	else:
	avg_confidence = 0
	detection_rate = 0
	error_rate = 100

	approach_metrics[approach_name] = {
	'total_frames': total_frames,
	'on_track_detections': on_track_detections,
	'detection_rate': detection_rate,
	'avg_confidence': avg_confidence,
	'error_rate': error_rate
	}

	# Display results table
	print(f"\nAPPROACH PERFORMANCE COMPARISON:")
	print("-" * 80)
	print(f"{'Approach':<25} {'Frames':<8} {'On-Track':<10} {'Rate':<8} {'Confidence':<12} {'Errors':<8}")
	print("-" * 80)

	for approach, metrics in approach_metrics.items():
	print(f"{approach:<25} {metrics['total_frames']:<8} {metrics['on_track_detections']:<10} "
	f"{metrics['detection_rate']:<8.1f}% {metrics['avg_confidence']:<12.0%} {metrics['error_rate']:<8.1f}%")

	# Find best approach
	best_approach = max(approach_metrics.items(),
	key=lambda x: x[1]['avg_confidence'] * (100 - x[1]['error_rate']) / 100)

	print(f"\n" + "=" * 80)
	print("RECOMMENDATIONS")
	print("=" * 80)

	print(f"BEST APPROACH: {best_approach[0]}")
	print(f" - Average Confidence: {best_approach[1]['avg_confidence']:.0%}")
	print(f" - Detection Rate: {best_approach[1]['detection_rate']:.1f}%")
	print(f" - Error Rate: {best_approach[1]['error_rate']:.1f}%")
	print(f" - Total Frames Tested: {best_approach[1]['total_frames']}")

	# Detailed recommendations
	print(f"\nDETAILED ANALYSIS:")

	if best_approach[0] == "Approach 1 - People Counter":
	print("+ People Counter is most effective for track safety")
	print("+ Uses specialized multi-prompt analysis")
	print("+ Provides detailed safety risk assessment")

	elif "CNN" in best_approach[0]:
	print("+ CNN model provides good balance of speed and accuracy")
	print("+ Direct prompting works well for specific scenarios")
	print("+ Consider using for real-time applications")

	elif "Transformer" in best_approach[0]:
	print("+ Transformer model provides detailed scene understanding")
	print("+ Better for complex scene analysis")
	print("+ Higher computational cost but more accurate descriptions")

	# Video-by-video breakdown
	print(f"\nPER-VIDEO ANALYSIS:")
	print("-" * 50)

	for video_name, video_results in all_results.items():
	print(f"\n{video_name}:")
	for approach_name, results in video_results.items():
	on_track_frames = sum(1 for r in results if r['person_on_track'])
	total_frames = len(results)
	print(f" {approach_name}: {on_track_frames}/{total_frames} frames with person on track")

	if __name__ == "__main__":
	test_all_videos_person_on_track()