Spaces:

Migjomatic
/

bahngleis-detektor

Running

App Files Files Community

bahngleis-detektor / test_multiple_videos.py

Migjomatic

Remove HF token; use env var

8a74c03 3 months ago

raw

history blame

9.36 kB

	#!/usr/bin/env python3
	"""
	Test Yes/No Person Detector on multiple videos for accuracy verification
	"""
	import sys
	import os
	from io import BytesIO
	import glob

	# Add current directory to path
	sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

	def test_multiple_videos():
	"""Test Yes/No Person Detector on multiple videos"""
	print("TESTING YES/NO PERSON DETECTOR - MULTIPLE VIDEOS")
	print("=" * 60)
	print("Verifying model accuracy across different video content")
	print()

	try:
	from local_models import get_local_model_manager
	from app import extract_frames_from_video, process_image_locally
	print("+ Components loaded successfully")
	except ImportError as e:
	print(f"- Import error: {e}")
	return

	# Find all MP4 files
	video_files = glob.glob("*.mp4")
	if not video_files:
	print("- No MP4 files found")
	return

	print(f"+ Found {len(video_files)} video files: {video_files}")

	# Initialize models
	try:
	local_manager = get_local_model_manager()
	print("+ Yes/No Person Detector ready")
	except Exception as e:
	print(f"- Model initialization error: {e}")
	return

	all_results = {}

	# Test each video
	for video_idx, video_path in enumerate(video_files):
	print(f"\n" + "=" * 60)
	print(f"TESTING VIDEO {video_idx + 1}: {video_path}")
	print("=" * 60)

	try:
	# Extract frames
	with open(video_path, 'rb') as f:
	video_data = f.read()

	video_file = BytesIO(video_data)
	frames = extract_frames_from_video(video_file, fps=0.3) # Every 3+ seconds

	if not frames:
	print(f"- No frames extracted from {video_path}")
	continue

	print(f"+ Extracted {len(frames)} frames from {video_path}")

	# Test first 3 frames from each video
	test_frames = frames[:3]
	video_results = []

	for i, frame_data in enumerate(test_frames):
	frame_num = i + 1
	timestamp = frame_data['timestamp']

	print(f"\n Frame {frame_num} ({timestamp:.1f}s):")
	print(f" {'-' * 30}")

	try:
	result = process_image_locally(
	frame_data['frame'],
	"Is there a person in this image?",
	'Yes/No Person Detector',
	local_manager
	)

	if 'error' in result:
	print(f" ERROR: {result['error']}")
	video_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'answer': 'ERROR',
	'confidence': 0,
	'raw_response': result['error']
	})
	elif 'yes_no_detection' in result:
	detection = result['yes_no_detection']

	answer = detection.get('answer', 'UNKNOWN')
	person_detected = detection.get('person_detected', False)
	confidence = detection.get('confidence', 0)
	raw_response = detection.get('raw_response', 'N/A')

	print(f" Answer: {answer}")
	print(f" Person Detected: {person_detected}")
	print(f" Confidence: {confidence:.0%}")
	print(f" Raw Response: '{raw_response[:50]}{'...' if len(raw_response) > 50 else ''}'")

	video_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'answer': answer,
	'person_detected': person_detected,
	'confidence': confidence,
	'raw_response': raw_response
	})
	else:
	print(f" Unexpected result format: {result}")
	video_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'answer': 'UNKNOWN',
	'confidence': 0,
	'raw_response': str(result)
	})

	except Exception as e:
	print(f" ERROR: {e}")
	video_results.append({
	'frame': frame_num,
	'timestamp': timestamp,
	'answer': 'ERROR',
	'confidence': 0,
	'raw_response': str(e)
	})

	all_results[video_path] = video_results

	except Exception as e:
	print(f"- Failed to process {video_path}: {e}")
	continue

	# Comprehensive analysis
	print(f"\n" + "=" * 80)
	print("COMPREHENSIVE RESULTS ANALYSIS")
	print("=" * 80)

	# Summary table
	print(f"\nRESULTS SUMMARY BY VIDEO:")
	print("-" * 80)
	print(f"{'Video':<20} {'Frame':<8} {'Time':<8} {'Answer':<8} {'Confidence':<12} {'Raw Response':<25}")
	print("-" * 80)

	total_frames = 0
	yes_count = 0
	no_count = 0
	error_count = 0
	unclear_count = 0
	confidence_sum = 0

	for video_name, results in all_results.items():
	for result in results:
	frame = result['frame']
	timestamp = result['timestamp']
	answer = result['answer']
	confidence = result['confidence']
	raw_response = result['raw_response'][:20] + "..." if len(result['raw_response']) > 20 else result['raw_response']

	print(f"{video_name:<20} {frame:<8} {timestamp:<8.1f} {answer:<8} {confidence:<12.0%} {raw_response:<25}")

	total_frames += 1
	confidence_sum += confidence

	if answer == 'YES':
	yes_count += 1
	elif answer == 'NO':
	no_count += 1
	elif answer == 'ERROR':
	error_count += 1
	else:
	unclear_count += 1

	# Overall statistics
	print(f"\n" + "=" * 80)
	print("OVERALL STATISTICS")
	print("=" * 80)

	print(f"Total frames tested: {total_frames}")
	print(f"Videos tested: {len(all_results)}")
	print(f"YES answers: {yes_count}")
	print(f"NO answers: {no_count}")
	print(f"ERROR responses: {error_count}")
	print(f"UNCLEAR responses: {unclear_count}")

	if total_frames > 0:
	success_rate = (yes_count + no_count) / total_frames * 100
	avg_confidence = confidence_sum / total_frames
	print(f"Success rate: {success_rate:.1f}%")
	print(f"Average confidence: {avg_confidence:.0%}")

	# Accuracy assessment
	print(f"\n" + "=" * 80)
	print("ACCURACY ASSESSMENT")
	print("=" * 80)

	# Check if model is stuck giving same answer
	if yes_count == total_frames and total_frames > 3:
	print("WARNING: Model appears to be giving only YES answers!")
	print("This suggests the model may be:")
	print("- Overconfident or biased toward detecting people")
	print("- Not properly processing different image content")
	print("- The prompt may need adjustment")
	print("\nRECOMMENDED FIXES:")
	print("1. Test with images that definitely contain no people")
	print("2. Adjust the prompt to be more specific")
	print("3. Try different confidence thresholds")
	print("4. Consider using a different base model")

	elif no_count == total_frames and total_frames > 3:
	print("WARNING: Model appears to be giving only NO answers!")
	print("This suggests the model may be:")
	print("- Too conservative in person detection")
	print("- Having trouble detecting people in the images")
	print("- The prompt may be too restrictive")

	elif yes_count > 0 and no_count > 0:
	print("GOOD: Model is giving varied responses (both YES and NO)")
	print("This suggests the model is:")
	print("+ Properly analyzing different image content")
	print("+ Responding appropriately to image variations")
	print("+ Working as expected")

	else:
	print("INSUFFICIENT DATA: Need more diverse test cases")

	# Per-video analysis
	print(f"\nPER-VIDEO BREAKDOWN:")
	print("-" * 50)

	for video_name, results in all_results.items():
	video_yes = sum(1 for r in results if r['answer'] == 'YES')
	video_no = sum(1 for r in results if r['answer'] == 'NO')
	video_total = len(results)

	print(f"{video_name}: {video_yes} YES, {video_no} NO (out of {video_total} frames)")

	return all_results

	if __name__ == "__main__":
	test_multiple_videos()