# bahngleis-detektor / test_simple_counting.py
# Author: Migjomatic
# Commit 8a74c03: Remove HF token; use env var
#!/usr/bin/env python3
"""
Simple test to see raw model outputs for counting
"""
import sys
import os
from io import BytesIO
# Add this file's directory to the import path so the sibling modules
# (local_models, app) resolve even when the script is run from elsewhere.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_simple_counting():
    """Run counting-style prompts through both local models and print raw output.

    Diagnostic script: loads the first ``.mp4`` in the current directory,
    extracts a low-fps frame sample, then feeds a handful of counting and
    descriptive prompts to the CNN (BLIP) and Transformer (ViT-GPT2) models,
    printing each raw response so their counting ability can be compared.

    Returns:
        None. All results and errors are reported via ``print``; the function
        bails out early (still returning None) on any setup failure.
    """
    print("Simple Counting Test")
    print("=" * 30)

    # Project-local imports are done lazily so a missing dependency is
    # reported as a test-setup failure instead of crashing the module import.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Imported successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find a video file to sample frames from.
    video_files = [f for f in os.listdir('.') if f.endswith('.mp4')]
    if not video_files:
        print("- No video files found")
        return
    video_path = video_files[0]
    print(f"+ Using: {video_path[:30]}...")

    # Initialize the local model manager (may download/load model weights).
    try:
        local_manager = get_local_model_manager()
        print("+ Models ready")
    except Exception as e:
        print(f"- Error: {e}")
        return

    # Extract a sparse frame sample and pick one frame to test with.
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()
        video_file = BytesIO(video_data)
        frames = extract_frames_from_video(video_file, fps=0.1)
        if not frames:
            print("- No frames")
            return
        # Prefer the second frame (it showed a person in earlier runs), but
        # fall back to the first so a short video doesn't raise IndexError.
        frame_index = 1 if len(frames) > 1 else 0
        test_frame = frames[frame_index]['frame']
        print(f"+ Using frame at t={frames[frame_index]['timestamp']:.1f}s")
    except Exception as e:
        print(f"- Frame error: {e}")
        return

    # Prompts ranging from explicit counting to open-ended description.
    test_prompts = [
        "Count the number of people in this scene",
        "How many people do you see?",
        "one person or two people?",
        "Describe what you see"
    ]

    for prompt in test_prompts:
        print(f"\n--- Prompt: '{prompt}' ---")

        # Test CNN (BLIP).
        try:
            result = process_image_locally(test_frame, prompt, 'CNN (BLIP)', local_manager)
            cnn_response = result.get('generated_text', 'No response') if 'error' not in result else f"Error: {result['error']}"
            print(f"CNN: '{cnn_response}'")
        except Exception as e:
            print(f"CNN: Exception - {e}")

        # Test Transformer (ViT-GPT2).
        try:
            result = process_image_locally(test_frame, prompt, 'Transformer (ViT-GPT2)', local_manager)
            trans_response = result.get('generated_text', 'No response') if 'error' not in result else f"Error: {result['error']}"
            print(f"Transformer: '{trans_response}'")
        except Exception as e:
            print(f"Transformer: Exception - {e}")

    # Summary of observed behavior and suggested usage.
    print("\n" + "=" * 40)
    print("ANALYSIS:")
    print("- Neither model is designed for counting")
    print("- Both provide descriptions, not counts")
    print("- Transformer (ViT-GPT2) is better for descriptions")
    print("- CNN (BLIP) has prompt repetition issues")
    print("\nRECOMMENDATION:")  # fixed typo: was "RECOMMENDAT ION:"
    print("Use descriptive prompts like:")
    print("  'Describe what you see'")
    print("  'What is happening in this image?'")
    print("Rather than counting prompts.")
# Allow running this module directly as a standalone diagnostic script.
if __name__ == "__main__":
    test_simple_counting()