# bahngleis-detektor / test_simple_counting.py
# Author: Migjomatic
# Commit 8a74c03: Remove HF token; use env var
#!/usr/bin/env python3
"""
Simple test to see raw model outputs for counting
"""
import sys
import os
from io import BytesIO
# Add this file's directory to the import path so the sibling modules
# (local_models, app) resolve even when the script is run from elsewhere.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_simple_counting():
    """Run counting-style prompts through both local models and print raw output.

    Diagnostic script: loads the first ``.mp4`` in the current directory,
    extracts a low-fps frame sample, then feeds a handful of counting and
    descriptive prompts to the CNN (BLIP) and Transformer (ViT-GPT2) models,
    printing each raw response so their counting ability can be compared.

    Returns:
        None. All results and errors are reported via ``print``; the function
        bails out early (still returning None) on any setup failure.
    """
    print("Simple Counting Test")
    print("=" * 30)

    # Project-local imports are done lazily so a missing dependency is
    # reported as a test-setup failure instead of crashing the module import.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Imported successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find a video file to sample frames from.
    video_files = [f for f in os.listdir('.') if f.endswith('.mp4')]
    if not video_files:
        print("- No video files found")
        return
    video_path = video_files[0]
    print(f"+ Using: {video_path[:30]}...")

    # Initialize the local model manager (may download/load model weights).
    try:
        local_manager = get_local_model_manager()
        print("+ Models ready")
    except Exception as e:
        print(f"- Error: {e}")
        return

    # Extract a sparse frame sample and pick one frame to test with.
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()
        video_file = BytesIO(video_data)
        frames = extract_frames_from_video(video_file, fps=0.1)
        if not frames:
            print("- No frames")
            return
        # Prefer the second frame (it showed a person in earlier runs), but
        # fall back to the first so a short video doesn't raise IndexError.
        frame_index = 1 if len(frames) > 1 else 0
        test_frame = frames[frame_index]['frame']
        print(f"+ Using frame at t={frames[frame_index]['timestamp']:.1f}s")
    except Exception as e:
        print(f"- Frame error: {e}")
        return

    # Prompts ranging from explicit counting to open-ended description.
    test_prompts = [
        "Count the number of people in this scene",
        "How many people do you see?",
        "one person or two people?",
        "Describe what you see"
    ]

    for prompt in test_prompts:
        print(f"\n--- Prompt: '{prompt}' ---")

        # Test CNN (BLIP).
        try:
            result = process_image_locally(test_frame, prompt, 'CNN (BLIP)', local_manager)
            cnn_response = result.get('generated_text', 'No response') if 'error' not in result else f"Error: {result['error']}"
            print(f"CNN: '{cnn_response}'")
        except Exception as e:
            print(f"CNN: Exception - {e}")

        # Test Transformer (ViT-GPT2).
        try:
            result = process_image_locally(test_frame, prompt, 'Transformer (ViT-GPT2)', local_manager)
            trans_response = result.get('generated_text', 'No response') if 'error' not in result else f"Error: {result['error']}"
            print(f"Transformer: '{trans_response}'")
        except Exception as e:
            print(f"Transformer: Exception - {e}")

    # Summary of observed behavior and suggested usage.
    print("\n" + "=" * 40)
    print("ANALYSIS:")
    print("- Neither model is designed for counting")
    print("- Both provide descriptions, not counts")
    print("- Transformer (ViT-GPT2) is better for descriptions")
    print("- CNN (BLIP) has prompt repetition issues")
    print("\nRECOMMENDATION:")  # fixed typo: was "RECOMMENDAT ION:"
    print("Use descriptive prompts like:")
    print("  'Describe what you see'")
    print("  'What is happening in this image?'")
    print("Rather than counting prompts.")
# Allow running this module directly as a standalone diagnostic script.
if __name__ == "__main__":
    test_simple_counting()