Spaces:

Migjomatic
/

bahngleis-detektor

Running

File size: 3,259 Bytes

8a74c03

#!/usr/bin/env python3
"""
Simple test to see raw model outputs for counting
"""
import sys
import os
from io import BytesIO

# Put the directory containing this script at the front of sys.path so that
# the `local_models` and `app` imports made later in this file are searched
# for there first, independent of the current working directory.
# NOTE(review): presumably both modules live next to this script - confirm.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

def test_simple_counting():
    """Print the raw output of both local models for a set of counting prompts.

    This is a manual smoke test, not an assertion-based test: each step
    reports its outcome with ``print`` and the function returns early as soon
    as a prerequisite is missing (imports, video file, models, frames).

    Steps:
        1. Import the project helpers from ``local_models`` and ``app``.
        2. Pick an ``.mp4`` file from the current working directory.
        3. Extract frames from it and select one test frame.
        4. Send every prompt to the 'CNN (BLIP)' and
           'Transformer (ViT-GPT2)' models and print each response.
        5. Print a fixed analysis / recommendation summary.

    Returns:
        None. All results are written to stdout.
    """
    print("Simple Counting Test")
    print("=" * 30)

    # Imported lazily so a missing project module is reported, not raised.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Imported successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find video file. os.listdir() returns entries in arbitrary order, so
    # sort to make the choice of video reproducible between runs.
    video_files = sorted(f for f in os.listdir('.') if f.endswith('.mp4'))
    if not video_files:
        print("- No video files found")
        return

    video_path = video_files[0]
    print(f"+ Using: {video_path[:30]}...")

    # Get models
    try:
        local_manager = get_local_model_manager()
        print("+ Models ready")
    except Exception as e:
        print(f"- Error: {e}")
        return

    # Get one frame
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()

        # The extractor is handed an in-memory file object, not a path.
        video_file = BytesIO(video_data)
        # NOTE(review): fps=0.1 presumably means one sampled frame per
        # 10 seconds of video - confirm against extract_frames_from_video.
        frames = extract_frames_from_video(video_file, fps=0.1)

        if not frames:
            print("- No frames")
            return

        # Prefer the second frame (it showed a person in the original test
        # video), but fall back to the first one when the video yields only
        # a single frame instead of failing with an IndexError.
        frame_index = 1 if len(frames) > 1 else 0
        selected = frames[frame_index]
        test_frame = selected['frame']
        print(f"+ Using frame at t={selected['timestamp']:.1f}s")

    except Exception as e:
        print(f"- Frame error: {e}")
        return

    # Test specific prompts
    test_prompts = [
        "Count the number of people in this scene",
        "How many people do you see?",
        "one person or two people?",
        "Describe what you see",
    ]

    # (label printed in the output, model name passed to process_image_locally)
    models = (
        ("CNN", "CNN (BLIP)"),
        ("Transformer", "Transformer (ViT-GPT2)"),
    )

    for prompt in test_prompts:
        print(f"\n--- Prompt: '{prompt}' ---")

        for label, model_name in models:
            # A failure of one model must not prevent the other from running.
            try:
                result = process_image_locally(test_frame, prompt, model_name, local_manager)
                if 'error' in result:
                    response = f"Error: {result['error']}"
                else:
                    response = result.get('generated_text', 'No response')
                print(f"{label}: '{response}'")
            except Exception as e:
                print(f"{label}: Exception - {e}")

    # The summary below is static text; it is not derived from the responses
    # printed above.
    print("\n" + "=" * 40)
    print("ANALYSIS:")
    print("- Neither model is designed for counting")
    print("- Both provide descriptions, not counts")
    print("- Transformer (ViT-GPT2) is better for descriptions")
    print("- CNN (BLIP) has prompt repetition issues")
    print("\nRECOMMENDATION:")
    print("Use descriptive prompts like:")
    print("  'Describe what you see'")
    print("  'What is happening in this image?'")
    print("Rather than counting prompts.")

# Run the counting check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    test_simple_counting()