Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Simple test to see raw model outputs for counting | |
| """ | |
| import sys | |
| import os | |
| from io import BytesIO | |
| # Add current directory to path | |
| sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) | |
def _select_test_frame(frames):
    """Return the frame record to test: the second one if present, else the first.

    The second frame is preferred because it showed a person in earlier runs.
    Short clips may produce only one frame, so fall back to the first record
    instead of raising ``IndexError``. *frames* must be non-empty.
    """
    return frames[1] if len(frames) > 1 else frames[0]


def _format_model_result(result):
    """Convert a model result mapping into the text printed for the user.

    A mapping containing an ``'error'`` key is reported as an error; otherwise
    the ``'generated_text'`` value is used, or ``'No response'`` if it is absent.
    """
    if 'error' in result:
        return f"Error: {result['error']}"
    return result.get('generated_text', 'No response')


def test_simple_counting():
    """Print the raw answers of both local models to a few counting prompts.

    Steps:
      1. Import the project helpers (``local_models`` and ``app``).
      2. Pick an ``.mp4`` file from the current working directory.
      3. Obtain the local model manager.
      4. Extract frames from the video (``fps=0.1``) and choose one test frame.
      5. Send every prompt to 'CNN (BLIP)' and 'Transformer (ViT-GPT2)' and
         print whatever each model returns.
      6. Print a fixed analysis/recommendation summary.

    Every failure is reported on stdout and ends the run early; nothing is
    raised to the caller and the function always returns ``None``.
    """
    print("Simple Counting Test")
    print("=" * 30)

    # Project imports are done lazily so that a missing dependency is reported
    # as a readable message instead of a traceback at import time.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally
        print("+ Imported successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find video file. os.listdir() returns names in arbitrary order, so sort
    # to make the selected video reproducible between runs.
    video_files = sorted(f for f in os.listdir('.') if f.endswith('.mp4'))
    if not video_files:
        print("- No video files found")
        return
    video_path = video_files[0]
    print(f"+ Using: {video_path[:30]}...")

    # Get models
    try:
        local_manager = get_local_model_manager()
        print("+ Models ready")
    except Exception as e:
        print(f"- Error: {e}")
        return

    # Get one frame
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()
        # The extractor is handed an in-memory file object rather than a path.
        video_file = BytesIO(video_data)
        frames = extract_frames_from_video(video_file, fps=0.1)
        if not frames:
            print("- No frames")
            return
        chosen = _select_test_frame(frames)
        test_frame = chosen['frame']
        print(f"+ Using frame at t={chosen['timestamp']:.1f}s")
    except Exception as e:
        print(f"- Frame error: {e}")
        return

    # Test specific prompts
    test_prompts = [
        "Count the number of people in this scene",
        "How many people do you see?",
        "one person or two people?",
        "Describe what you see",
    ]
    # (label printed to the user, model name passed to process_image_locally)
    models = [
        ("CNN", 'CNN (BLIP)'),
        ("Transformer", 'Transformer (ViT-GPT2)'),
    ]

    for prompt in test_prompts:
        print(f"\n--- Prompt: '{prompt}' ---")
        for label, model_name in models:
            # A failure in one model must not prevent the other from running.
            try:
                result = process_image_locally(
                    test_frame, prompt, model_name, local_manager
                )
                print(f"{label}: '{_format_model_result(result)}'")
            except Exception as e:
                print(f"{label}: Exception - {e}")

    # Fixed summary text; it is not derived from the responses printed above.
    print("\n" + "=" * 40)
    print("ANALYSIS:")
    print("- Neither model is designed for counting")
    print("- Both provide descriptions, not counts")
    print("- Transformer (ViT-GPT2) is better for descriptions")
    print("- CNN (BLIP) has prompt repetition issues")
    print("\nRECOMMENDATION:")
    print("Use descriptive prompts like:")
    print(" 'Describe what you see'")
    print(" 'What is happening in this image?'")
    print("Rather than counting prompts.")
# Script entry point: run the model comparison only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_simple_counting()