# bahngleis-detektor / test_yes_no_models.py
# (Hugging Face Space file view residue: author Migjomatic, commit 8a74c03,
#  "Remove HF token; use env var", 9.23 kB — kept here as a comment so the
#  file remains valid Python.)
#!/usr/bin/env python3
"""
Test multiple models for simple yes/no person detection
"""
import base64
import os
import re
import sys
from io import BytesIO

import requests
from PIL import Image

# Add current directory to path so the sibling project modules
# (local_models, app) can be imported when run from elsewhere.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
def test_yes_no_models():
    """Compare several captioning models on simple yes/no person detection.

    Extracts three frames from the first ``.mp4`` file in the current
    directory, asks each configured local and remote model whether a person
    is visible, normalizes each reply to YES/NO/UNCLEAR/ERROR, prints a
    comparison table, and recommends the model with the best success rate.

    Returns:
        tuple[dict, str]: ``(results, best_model_name)`` where ``results``
        maps each model's display name to its per-frame outcomes; ``None``
        when a prerequisite is missing (project imports, an MP4 file, enough
        frames, or the local model manager).
    """
    print("TESTING MULTIPLE MODELS FOR YES/NO PERSON DETECTION")
    print("=" * 60)

    # Project-local imports are done lazily so a missing dependency is
    # reported cleanly instead of crashing the script at import time.
    try:
        from local_models import get_local_model_manager
        from app import extract_frames_from_video, process_image_locally, query_huggingface_api
        print("+ Components loaded successfully")
    except ImportError as e:
        print(f"- Import error: {e}")
        return

    # Find a video file to sample frames from.
    video_files = [f for f in os.listdir('.') if f.endswith('.mp4')]
    if not video_files:
        print("- No MP4 files found")
        return
    video_path = video_files[0]
    print(f"+ Using video: {video_path[:50]}...")

    # Extract 3 test frames.
    try:
        with open(video_path, 'rb') as f:
            video_data = f.read()
        video_file = BytesIO(video_data)
        frames = extract_frames_from_video(video_file, fps=0.3)  # Every 3+ seconds
        if len(frames) < 3:
            print(f"- Only {len(frames)} frames extracted, need at least 3")
            return
        test_frames = frames[:3]  # Use first 3 frames
        print(f"+ Using {len(test_frames)} frames for testing")
    except Exception as e:
        print(f"- Frame extraction error: {e}")
        return

    # Initialize local models.
    try:
        local_manager = get_local_model_manager()
        print("+ Local models ready")
    except Exception as e:
        print(f"- Local model error: {e}")
        return

    # Every model gets the same yes/no prompt so results are comparable.
    prompt = "Is there a person in this image? Answer only yes or no."
    models_to_test = {
        "Local CNN (BLIP)": {
            "type": "local",
            "model_name": "CNN (BLIP)",
            "prompt": prompt,
        },
        "Local Transformer": {
            "type": "local",
            "model_name": "Transformer (ViT-GPT2)",
            "prompt": prompt,
        },
        "Remote BLIP": {
            "type": "remote",
            "model_name": "Salesforce/blip-image-captioning-large",
            "prompt": prompt,
        },
        "Remote GIT": {
            "type": "remote",
            "model_name": "microsoft/git-large-coco",
            "prompt": prompt,
        },
        "Remote ViT-GPT2": {
            "type": "remote",
            "model_name": "nlpconnect/vit-gpt2-image-captioning",
            "prompt": prompt,
        },
    }

    # BUG FIX: the original line was the literal string
    # "os.getenv("HF_TOKEN")" (a SyntaxError); read the env var instead.
    api_token = os.getenv("HF_TOKEN")

    # results maps model display name -> list of per-frame outcome dicts.
    results = {}
    print(f"\nTesting {len(models_to_test)} models on {len(test_frames)} frames:")
    print("=" * 80)

    # Test each model on each frame.
    for model_display_name, config in models_to_test.items():
        print(f"\nTesting: {model_display_name}")
        print("-" * 50)
        model_results = []
        for i, frame_data in enumerate(test_frames):
            frame_num = i + 1
            timestamp = frame_data['timestamp']
            try:
                if config["type"] == "local":
                    # Run the prompt through a locally-loaded model.
                    result = process_image_locally(
                        frame_data['frame'],
                        config["prompt"],
                        config["model_name"],
                        local_manager
                    )
                    if 'error' in result:
                        response = f"ERROR: {result['error']}"
                        yes_no = "ERROR"
                    else:
                        response = result.get('generated_text', 'No response')
                        yes_no = extract_yes_no(response)
                else:
                    # Run the prompt through the Hugging Face inference API.
                    result = query_huggingface_api(
                        frame_data['frame'],
                        config["prompt"],
                        config["model_name"],
                        api_token
                    )
                    if 'error' in result:
                        response = f"ERROR: {result['error']}"
                        yes_no = "ERROR"
                    else:
                        # The API may return a list of candidates or a dict.
                        if isinstance(result, list) and len(result) > 0:
                            response = result[0].get('generated_text', str(result[0]))
                        elif 'generated_text' in result:
                            response = result['generated_text']
                        else:
                            response = str(result)
                        yes_no = extract_yes_no(response)
                model_results.append({
                    'frame': frame_num,
                    'timestamp': timestamp,
                    'response': response[:100] + "..." if len(response) > 100 else response,
                    'yes_no': yes_no
                })
                print(f"  Frame {frame_num} ({timestamp:.1f}s): {yes_no} - {response[:50]}...")
            except Exception as e:
                model_results.append({
                    'frame': frame_num,
                    'timestamp': timestamp,
                    'response': f"Exception: {str(e)}",
                    'yes_no': "ERROR"
                })
                print(f"  Frame {frame_num} ({timestamp:.1f}s): ERROR - {str(e)}")
        results[model_display_name] = model_results

    # Comparison table: one row per frame, one column per model.
    print("\n" + "=" * 80)
    print("RESULTS COMPARISON TABLE")
    print("=" * 80)
    header = f"{'Frame':<8} {'Time':<8}"
    for model_name in models_to_test.keys():
        header += f" {model_name:<15}"
    print(header)
    print("-" * len(header))
    for i in range(len(test_frames)):
        frame_num = i + 1
        timestamp = test_frames[i]['timestamp']
        row = f"{frame_num:<8} {timestamp:<8.1f}"
        for model_name in models_to_test.keys():
            yes_no = results[model_name][i]['yes_no']
            row += f" {yes_no:<15}"
        print(row)

    # Analysis and recommendation.
    print("\n" + "=" * 80)
    print("ANALYSIS & RECOMMENDATION")
    print("=" * 80)

    # Score each model by how often it produced a clean yes/no answer.
    # (Loop variable renamed from the original's `model_results`, which
    # shadowed the per-model list built above.)
    model_scores = {}
    for model_name, frame_results in results.items():
        success_count = sum(1 for r in frame_results if r['yes_no'] in ['YES', 'NO'])
        error_count = sum(1 for r in frame_results if r['yes_no'] == 'ERROR')
        unclear_count = sum(1 for r in frame_results if r['yes_no'] == 'UNCLEAR')
        model_scores[model_name] = {
            'success': success_count,
            'error': error_count,
            'unclear': unclear_count,
            'success_rate': success_count / len(frame_results) * 100
        }

    print("\nModel Performance:")
    print(f"{'Model':<20} {'Success':<8} {'Errors':<8} {'Unclear':<8} {'Success Rate':<12}")
    print("-" * 70)
    for model_name, scores in model_scores.items():
        print(f"{model_name:<20} {scores['success']:<8} {scores['error']:<8} {scores['unclear']:<8} {scores['success_rate']:<12.1f}%")

    # Recommend the model with the highest success rate.
    best_model = max(model_scores.items(), key=lambda x: x[1]['success_rate'])
    print(f"\n🏆 BEST MODEL: {best_model[0]}")
    print(f"   Success Rate: {best_model[1]['success_rate']:.1f}%")
    print(f"   Recommendation: Use this model for yes/no person detection")
    return results, best_model[0]
def extract_yes_no(response):
    """Normalize a free-text model reply to "YES", "NO", or "UNCLEAR".

    Matching is done on whole words rather than substrings — the original
    substring checks misfired (e.g. "unknown" contains "no" and would be
    classified NO; "germany" contains "man" and would be classified YES).

    Args:
        response: raw model output; may be empty or None.

    Returns:
        str: "YES", "NO", or "UNCLEAR".
    """
    if not response:
        return "UNCLEAR"
    response_lower = response.lower().strip()
    # Tokenize into alphabetic words so punctuation ("yes.") and
    # embedded substrings ("unknown") are handled correctly.
    words = re.findall(r"[a-z]+", response_lower)
    word_set = set(words)
    # Direct answer at the start of the reply ("yes", "no, there is...").
    if words and words[0] == "yes":
        return "YES"
    if words and words[0] == "no":
        return "NO"
    # An unambiguous yes/no anywhere in the reply.
    if "yes" in word_set and "no" not in word_set:
        return "YES"
    if "no" in word_set and "yes" not in word_set:
        return "NO"
    # Person-related keywords as a backup signal.
    person_words = ('person', 'people', 'man', 'woman', 'boy', 'girl', 'human')
    if any(word in word_set for word in person_words):
        return "YES"
    # Negative phrasing without an explicit "no".
    negative_words = ('not', 'none', 'empty', 'nobody')
    if any(word in word_set for word in negative_words) or 'no one' in response_lower:
        return "NO"
    return "UNCLEAR"
if __name__ == "__main__":
test_yes_no_models()