ai-certificate / scripts /evaluate.py
jo8780's picture
Initial Space deploy: AI Certificate Analyzer
17f1739
#!/usr/bin/env python3
"""
Evaluate the certificate analyzer on a labelled synthetic test set and write
the resulting metrics to a JSON file.
"""
import argparse
import sys
from pathlib import Path
import json
from datetime import datetime
# Add the project root (the parent of this script's directory) to sys.path so the `app` package can be imported
sys.path.append(str(Path(__file__).parent.parent))
def evaluate_on_synthetic_data(analyzer, test_data_dir: str, max_samples=100):
    """Collect per-sample evaluation records for a synthetic test set.

    The test set is read from ``<test_data_dir>/images/*.png``; each image is
    paired with the ground-truth file ``<test_data_dir>/labels/<stem>.json``.
    Images that cannot be decoded and images without a label file are skipped
    with a warning. Any exception raised while handling a sample is recorded
    as an unsuccessful result instead of aborting the whole run.

    NOTE: the analyzer is not invoked yet. ``analysis_time`` is a placeholder
    (0.0) and every sample that loads correctly is reported as successful.

    Args:
        analyzer: Analyzer instance under evaluation (currently unused).
        test_data_dir: Directory containing the ``images`` and ``labels``
            sub-directories.
        max_samples: Maximum number of images to evaluate (non-negative).
            ``None`` evaluates every image found. Defaults to 100.

    Returns:
        A list of dicts. Successful samples carry ``image``, ``ground_truth``,
        ``analysis_time`` and ``success=True``; failed samples carry
        ``image``, ``error`` and ``success=False``.
    """
    test_path = Path(test_data_dir)
    images_dir = test_path / "images"
    labels_dir = test_path / "labels"

    # Sort before truncating: glob() yields entries in arbitrary,
    # filesystem-dependent order, so an unsorted slice would evaluate a
    # different subset from one run/machine to the next.
    image_files = sorted(images_dir.glob("*.png"))[:max_samples]
    print(f"Evaluating on {len(image_files)} test samples...")

    results = []
    for i, img_path in enumerate(image_files):
        try:
            # Kept inside the try (and therefore lazy): a missing OpenCV
            # install is reported per sample rather than crashing, and an
            # empty test set never needs the dependency at all.
            import cv2

            image = cv2.imread(str(img_path))
            if image is None:
                print(f"Warning: Failed to load {img_path}")
                continue

            label_path = labels_dir / f"{img_path.stem}.json"
            if not label_path.exists():
                print(f"Warning: No label for {img_path.name}")
                continue

            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(label_path, 'r', encoding='utf-8') as f:
                ground_truth = json.load(f)

            # TODO: run `analyzer` on `image` and time it. This requires the
            # analyzer to expose a method for direct image analysis; until
            # then the record below only simulates an analysis.
            results.append({
                "image": img_path.name,
                "ground_truth": ground_truth,
                "analysis_time": 0.0,
                "success": True
            })

            # Progress is only reported for samples that were recorded.
            if (i + 1) % 10 == 0:
                print(f"Processed {i + 1}/{len(image_files)} samples")
        except Exception as e:
            print(f"Error processing {img_path.name}: {e}")
            results.append({
                "image": img_path.name,
                "error": str(e),
                "success": False
            })
    return results
def calculate_metrics(results):
    """Aggregate per-sample results into summary evaluation metrics.

    Args:
        results: List of per-sample dicts. A sample counts as successful when
            its ``success`` entry is truthy; ``analysis_time`` (default 0) is
            read from successful samples only.

    Returns:
        A dict with ``total_samples``, ``successful_samples``,
        ``success_rate``, ``average_processing_time``, ``field_accuracy``
        and ``evaluation_timestamp``. The same keys are returned for an empty
        ``results`` list (with zero values), so callers can read them
        unconditionally. For backward compatibility the empty case also keeps
        the legacy ``average_time`` key.
    """
    successful = [r for r in results if r.get('success', False)]
    total = len(results)

    # Average processing time over successful samples only.
    processing_times = [r.get('analysis_time', 0) for r in successful]
    avg_time = (
        sum(processing_times) / len(processing_times) if processing_times else 0.0
    )

    # Field extraction accuracy (simplified): computing it would require
    # comparing extracted fields with ground truth, so these are placeholders.
    field_accuracy = {
        "name": 0.0,
        "student_id": 0.0,
        "university": 0.0,
        "course": 0.0
    }

    metrics = {
        "total_samples": total,
        "successful_samples": len(successful),
        # Guard the division so an empty result list yields 0.0.
        "success_rate": len(successful) / total if total else 0.0,
        "average_processing_time": avg_time,
        "field_accuracy": field_accuracy,
        "evaluation_timestamp": datetime.now().isoformat()
    }
    if total == 0:
        # Legacy key previously returned for empty input; kept for old callers.
        metrics["average_time"] = 0.0
    return metrics
def main():
    """Command-line entry point: evaluate the analyzer and save a JSON report.

    Parses ``--test-data`` (test set directory) and ``--output`` (report
    path), builds a ``ProductionCertificateAnalyzer``, runs the evaluation,
    writes the metrics plus the first 10 per-sample results to the output
    file, and prints a short summary.
    """
    parser = argparse.ArgumentParser(description="Evaluate certificate analyzer")
    parser.add_argument("--test-data", type=str, default="data/training/synthetic",
                        help="Directory with test data")
    parser.add_argument("--output", type=str, default="evaluation_results.json",
                        help="Output file for results")
    args = parser.parse_args()

    print("Initializing analyzer...")
    # Project-local import; resolved through the sys.path entry added at the
    # top of this script.
    from app.analyzers.certificate_analyzer import ProductionCertificateAnalyzer
    analyzer = ProductionCertificateAnalyzer(use_ml=True)

    print(f"Running evaluation on {args.test_data}...")
    results = evaluate_on_synthetic_data(analyzer, args.test_data)

    print("Calculating metrics...")
    metrics = calculate_metrics(results)

    # Save results
    output_data = {
        "evaluation": metrics,
        "sample_results": results[:10],  # Include first 10 results
        "timestamp": datetime.now().isoformat()
    }
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(output_data, f, indent=2)

    # When no samples were found the metrics dict may only carry the legacy
    # "average_time" key; read the value defensively so the summary does not
    # crash with KeyError on an empty test set.
    avg_time = metrics.get('average_processing_time', metrics.get('average_time', 0.0))

    print("\n✅ Evaluation complete!")
    print(f" Results saved to: {args.output}")
    print("\nMetrics:")
    print(f" Total samples: {metrics['total_samples']}")
    print(f" Success rate: {metrics['success_rate']:.1%}")
    print(f" Avg processing time: {avg_time:.2f}s")
# Run the evaluation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()