Spaces:
Running
Running
File size: 4,723 Bytes
#!/usr/bin/env python3
"""
Script to evaluate the certificate analyzer
"""
import argparse
import sys
from pathlib import Path
import json
from datetime import datetime
# Add app to path: append the parent of this script's directory (presumably
# the project root -- confirm against the repo layout) to sys.path so that the
# `app.*` import performed in main() can be resolved when run as a script.
sys.path.append(str(Path(__file__).parent.parent))
def evaluate_on_synthetic_data(analyzer, test_data_dir: str, max_samples: int = 100):
    """Evaluate the analyzer on synthetic test data.

    Looks for ``*.png`` files in ``<test_data_dir>/images`` and pairs each one
    with ``<test_data_dir>/labels/<image stem>.json``.  Actual analysis is not
    wired in yet: ``analyzer`` is accepted but not called, and every sample
    whose image and label both load is recorded as a success with an
    ``analysis_time`` of ``0.0``.

    Args:
        analyzer: Analyzer under evaluation (currently unused placeholder).
        test_data_dir: Directory containing ``images`` and ``labels`` subdirs.
        max_samples: Maximum number of images to evaluate (default 100).

    Returns:
        A list with one dict per processed sample.  Successful entries have
        the keys ``image``, ``ground_truth``, ``analysis_time`` and
        ``success`` (True); samples that raised have ``image``, ``error`` and
        ``success`` (False).  Images that cannot be read or that have no
        label file are skipped with a warning and produce no entry.
    """
    test_path = Path(test_data_dir)
    images_dir = test_path / "images"
    labels_dir = test_path / "labels"
    results = []

    # glob() makes no ordering guarantee; sort so that the evaluated subset is
    # the same on every run and on every machine.
    image_files = sorted(images_dir.glob("*.png"))[:max(max_samples, 0)]
    print(f"Evaluating on {len(image_files)} test samples...")

    for i, img_path in enumerate(image_files):
        try:
            # Imported lazily inside the try so that a missing OpenCV install
            # is reported per sample instead of aborting the whole run.
            import cv2

            image = cv2.imread(str(img_path))
            if image is None:
                print(f"Warning: Failed to load {img_path}")
                continue

            # Load ground truth
            label_path = labels_dir / f"{img_path.stem}.json"
            if not label_path.exists():
                print(f"Warning: No label for {img_path.name}")
                continue
            with open(label_path, 'r', encoding="utf-8") as f:
                ground_truth = json.load(f)

            # NOTE: placeholder result. Direct image analysis through
            # `analyzer` is not implemented here yet, so no time is measured
            # and no extracted fields are compared with the ground truth.
            results.append({
                "image": img_path.name,
                "ground_truth": ground_truth,
                "analysis_time": 0.0,
                "success": True
            })

            if (i + 1) % 10 == 0:
                print(f"Processed {i + 1}/{len(image_files)} samples")
        except Exception as e:
            # Per-sample boundary: record the failure and keep going.
            print(f"Error processing {img_path.name}: {e}")
            results.append({
                "image": img_path.name,
                "error": str(e),
                "success": False
            })

    return results
def calculate_metrics(results):
    """Calculate evaluation metrics from per-sample results.

    Args:
        results: List of per-sample dicts.  A sample counts as successful
            when its ``success`` entry is truthy; ``analysis_time`` (default
            0) is read from successful samples only.

    Returns:
        A dict with the keys ``total_samples``, ``successful_samples``,
        ``success_rate``, ``average_processing_time``, ``field_accuracy`` and
        ``evaluation_timestamp``.  The same keys are returned for an empty
        ``results`` list (with zeroed values), so callers can read them
        unconditionally.  For an empty list the legacy ``average_time`` key
        is also included for backward compatibility.
    """
    total = len(results)
    successful = [r for r in results if r.get('success', False)]

    # Average processing time over successful samples only.
    processing_times = [r.get('analysis_time', 0) for r in successful]
    avg_time = (
        sum(processing_times) / len(processing_times) if processing_times else 0.0
    )

    # Placeholder: field-level accuracy would require comparing extracted
    # fields with the ground truth, which is not available yet.
    field_accuracy = {
        "name": 0.0,
        "student_id": 0.0,
        "university": 0.0,
        "course": 0.0
    }

    metrics = {
        "total_samples": total,
        "successful_samples": len(successful),
        # Guard the division so an empty run yields 0.0 instead of raising.
        "success_rate": len(successful) / total if total else 0.0,
        "average_processing_time": avg_time,
        "field_accuracy": field_accuracy,
        "evaluation_timestamp": datetime.now().isoformat()
    }
    if total == 0:
        # Key historically returned for empty input; kept for old callers.
        metrics["average_time"] = 0.0
    return metrics
def main():
    """Command-line entry point.

    Parses ``--test-data`` and ``--output``, builds the analyzer, runs the
    evaluation, writes the metrics plus the first ten sample results to the
    output file as JSON, and prints a short summary.
    """
    cli = argparse.ArgumentParser(description="Evaluate certificate analyzer")
    cli.add_argument(
        "--test-data",
        type=str,
        default="data/training/synthetic",
        help="Directory with test data",
    )
    cli.add_argument(
        "--output",
        type=str,
        default="evaluation_results.json",
        help="Output file for results",
    )
    options = cli.parse_args()

    print("Initializing analyzer...")
    # Imported here, after argument parsing, exactly where the analyzer is built.
    from app.analyzers.certificate_analyzer import ProductionCertificateAnalyzer
    cert_analyzer = ProductionCertificateAnalyzer(use_ml=True)

    print(f"Running evaluation on {options.test_data}...")
    sample_results = evaluate_on_synthetic_data(cert_analyzer, options.test_data)

    print("Calculating metrics...")
    summary = calculate_metrics(sample_results)

    # Persist the metrics together with a small excerpt of the raw results.
    report = {
        "evaluation": summary,
        "sample_results": sample_results[:10],
        "timestamp": datetime.now().isoformat(),
    }
    with open(options.output, 'w') as out_file:
        json.dump(report, out_file, indent=2)

    print("\n✅ Evaluation complete!")
    print(f" Results saved to: {options.output}")
    print("\nMetrics:")
    print(f" Total samples: {summary['total_samples']}")
    print(f" Success rate: {summary['success_rate']:.1%}")
    print(f" Avg processing time: {summary['average_processing_time']:.2f}s")
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()