Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Script to evaluate the certificate analyzer | |
| """ | |
| import argparse | |
| import sys | |
| from pathlib import Path | |
| import json | |
| from datetime import datetime | |
| # Add app to path | |
| sys.path.append(str(Path(__file__).parent.parent)) | |
| def evaluate_on_synthetic_data(analyzer, test_data_dir: str): | |
| """Evaluate analyzer on synthetic test data""" | |
| test_path = Path(test_data_dir) | |
| images_dir = test_path / "images" | |
| labels_dir = test_path / "labels" | |
| results = [] | |
| # Get test images | |
| image_files = list(images_dir.glob("*.png"))[:100] # Test on 100 samples | |
| print(f"Evaluating on {len(image_files)} test samples...") | |
| for i, img_path in enumerate(image_files): | |
| try: | |
| # Load image | |
| import cv2 | |
| image = cv2.imread(str(img_path)) | |
| if image is None: | |
| print(f"Warning: Failed to load {img_path}") | |
| continue | |
| # Load ground truth | |
| label_path = labels_dir / f"{img_path.stem}.json" | |
| if not label_path.exists(): | |
| print(f"Warning: No label for {img_path.name}") | |
| continue | |
| with open(label_path, 'r') as f: | |
| ground_truth = json.load(f) | |
| # Analyze with analyzer | |
| # Note: This requires the analyzer to have a method for direct image analysis | |
| # For now, we'll simulate analysis | |
| # Calculate metrics | |
| result = { | |
| "image": img_path.name, | |
| "ground_truth": ground_truth, | |
| "analysis_time": 0.0, | |
| "success": True | |
| } | |
| results.append(result) | |
| if (i + 1) % 10 == 0: | |
| print(f"Processed {i + 1}/{len(image_files)} samples") | |
| except Exception as e: | |
| print(f"Error processing {img_path.name}: {e}") | |
| results.append({ | |
| "image": img_path.name, | |
| "error": str(e), | |
| "success": False | |
| }) | |
| return results | |
| def calculate_metrics(results): | |
| """Calculate evaluation metrics""" | |
| successful = [r for r in results if r.get('success', False)] | |
| total = len(results) | |
| if total == 0: | |
| return { | |
| "total_samples": 0, | |
| "success_rate": 0.0, | |
| "average_time": 0.0 | |
| } | |
| # Calculate average processing time | |
| processing_times = [r.get('analysis_time', 0) for r in successful] | |
| avg_time = sum(processing_times) / len(processing_times) if processing_times else 0 | |
| # Calculate field extraction accuracy (simplified) | |
| field_accuracy = { | |
| "name": 0.0, | |
| "student_id": 0.0, | |
| "university": 0.0, | |
| "course": 0.0 | |
| } | |
| # This would require comparing extracted fields with ground truth | |
| # For now, we'll return placeholder metrics | |
| return { | |
| "total_samples": total, | |
| "successful_samples": len(successful), | |
| "success_rate": len(successful) / total, | |
| "average_processing_time": avg_time, | |
| "field_accuracy": field_accuracy, | |
| "evaluation_timestamp": datetime.now().isoformat() | |
| } | |
| def main(): | |
| parser = argparse.ArgumentParser(description="Evaluate certificate analyzer") | |
| parser.add_argument("--test-data", type=str, default="data/training/synthetic", | |
| help="Directory with test data") | |
| parser.add_argument("--output", type=str, default="evaluation_results.json", | |
| help="Output file for results") | |
| args = parser.parse_args() | |
| print("Initializing analyzer...") | |
| from app.analyzers.certificate_analyzer import ProductionCertificateAnalyzer | |
| analyzer = ProductionCertificateAnalyzer(use_ml=True) | |
| print(f"Running evaluation on {args.test_data}...") | |
| results = evaluate_on_synthetic_data(analyzer, args.test_data) | |
| print("Calculating metrics...") | |
| metrics = calculate_metrics(results) | |
| # Save results | |
| output_data = { | |
| "evaluation": metrics, | |
| "sample_results": results[:10], # Include first 10 results | |
| "timestamp": datetime.now().isoformat() | |
| } | |
| with open(args.output, 'w') as f: | |
| json.dump(output_data, f, indent=2) | |
| print(f"\n✅ Evaluation complete!") | |
| print(f" Results saved to: {args.output}") | |
| print(f"\nMetrics:") | |
| print(f" Total samples: {metrics['total_samples']}") | |
| print(f" Success rate: {metrics['success_rate']:.1%}") | |
| print(f" Avg processing time: {metrics['average_processing_time']:.2f}s") | |
| if __name__ == "__main__": | |
| main() |