"""Benchmark a set of model checkpoints and save the results to CSV."""
import csv
import logging

from benchmarking import BenchmarkEvaluator

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def read_models(models_file):
    """Read model names from a text file, one per line."""
    try:
        with open(models_file, 'r') as f:
            return [line.strip() for line in f if line.strip()]
    except Exception as e:
        logger.error(f"Error reading models file: {e}")
        return []


def evaluate_model(model_name, file_path):
    """Evaluate a single model and return its results."""
    try:
        evaluator = BenchmarkEvaluator(model_name)
        results = evaluator.run_benchmark(file_path)
        return {
            'model_name': model_name,
            'accuracy': results['accuracy'],
            'total_samples': results['total_samples'],
            'processed_samples': results['processed_samples'],
        }
    except Exception as e:
        logger.error(f"Error evaluating model {model_name}: {e}")
        return None


def save_results_to_csv(results, output_file='model_results.csv'):
    """Save evaluation results to a CSV file."""
    fieldnames = ['model_name', 'accuracy', 'total_samples', 'processed_samples']
    try:
        with open(output_file, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(results)
        logger.info(f"Results saved to {output_file}")
    except Exception as e:
        logger.error(f"Error saving results to CSV: {e}")


def main():
    FILE_PATH = "_Benchmarking_DB.json"
    models = [
        "../kista_checkpoint-1199",
        "../kista_checkpoint-2398",
        "../kista_checkpoint-3597",
    ]
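    # Alternatively, load the checkpoint list from a text file with
    # read_models ("models.txt" below is a hypothetical filename):
    # models = read_models("models.txt")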
    if not models:
        logger.error("No models to evaluate")
        return
    all_results = []
    for model_name in models:
        logger.info(f"Evaluating model: {model_name}")
        result = evaluate_model(model_name, FILE_PATH)
        if result:
            all_results.append(result)
            print(f"\nResults for {model_name}:")
            print(f"Accuracy: {result['accuracy']:.2%}")
            print(f"Total samples: {result['total_samples']}")
            print(f"Processed samples: {result['processed_samples']}")
    save_results_to_csv(all_results)


if __name__ == "__main__":
    main()