import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, roc_curve
)
from tensorflow.keras.models import load_model

# Paths
MODEL_PATH = "./models"
DATASET_PATH = "./processed_datasets"

# Model and dataset filenames
MODELS = [
    "final_model_binary_augmented.h5",
    "final_model_binary_log_mel.h5",
    "final_model_binary_mfcc.h5",
    "final_model_multi_augmented.h5",
    "final_model_multi_log_mel.h5",
    "final_model_multi_mfcc.h5",
]

DATASETS = {
    "binary_augmented": ("X_test_binary_augmented.npy", "y_test_binary_augmented.npy"),
    "binary_log_mel": ("X_test_binary_log_mel.npy", "y_test_binary_log_mel.npy"),
    "binary_mfcc": ("X_test_binary_mfcc.npy", "y_test_binary_mfcc.npy"),
    "multi_augmented": ("X_test_multi_augmented.npy", "y_test_multi_augmented.npy"),
    "multi_log_mel": ("X_test_multi_log_mel.npy", "y_test_multi_log_mel.npy"),
    "multi_mfcc": ("X_test_multi_mfcc.npy", "y_test_multi_mfcc.npy"),
}

# Accumulated metrics, one record per model
metrics_records = []


def evaluate_model(model, X_test, y_test, mode):
    """Evaluate a model on one test set, print metrics, and save its ROC curves.

    Assumes y_test is one-hot encoded and the model outputs one probability
    column per class (softmax, or a two-unit output for the binary models).
    """
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    # With one-hot y_test this yields a macro-averaged per-class AUC
    auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')
    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f"--- Evaluation for {mode} ---")
    print(f"Accuracy:  {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall:    {recall:.4f}")
    print(f"F1 Score:  {f1:.4f}")
    print(f"ROC-AUC:   {auc:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print()

    # Log metrics
    metrics_records.append({
        "Model": mode,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC-AUC": auc,
    })

    # Plot one ROC curve per class (indexing by column avoids mismatches
    # when a class happens to be absent from y_true)
    plt.figure(figsize=(10, 6))
    for i in range(y_test.shape[1]):
        fpr, tpr, _ = roc_curve(y_test[:, i], y_pred_prob[:, i])
        plt.plot(fpr, tpr, label=f"Class {i} ROC")
    plt.plot([0, 1], [0, 1], 'k--', label='Chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f"ROC Curve - {mode}")
    plt.legend()
    plt.savefig(f"roc_curve_{mode}.png")
    plt.close()


# Evaluate all models
for model_name in MODELS:
    # e.g. "final_model_binary_mfcc.h5" -> "binary_mfcc"
    mode_key = model_name.replace("final_model_", "").replace(".h5", "")
    dataset = DATASETS.get(mode_key)
    if dataset:
        # Load the model and its matching test set
        model = load_model(os.path.join(MODEL_PATH, model_name))
        X_test_path, y_test_path = dataset
        X_test = np.load(os.path.join(DATASET_PATH, X_test_path))
        y_test = np.load(os.path.join(DATASET_PATH, y_test_path))

        evaluate_model(model, X_test, y_test, mode_key)
    else:
        print(f"No dataset found for model: {model_name}")

# Save metrics as a CSV
metrics_df = pd.DataFrame(metrics_records)
metrics_df.to_csv("model_evaluation_summary.csv", index=False)
print("Evaluation complete. Summary saved as 'model_evaluation_summary.csv'.")
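
# Optional sanity check: a minimal sketch that reads back the summary just
# written and ranks the models by weighted F1. It assumes the CSV above was
# saved in the current working directory and uses the same column names
# logged in evaluate_model().
summary = pd.read_csv("model_evaluation_summary.csv")
print(summary.sort_values("F1 Score", ascending=False).to_string(index=False))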