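"""Evaluate the trained Keras models on their held-out test sets.

Loads each final_model_*.h5 from MODEL_PATH, pairs it with the matching
X_test/y_test arrays in DATASET_PATH, prints accuracy, precision, recall,
F1 score, ROC-AUC, and the confusion matrix, saves a per-class ROC curve
plot, and writes a summary CSV (model_evaluation_summary.csv).
"""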
import os
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report, roc_curve
)
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
# Paths
MODEL_PATH = "./models"
DATASET_PATH = "./processed_datasets"
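# Both directories are assumed to sit next to this script; adjust the paths
# above if the repository layout differs.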
# Model and dataset filenames
MODELS = [
    "final_model_binary_augmented.h5",
    "final_model_binary_log_mel.h5",
    "final_model_binary_mfcc.h5",
    "final_model_multi_augmented.h5",
    "final_model_multi_log_mel.h5",
    "final_model_multi_mfcc.h5"
]
DATASETS = {
    "binary_augmented": ("X_test_binary_augmented.npy", "y_test_binary_augmented.npy"),
    "binary_log_mel": ("X_test_binary_log_mel.npy", "y_test_binary_log_mel.npy"),
    "binary_mfcc": ("X_test_binary_mfcc.npy", "y_test_binary_mfcc.npy"),
    "multi_augmented": ("X_test_multi_augmented.npy", "y_test_multi_augmented.npy"),
    "multi_log_mel": ("X_test_multi_log_mel.npy", "y_test_multi_log_mel.npy"),
    "multi_mfcc": ("X_test_multi_mfcc.npy", "y_test_multi_mfcc.npy")
}
# Collected metrics, one dict per evaluated model (written to CSV at the end)
metrics_dict = []
# Evaluate a single model on its test set, log its metrics, and save a
# per-class ROC curve plot.
def evaluate_model(model, X_test, y_test, mode):
    # y_test is expected to be one-hot encoded; the model outputs class probabilities.
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    # One-hot targets are scored in one-vs-rest form, which covers both the
    # binary and the multi-class models.
    auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')
    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f"--- Evaluation for {mode} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC-AUC: {auc:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("\n")

    # Log metrics for the summary CSV
    metrics_dict.append({
        "Model": mode,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC-AUC": auc
    })

    # Plot one ROC curve per class
    fpr = {}
    tpr = {}
    for i in range(y_test.shape[1]):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred_prob[:, i])

    plt.figure(figsize=(10, 6))
    for i in range(y_test.shape[1]):
        plt.plot(fpr[i], tpr[i], label=f"Class {i} ROC")
    plt.plot([0, 1], [0, 1], 'k--', label='Chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f"ROC Curve - {mode}")
    plt.legend()
    plt.savefig(f"roc_curve_{mode}.png")
    plt.close()
# Evaluate all models
for model_name in MODELS:
    # Derive the dataset key from the model filename,
    # e.g. "final_model_binary_mfcc.h5" -> "binary_mfcc"
    mode_key = model_name.replace("final_model_", "").replace(".h5", "").replace(" ", "_").lower()
    dataset = DATASETS.get(mode_key)
    if dataset:
        # Load the model and its matching test arrays
        model_path = os.path.join(MODEL_PATH, model_name)
        model = load_model(model_path)
        X_test_path, y_test_path = dataset
        X_test = np.load(os.path.join(DATASET_PATH, X_test_path))
        y_test = np.load(os.path.join(DATASET_PATH, y_test_path))

        # Evaluate the model
        evaluate_model(model, X_test, y_test, mode_key)
    else:
        print(f"No dataset found for model: {model_name}")
# Save metrics as a CSV
metrics_df = pd.DataFrame(metrics_dict)
metrics_df.to_csv("model_evaluation_summary.csv", index=False)
print("Evaluation complete. Summary saved as 'model_evaluation_summary.csv'.")