import os
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, confusion_matrix, classification_report, roc_curve
)
from tensorflow.keras.models import load_model
import matplotlib.pyplot as plt
# Paths
MODEL_PATH = "./models"
DATASET_PATH = "./processed_datasets"

# Model and dataset filenames
MODELS = [
    "final_model_binary_augmented.h5",
    "final_model_binary_log_mel.h5",
    "final_model_binary_mfcc.h5",
    "final_model_multi_augmented.h5",
    "final_model_multi_log_mel.h5",
    "final_model_multi_mfcc.h5",
]

DATASETS = {
    "binary_augmented": ("X_test_binary_augmented.npy", "y_test_binary_augmented.npy"),
    "binary_log_mel": ("X_test_binary_log_mel.npy", "y_test_binary_log_mel.npy"),
    "binary_mfcc": ("X_test_binary_mfcc.npy", "y_test_binary_mfcc.npy"),
    "multi_augmented": ("X_test_multi_augmented.npy", "y_test_multi_augmented.npy"),
    "multi_log_mel": ("X_test_multi_log_mel.npy", "y_test_multi_log_mel.npy"),
    "multi_mfcc": ("X_test_multi_mfcc.npy", "y_test_multi_mfcc.npy"),
}
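
# Assumption (not verifiable from this script alone): each X_test_*.npy holds
# model-ready feature arrays, and each y_test_*.npy holds one-hot encoded labels,
# since the evaluation below argmaxes over axis 1 for both predictions and targets.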
# Collected per-model metrics (list of dicts, one row per evaluated model)
metrics_records = []
# Function to evaluate a model
def evaluate_model(model, X_test, y_test, mode):
    # Predict class probabilities; labels are assumed to be one-hot encoded,
    # including the binary case (two output columns).
    y_pred_prob = model.predict(X_test)
    y_pred = np.argmax(y_pred_prob, axis=1)
    y_true = np.argmax(y_test, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    # With one-hot y_test, this yields a one-vs-rest (label-indicator) ROC-AUC.
    auc = roc_auc_score(y_test, y_pred_prob, multi_class='ovr')
    conf_matrix = confusion_matrix(y_true, y_pred)

    print(f"--- Evaluation for {mode} ---")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print(f"ROC-AUC: {auc:.4f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(classification_report(y_true, y_pred))
    print("\n")

    # Log metrics
    metrics_records.append({
        "Model": mode,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC-AUC": auc
    })

    # Plot one-vs-rest ROC curves, one per class column
    fpr = {}
    tpr = {}
    for i in range(y_test.shape[1]):
        fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_pred_prob[:, i])

    plt.figure(figsize=(10, 6))
    for i in range(y_test.shape[1]):
        plt.plot(fpr[i], tpr[i], label=f"Class {i} ROC")
    plt.plot([0, 1], [0, 1], 'k--', label='Chance')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f"ROC Curve - {mode}")
    plt.legend()
    plt.savefig(f"roc_curve_{mode}.png")
    plt.close()
# Evaluate all models
for model_name in MODELS:
    # Derive the dataset key from the model filename,
    # e.g. "final_model_binary_mfcc.h5" -> "binary_mfcc"
    mode_key = model_name.replace("final_model_", "").replace(".h5", "").replace(" ", "_").lower()
    dataset = DATASETS.get(mode_key)
    if dataset:
        # Load the model and dataset
        model_path = os.path.join(MODEL_PATH, model_name)
        model = load_model(model_path)
        X_test_path, y_test_path = dataset
        X_test = np.load(os.path.join(DATASET_PATH, X_test_path))
        y_test = np.load(os.path.join(DATASET_PATH, y_test_path))

        # Evaluate the model
        evaluate_model(model, X_test, y_test, mode_key)
    else:
        print(f"No dataset found for model: {model_name}")
# Save metrics as a CSV
metrics_df = pd.DataFrame(metrics_records)
metrics_df.to_csv("model_evaluation_summary.csv", index=False)
print("Evaluation complete. Summary saved as 'model_evaluation_summary.csv'.")