""" Model evaluation script Evaluates models and generates metrics/plots """ import os import pandas as pd import numpy as np import pickle import json from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve, auc) import matplotlib.pyplot as plt import yaml def load_params(): """Load parameters from params.yaml""" with open("params.yaml", "r") as f: return yaml.safe_load(f) def create_evaluation_plots(y_true, y_pred, y_proba, strategy_type, output_dir="plots"): """Create evaluation plots""" os.makedirs(output_dir, exist_ok=True) # Confusion Matrix cm = confusion_matrix(y_true, y_pred) plt.figure(figsize=(8, 6)) plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues) plt.title(f'Confusion Matrix - {strategy_type} Strategy') plt.colorbar() tick_marks = np.arange(2) plt.xticks(tick_marks, ['HOLD', 'BUY']) plt.yticks(tick_marks, ['HOLD', 'BUY']) plt.ylabel('True label') plt.xlabel('Predicted label') # Add text annotations thresh = cm.max() / 2. for i, j in np.ndindex(cm.shape): plt.text(j, i, format(cm[i, j], 'd'), horizontalalignment="center", color="white" if cm[i, j] > thresh else "black") plt.tight_layout() plt.savefig(f"{output_dir}/confusion_matrix_{strategy_type.lower()}.png") plt.close() # ROC Curve (if probabilities available) if y_proba is not None and len(np.unique(y_true)) > 1: try: fpr, tpr, _ = roc_curve(y_true, y_proba) roc_auc = auc(fpr, tpr) plt.figure(figsize=(8, 6)) plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})') plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title(f'ROC Curve - {strategy_type} Strategy') plt.legend(loc="lower right") plt.tight_layout() plt.savefig(f"{output_dir}/roc_curve_{strategy_type.lower()}.png") plt.close() except: pass def main(): """Main evaluation function""" params = load_params() # Load data df = pd.read_parquet("data/processed/indicators.parquet") df = df.dropna(subset=["rsi", "sma_10", "sma_20"]) # Prepare features features = ["sma_10", "sma_20", "rsi", "volatility", "price_position"] X = df[features].fillna(0) os.makedirs("metrics", exist_ok=True) os.makedirs("plots", exist_ok=True) results = {} # Evaluate both strategies for strategy_type in ["TOP", "BOTTOM"]: model_path = f"models/{strategy_type.lower()}_strategy_model.pkl" if not os.path.exists(model_path): print(f"Model not found: {model_path}") continue # Load model with open(model_path, "rb") as f: model = pickle.load(f) # Create labels if strategy_type == "TOP": y = ((df["price_position"] > 70) & (df["rsi"] > 50) & (df["rsi"] < 70)).astype(int) else: y = ((df["price_position"] < 30) & (df["rsi"] < 30)).astype(int) # Predictions y_pred = model.predict(X) try: y_proba = model.predict_proba(X)[:, 1] except: y_proba = None # Metrics accuracy = accuracy_score(y, y_pred) precision = precision_score(y, y_pred, zero_division=0) recall = recall_score(y, y_pred, zero_division=0) f1 = f1_score(y, y_pred, zero_division=0) results[strategy_type] = { "accuracy": float(accuracy), "precision": float(precision), "recall": float(recall), "f1_score": float(f1), "n_samples": int(len(y)), "n_positive": int(y.sum()) } # Create plots create_evaluation_plots(y, y_pred, y_proba, strategy_type) print(f"{strategy_type} Strategy Evaluation:") print(f" Accuracy: {accuracy:.3f}") print(f" Precision: {precision:.3f}") print(f" Recall: {recall:.3f}") print(f" F1 Score: {f1:.3f}") # Save metrics with open("metrics/evaluation_metrics.json", "w") as f: json.dump(results, f, indent=2) print("\nEvaluation complete!") if __name__ == "__main__": main()