# Source: Investment_Assistant / scripts / evaluate_model.py
# Last commit 7a658e1 (author: Egeekle):
#   "Add MLOps, RAG, monitoring, and utility dependencies to requirements.txt"
"""
Model evaluation script
Evaluates models and generates metrics/plots
"""
import os
import pandas as pd
import numpy as np
import pickle
import json
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
f1_score, confusion_matrix, roc_curve, auc)
import matplotlib.pyplot as plt
import yaml
def load_params():
    """Load pipeline parameters from ``params.yaml``.

    The file is looked up relative to the current working directory.

    Returns:
        The parsed YAML document as produced by ``yaml.safe_load``
        (``None`` when the file is empty).

    Raises:
        FileNotFoundError: If ``params.yaml`` does not exist.
        yaml.YAMLError: If the file does not contain valid YAML.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding (e.g. cp1252 on Windows), which can corrupt non-ASCII values.
    with open("params.yaml", "r", encoding="utf-8") as f:
        return yaml.safe_load(f)
def _save_confusion_matrix_plot(y_true, y_pred, strategy_type, output_dir):
    """Render the HOLD/BUY confusion matrix and save it as a PNG."""
    # Pin the label order so the matrix is always 2x2. Without ``labels``,
    # scikit-learn returns a 1x1 matrix when only one class occurs in both
    # y_true and y_pred, which would not match the two hard-coded tick labels.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    class_names = ['HOLD', 'BUY']

    plt.figure(figsize=(8, 6))
    try:
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title(f'Confusion Matrix - {strategy_type} Strategy')
        plt.colorbar()
        tick_marks = np.arange(len(class_names))
        plt.xticks(tick_marks, class_names)
        plt.yticks(tick_marks, class_names)
        plt.ylabel('True label')
        plt.xlabel('Predicted label')

        # Annotate every cell with its count; use white text on dark cells
        # so the number stays readable against the colormap.
        thresh = cm.max() / 2.
        for (i, j), count in np.ndenumerate(cm):
            plt.text(j, i, format(count, 'd'),
                     horizontalalignment="center",
                     color="white" if count > thresh else "black")

        plt.tight_layout()
        plt.savefig(f"{output_dir}/confusion_matrix_{strategy_type.lower()}.png")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()


def _save_roc_curve_plot(y_true, y_proba, strategy_type, output_dir):
    """Render the ROC curve (with its AUC in the legend) and save it as a PNG."""
    fpr, tpr, _ = roc_curve(y_true, y_proba)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(8, 6))
    try:
        plt.plot(fpr, tpr, color='darkorange', lw=2,
                 label=f'ROC curve (AUC = {roc_auc:.2f})')
        # Diagonal reference line drawn from (0, 0) to (1, 1).
        plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(f'ROC Curve - {strategy_type} Strategy')
        plt.legend(loc="lower right")
        plt.tight_layout()
        plt.savefig(f"{output_dir}/roc_curve_{strategy_type.lower()}.png")
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close()


def create_evaluation_plots(y_true, y_pred, y_proba, strategy_type, output_dir="plots"):
    """Create and save evaluation plots for one strategy.

    Writes ``confusion_matrix_<strategy>.png`` and, when possible,
    ``roc_curve_<strategy>.png`` into ``output_dir`` (created if missing).

    Args:
        y_true: True binary labels; 0 is plotted as HOLD and 1 as BUY.
        y_pred: Predicted binary labels, using the same 0/1 encoding.
        y_proba: Scores for the positive class, or ``None`` to skip the
            ROC curve.
        strategy_type: Strategy name; used in plot titles and, lower-cased,
            in the output file names.
        output_dir: Directory the PNG files are written to.

    Raises:
        Exception: Errors while building or saving the confusion matrix
            propagate to the caller. ROC-curve errors are reported on
            stdout and otherwise ignored.
    """
    os.makedirs(output_dir, exist_ok=True)

    _save_confusion_matrix_plot(y_true, y_pred, strategy_type, output_dir)

    # A ROC curve needs scores and both classes present in y_true.
    if y_proba is not None and len(np.unique(y_true)) > 1:
        try:
            _save_roc_curve_plot(y_true, y_proba, strategy_type, output_dir)
        except Exception as exc:
            # The ROC plot is best-effort: keep going, but say why it was
            # skipped instead of swallowing the error silently.
            print(f"Skipping ROC curve for {strategy_type}: {exc}")
def main():
    """Evaluate the TOP and BOTTOM strategy models and persist the results.

    Reads ``data/processed/indicators.parquet``, rebuilds the rule-based
    labels for each strategy, scores every model found under ``models/``,
    writes plots via ``create_evaluation_plots`` and stores the metrics in
    ``metrics/evaluation_metrics.json``. Strategies whose model file is
    missing are reported and skipped.
    """
    # The loaded parameters are not used below; the call is kept so that a
    # missing or invalid params.yaml still aborts the run as it did before.
    load_params()

    # Load data and drop rows where the core indicators are undefined.
    df = pd.read_parquet("data/processed/indicators.parquet")
    df = df.dropna(subset=["rsi", "sma_10", "sma_20"])

    # Prepare features; remaining gaps are filled with 0.
    features = ["sma_10", "sma_20", "rsi", "volatility", "price_position"]
    X = df[features].fillna(0)

    os.makedirs("metrics", exist_ok=True)
    os.makedirs("plots", exist_ok=True)

    results = {}

    # Evaluate both strategies
    for strategy_type in ["TOP", "BOTTOM"]:
        model_path = f"models/{strategy_type.lower()}_strategy_model.pkl"
        if not os.path.exists(model_path):
            print(f"Model not found: {model_path}")
            continue

        # SECURITY: unpickling executes arbitrary code. Only load model files
        # produced by this project's own training step, never untrusted ones.
        with open(model_path, "rb") as f:
            model = pickle.load(f)

        # Rebuild the rule-based labels (1 = BUY, 0 = HOLD) for this strategy.
        if strategy_type == "TOP":
            y = ((df["price_position"] > 70) &
                 (df["rsi"] > 50) & (df["rsi"] < 70)).astype(int)
        else:
            y = ((df["price_position"] < 30) & (df["rsi"] < 30)).astype(int)

        # Predictions
        y_pred = model.predict(X)
        try:
            y_proba = model.predict_proba(X)[:, 1]
        except (AttributeError, IndexError):
            # AttributeError: the model exposes no predict_proba.
            # IndexError: predict_proba returned a single column, so there
            # is no positive-class column to select.
            # Either way, continue without probabilities (no ROC curve).
            y_proba = None

        # Metrics; zero_division=0 avoids warnings when a class is never
        # predicted or never present.
        accuracy = accuracy_score(y, y_pred)
        precision = precision_score(y, y_pred, zero_division=0)
        recall = recall_score(y, y_pred, zero_division=0)
        f1 = f1_score(y, y_pred, zero_division=0)

        # Cast to built-in types so the values are JSON-serialisable.
        results[strategy_type] = {
            "accuracy": float(accuracy),
            "precision": float(precision),
            "recall": float(recall),
            "f1_score": float(f1),
            "n_samples": int(len(y)),
            "n_positive": int(y.sum())
        }

        # Create plots
        create_evaluation_plots(y, y_pred, y_proba, strategy_type)

        print(f"{strategy_type} Strategy Evaluation:")
        print(f" Accuracy: {accuracy:.3f}")
        print(f" Precision: {precision:.3f}")
        print(f" Recall: {recall:.3f}")
        print(f" F1 Score: {f1:.3f}")

    # Save metrics (an empty object is written when no model was found).
    with open("metrics/evaluation_metrics.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    print("\nEvaluation complete!")
# Run the evaluation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()