"""
Model evaluation script
Evaluates models and generates metrics/plots
"""
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yaml
from sklearn.metrics import (accuracy_score, precision_score, recall_score,
                             f1_score, confusion_matrix, roc_curve, auc)

def load_params():
    """Load parameters from params.yaml"""
    with open("params.yaml", "r") as f:
        return yaml.safe_load(f)

def create_evaluation_plots(y_true, y_pred, y_proba, strategy_type, output_dir="plots"):
    """Create evaluation plots"""
    os.makedirs(output_dir, exist_ok=True)
    
    # Confusion matrix (pin both classes so the plot is always 2x2,
    # even if a strategy predicts only one class)
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title(f'Confusion Matrix - {strategy_type} Strategy')
    plt.colorbar()
    tick_marks = np.arange(2)
    plt.xticks(tick_marks, ['HOLD', 'BUY'])
    plt.yticks(tick_marks, ['HOLD', 'BUY'])
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
    # Add text annotations
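    # Use white text on dark cells and black on light cells so the counts stay legible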
    thresh = cm.max() / 2.
    for i, j in np.ndindex(cm.shape):
        plt.text(j, i, format(cm[i, j], 'd'),
                horizontalalignment="center",
                color="white" if cm[i, j] > thresh else "black")
    
    plt.tight_layout()
    plt.savefig(f"{output_dir}/confusion_matrix_{strategy_type.lower()}.png")
    plt.close()
    
    # ROC Curve (if probabilities available)
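    # Skipped when y_true contains a single class, since ROC/AUC is undefined there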
    if y_proba is not None and len(np.unique(y_true)) > 1:
        try:
            fpr, tpr, _ = roc_curve(y_true, y_proba)
            roc_auc = auc(fpr, tpr)
            
            plt.figure(figsize=(8, 6))
            plt.plot(fpr, tpr, color='darkorange', lw=2, 
                    label=f'ROC curve (AUC = {roc_auc:.2f})')
            plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            plt.xlim([0.0, 1.0])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title(f'ROC Curve - {strategy_type} Strategy')
            plt.legend(loc="lower right")
            plt.tight_layout()
            plt.savefig(f"{output_dir}/roc_curve_{strategy_type.lower()}.png")
            plt.close()
        except Exception as e:
            print(f"Skipping ROC curve for {strategy_type}: {e}")

def main():
    """Main evaluation function"""
    params = load_params()  # NOTE: currently unused below; label thresholds are hardcoded
    
    # Load engineered indicators and drop rows where the core indicators are missing
    df = pd.read_parquet("data/processed/indicators.parquet")
    df = df.dropna(subset=["rsi", "sma_10", "sma_20"])
    
    # Prepare features
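    # Any remaining gaps in the feature columns are filled with 0 so every row can be scored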
    features = ["sma_10", "sma_20", "rsi", "volatility", "price_position"]
    X = df[features].fillna(0)
    
    os.makedirs("metrics", exist_ok=True)
    os.makedirs("plots", exist_ok=True)
    
    results = {}
    
    # Evaluate both strategies
    for strategy_type in ["TOP", "BOTTOM"]:
        model_path = f"models/{strategy_type.lower()}_strategy_model.pkl"
        if not os.path.exists(model_path):
            print(f"Model not found: {model_path}")
            continue
        
        # Load model
        with open(model_path, "rb") as f:
            model = pickle.load(f)
        
        # Create labels
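        # Positive class (BUY) is defined by the rule-based conditions below;
        # these are assumed to mirror the labelling used at training time.
        # TOP: price near the top of its recent range with RSI between 50 and 70.
        # BOTTOM: price near the bottom of its range with RSI oversold (< 30).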
        if strategy_type == "TOP":
            y = ((df["price_position"] > 70) & 
                 (df["rsi"] > 50) & (df["rsi"] < 70)).astype(int)
        else:
            y = ((df["price_position"] < 30) & (df["rsi"] < 30)).astype(int)
        
        # Predictions
        y_pred = model.predict(X)
        # Not every estimator exposes predict_proba; fall back to None
        # so the ROC plot is simply skipped for such models
        y_proba = (model.predict_proba(X)[:, 1]
                   if hasattr(model, "predict_proba") else None)
        
        # Metrics
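        # zero_division=0 keeps precision/recall/F1 at 0 (without warnings)
        # when a strategy never predicts the positive class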
        accuracy = accuracy_score(y, y_pred)
        precision = precision_score(y, y_pred, zero_division=0)
        recall = recall_score(y, y_pred, zero_division=0)
        f1 = f1_score(y, y_pred, zero_division=0)
        
        results[strategy_type] = {
            "accuracy": float(accuracy),
            "precision": float(precision),
            "recall": float(recall),
            "f1_score": float(f1),
            "n_samples": int(len(y)),
            "n_positive": int(y.sum())
        }
        
        # Create plots
        create_evaluation_plots(y, y_pred, y_proba, strategy_type)
        
        print(f"{strategy_type} Strategy Evaluation:")
        print(f"  Accuracy: {accuracy:.3f}")
        print(f"  Precision: {precision:.3f}")
        print(f"  Recall: {recall:.3f}")
        print(f"  F1 Score: {f1:.3f}")
    
    # Save metrics
    with open("metrics/evaluation_metrics.json", "w") as f:
        json.dump(results, f, indent=2)
    
    print("\nEvaluation complete!")

if __name__ == "__main__":
    main()