# File size: 4,249 Bytes
#
# b2b32ab

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)
import seaborn as sns
import matplotlib.pyplot as plt


def _make_synthetic_stock_data(n_records=10000, seed=42) -> pd.DataFrame:
    """Build the synthetic stock dataset used by the demo.

    Args:
        n_records: Number of rows to generate.
        seed: Seed for the private random number generator.

    Returns:
        A DataFrame with five uniformly drawn numeric feature columns and a
        ``stock_rating`` column drawn from Buy/Hold/Sell with probabilities
        0.4/0.4/0.2. The rating is sampled independently of the features.
    """
    # A private RandomState yields the same draws as np.random.seed(seed)
    # followed by np.random.* calls, but leaves NumPy's process-wide generator
    # untouched. The draw order below must stay as is to keep the data stable.
    rng = np.random.RandomState(seed)
    return pd.DataFrame({
        'pe_ratio': rng.uniform(5, 50, n_records),
        'de_ratio': rng.uniform(0.1, 3.0, n_records),
        'roe': rng.uniform(5, 40, n_records),
        'market_cap': rng.uniform(500, 100000, n_records),
        'dividend_yield': rng.uniform(0.5, 5.0, n_records),
        'stock_rating': rng.choice(['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2]),
    })


def _save_report_table(report_df, path):
    """Render ``report_df`` as a bordered table image and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.axis('off')  # only the table should be visible, not the axes frame
        tbl = ax.table(
            cellText=report_df.values,
            colLabels=report_df.columns,
            rowLabels=report_df.index,
            cellLoc='center',
            loc='center'
        )
        # Fix the font size explicitly; auto-sizing is turned off first.
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(10)
        tbl.scale(1.2, 1.2)
        for cell in tbl.get_celld().values():
            cell.set_linewidth(0.8)
        # Save through the figure handle rather than pyplot's "current figure".
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)


def _save_confusion_matrix(cm, class_names, path):
    """Draw ``cm`` as an annotated heatmap and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=class_names, yticklabels=class_names, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        fig.savefig(path, bbox_inches='tight')
    finally:
        plt.close(fig)


def train_and_evaluate_model():
    """Train an XGBoost classifier on synthetic stock data and evaluate it.

    Generates the dataset, makes a stratified 80/20 train/test split, scales
    the features, fits the model, and scores it on the held-out part.

    Returns:
        A 3-tuple ``(markdown, report_path, matrix_path)``: a Markdown string
        with accuracy and weighted precision/recall/F1, the path of the
        classification-report image, and the path of the confusion-matrix
        image.

    Side effects:
        Writes ``classification_report.png`` and ``confusion_matrix.png``
        relative to the current working directory, replacing existing files.
        The file names are fixed, so overlapping calls write to the same files.
    """
    # Step 1: Generate synthetic dataset
    df = _make_synthetic_stock_data()

    # Step 2: Prepare data
    X = df.drop('stock_rating', axis=1)
    y = df['stock_rating']

    # Step 3: Encode target
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Step 4: Train/test split (stratified so class proportions are preserved)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Step 5: Feature scaling (statistics come from the training part only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Step 6: Train model using XGBoost
    # NOTE(review): `use_label_encoder` appears to be deprecated/ignored in
    # newer XGBoost releases - confirm against the pinned version before
    # removing it.
    model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
    model.fit(X_train_scaled, y_train)

    # Step 7: Predict
    y_pred = model.predict(X_test_scaled)

    # Step 8: Decode labels so reports show Buy/Hold/Sell instead of integers
    y_test_labels = le.inverse_transform(y_test)
    y_pred_labels = le.inverse_transform(y_pred)

    # Step 9: Metrics (zero_division=0 scores a never-predicted class as 0
    # instead of emitting a warning)
    acc = accuracy_score(y_test_labels, y_pred_labels)
    prec = precision_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    rec = recall_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    f1 = f1_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)

    # Step 10: Classification report as a DataFrame, one row per class/average
    report_dict = classification_report(y_test_labels, y_pred_labels, output_dict=True, zero_division=0)
    report_df = pd.DataFrame(report_dict).transpose().round(2)

    # Step 11: Classification report rendered as a table image
    cr_path = "classification_report.png"
    _save_report_table(report_df, cr_path)

    # Step 12: Confusion matrix, rows/columns ordered like le.classes_
    cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)
    cm_path = "confusion_matrix.png"
    _save_confusion_matrix(cm, le.classes_, cm_path)

    # Step 13: Return outputs
    output = f"""
### ✅ Evaluation Metrics:
- **Accuracy:** {acc:.2f}
- **Precision:** {prec:.2f}
- **Recall:** {rec:.2f}
- **F1 Score:** {f1:.2f}
"""
    return output, cr_path, cm_path

# Gradio interface: one button that runs the evaluation and fills three
# output components (metrics text, report image, confusion-matrix image).
with gr.Blocks() as demo:
    # Heading and a one-line usage hint.
    gr.Markdown(value="## 🧠 Stock Rating Prediction Model Evaluation")
    gr.Markdown(
        value="Click the button below to train the model on synthetic stock data and evaluate its performance."
    )

    # Components are created in display order: trigger first, then outputs.
    eval_btn = gr.Button(value="Run Model Evaluation")
    output_md = gr.Markdown()
    report_img = gr.Image(label="📊 Classification Report", type="filepath")
    cm_img = gr.Image(label="📉 Confusion Matrix", type="filepath")

    # The handler takes no inputs; its three return values map, in order,
    # onto the three output components.
    eval_btn.click(
        fn=train_and_evaluate_model,
        inputs=None,
        outputs=[output_md, report_img, cm_img],
    )

# Start the app as soon as this module is executed.
demo.launch()