File size: 4,249 Bytes
b2b32ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)
import seaborn as sns
import matplotlib.pyplot as plt


def _make_synthetic_stock_data(n_records=10000, seed=42):
    """Build a reproducible synthetic stock dataset as a DataFrame.

    The five numeric feature columns are drawn uniformly from fixed ranges and
    the ``stock_rating`` label is drawn independently of them with
    probabilities Buy=0.4, Hold=0.4, Sell=0.2.
    """
    # Seeding the global NumPy RNG (rather than a local Generator) keeps the
    # generated values identical to what this script has always produced.
    np.random.seed(seed)
    data = {
        'pe_ratio': np.random.uniform(5, 50, n_records),
        'de_ratio': np.random.uniform(0.1, 3.0, n_records),
        'roe': np.random.uniform(5, 40, n_records),
        'market_cap': np.random.uniform(500, 100000, n_records),
        'dividend_yield': np.random.uniform(0.5, 5.0, n_records),
        'stock_rating': np.random.choice(['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2])
    }
    return pd.DataFrame(data)


def _save_report_table(report_df, path):
    """Render ``report_df`` as a gridded matplotlib table and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.axis('off')
        tbl = ax.table(
            cellText=report_df.values,
            colLabels=report_df.columns,
            rowLabels=report_df.index,
            cellLoc='center',
            loc='center'
        )
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(10)
        tbl.scale(1.2, 1.2)
        # Only the cell objects are needed; their (row, col) keys are not.
        for cell in tbl.get_celld().values():
            cell.set_linewidth(0.8)
        # Save through the figure handle instead of pyplot's implicit
        # "current figure" so the right figure is always written.
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)


def _save_confusion_matrix(cm, class_names, path):
    """Draw ``cm`` as an annotated heatmap labelled with ``class_names`` and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=class_names, yticklabels=class_names, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        fig.savefig(path, bbox_inches='tight')
    finally:
        plt.close(fig)


def train_and_evaluate_model():
    """Train an XGBoost classifier on synthetic stock data and evaluate it.

    Generates the synthetic dataset, label-encodes the rating, makes a
    stratified 80/20 train/test split, standardizes the features, fits the
    classifier, and scores it on the held-out split.

    Side effects:
        Writes ``classification_report.png`` and ``confusion_matrix.png`` to
        the current working directory, overwriting any existing files.

    Returns:
        A 3-tuple ``(markdown, report_path, matrix_path)``: a Markdown string
        with weighted accuracy/precision/recall/F1, and the paths of the two
        saved images.
    """
    # Step 1: Generate synthetic dataset
    df = _make_synthetic_stock_data()

    # Step 2: Separate features from the target column
    X = df.drop('stock_rating', axis=1)
    y = df['stock_rating']

    # Step 3: Encode the string ratings as integer class ids
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Step 4: Stratified train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Step 5: Feature scaling (fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Step 6: Train model using XGBoost
    model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
    model.fit(X_train_scaled, y_train)

    # Step 7: Predict
    y_pred = model.predict(X_test_scaled)

    # Step 8: Decode ids back to 'Buy'/'Hold'/'Sell' so reports are readable
    y_test_labels = le.inverse_transform(y_test)
    y_pred_labels = le.inverse_transform(y_pred)

    # Step 9: Weighted metrics; zero_division=0 scores a never-predicted class
    # as 0 instead of emitting a warning.
    acc = accuracy_score(y_test_labels, y_pred_labels)
    prec = precision_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    rec = recall_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    f1 = f1_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)

    # Step 10: Classification report as a DataFrame, rounded for display
    report_dict = classification_report(y_test_labels, y_pred_labels, output_dict=True, zero_division=0)
    report_df = pd.DataFrame(report_dict).transpose().round(2)

    # Step 11: Save the classification report as a table image
    cr_path = "classification_report.png"
    _save_report_table(report_df, cr_path)

    # Step 12: Save the confusion matrix, rows/columns ordered as le.classes_
    cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)
    cm_path = "confusion_matrix.png"
    _save_confusion_matrix(cm, le.classes_, cm_path)

    # Step 13: Return outputs
    output = f"""
### ✅ Evaluation Metrics:
- **Accuracy:** {acc:.2f}
- **Precision:** {prec:.2f}
- **Recall:** {rec:.2f}
- **F1 Score:** {f1:.2f}
"""
    return output, cr_path, cm_path

# Gradio UI: a single page with one button that runs the full
# train-and-evaluate pipeline and shows its three outputs.
with gr.Blocks() as demo:
    # Static header and instructions.
    gr.Markdown(value="## 🧠 Stock Rating Prediction Model Evaluation")
    gr.Markdown(
        value="Click the button below to train the model on synthetic stock data and evaluate its performance."
    )

    # Trigger, followed by the output slots in the order the callback
    # returns them: metrics markdown, report image, confusion-matrix image.
    eval_btn = gr.Button(value="Run Model Evaluation")
    output_md = gr.Markdown()
    report_img = gr.Image(label="📊 Classification Report", type="filepath")
    cm_img = gr.Image(label="📉 Confusion Matrix", type="filepath")

    # The callback takes no inputs, so only the outputs are wired up.
    eval_btn.click(
        fn=train_and_evaluate_model,
        outputs=[output_md, report_img, cm_img],
    )

# Start the web server for the app.
demo.launch()