File size: 4,220 Bytes
206cf5e
ffeb2d2
df4599b
ffeb2d2
 
 
18cac6a
 
 
 
634ffc9
 
 
abb88cb
206cf5e
abb88cb
206cf5e
abb88cb
 
 
 
 
 
 
 
 
634ffc9
abb88cb
 
 
206cf5e
 
abb88cb
 
206cf5e
abb88cb
206cf5e
abb88cb
206cf5e
abb88cb
206cf5e
abb88cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d2a687
 
 
634ffc9
 
0d2a687
18cac6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634ffc9
18cac6a
 
 
 
abb88cb
 
206cf5e
 
 
 
 
634ffc9
18cac6a
206cf5e
 
18cac6a
abb88cb
 
 
 
 
 
206cf5e
18cac6a
206cf5e
abb88cb
df4599b
abb88cb
 
 
469362a
abb88cb
18cac6a
 
abb88cb
18cac6a
 
df4599b
634ffc9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, classification_report
)
import seaborn as sns
import matplotlib.pyplot as plt


def _make_synthetic_stock_data(n_records=10000, seed=42):
    """Return a DataFrame of random stock fundamentals with a random rating.

    The five numeric feature columns are drawn uniformly from fixed ranges and
    the ``stock_rating`` label is drawn at random (Buy/Hold/Sell with
    probabilities 0.4/0.4/0.2), independently of the feature columns.
    """
    # A private RandomState keeps the data reproducible without reseeding the
    # process-wide NumPy RNG on every call. The draw order below must stay
    # fixed: each call consumes the stream in sequence.
    rng = np.random.RandomState(seed)
    return pd.DataFrame({
        'pe_ratio': rng.uniform(5, 50, n_records),
        'de_ratio': rng.uniform(0.1, 3.0, n_records),
        'roe': rng.uniform(5, 40, n_records),
        'market_cap': rng.uniform(500, 100000, n_records),
        'dividend_yield': rng.uniform(0.5, 5.0, n_records),
        'stock_rating': rng.choice(['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2]),
    })


def _save_report_table(report_df, path):
    """Render ``report_df`` as a gridded matplotlib table and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.axis('off')
        tbl = ax.table(
            cellText=report_df.values,
            colLabels=report_df.columns,
            rowLabels=report_df.index,
            cellLoc='center',
            loc='center'
        )
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(10)
        tbl.scale(1.2, 1.2)
        for cell in tbl.get_celld().values():
            cell.set_linewidth(0.8)
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Close this specific figure even if rendering/saving fails, so
        # repeated runs do not accumulate open figures.
        plt.close(fig)


def _save_confusion_matrix(cm, class_names, path):
    """Draw ``cm`` as an annotated heatmap and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                    xticklabels=class_names, yticklabels=class_names, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        fig.savefig(path, bbox_inches='tight')
    finally:
        plt.close(fig)


def train_and_evaluate_model():
    """Train a random forest on synthetic stock data and report its metrics.

    Generates the synthetic dataset, label-encodes the target, makes a
    stratified 80/20 train/test split, standardizes the features, fits a
    ``RandomForestClassifier`` and evaluates it on the held-out split.

    Returns:
        A 3-tuple ``(summary_markdown, report_png_path, matrix_png_path)``:
        a Markdown string with weighted accuracy/precision/recall/F1, the path
        of the saved classification-report table image, and the path of the
        saved confusion-matrix image.

    Side effects:
        Writes ``classification_report.png`` and ``confusion_matrix.png`` to
        the current working directory, overwriting any previous files.
    """
    # Step 1: Generate synthetic dataset
    df = _make_synthetic_stock_data()

    # Step 2: Separate features and target
    X = df.drop('stock_rating', axis=1)
    y = df['stock_rating']

    # Step 3: Encode target labels as integers
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Step 4: Stratified train/test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Step 5: Feature scaling (fit on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Step 6: Train model
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train_scaled, y_train)

    # Step 7: Predict on the held-out split
    y_pred = model.predict(X_test_scaled)

    # Step 8: Decode back to the string labels so reports show class names
    y_test_labels = le.inverse_transform(y_test)
    y_pred_labels = le.inverse_transform(y_pred)

    # Step 9: Metrics (zero_division=0 avoids warnings for never-predicted classes)
    acc = accuracy_score(y_test_labels, y_pred_labels)
    prec = precision_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    rec = recall_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)
    f1 = f1_score(y_test_labels, y_pred_labels, average='weighted', zero_division=0)

    # Step 10: Classification report as a rounded DataFrame
    report_dict = classification_report(y_test_labels, y_pred_labels, output_dict=True, zero_division=0)
    report_df = pd.DataFrame(report_dict).transpose().round(2)

    # Step 11: Save classification report as a table image
    cr_path = "classification_report.png"
    _save_report_table(report_df, cr_path)

    # Step 12: Save confusion matrix heatmap (rows/cols ordered as le.classes_)
    cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)
    cm_path = "confusion_matrix.png"
    _save_confusion_matrix(cm, le.classes_, cm_path)

    # Step 13: Return outputs
    output = f"""
### ✅ Evaluation Metrics:
- **Accuracy:** {acc:.2f}
- **Precision:** {prec:.2f}
- **Recall:** {rec:.2f}
- **F1 Score:** {f1:.2f}
"""
    return output, cr_path, cm_path

# Gradio Interface: a single button runs train_and_evaluate_model and shows
# its Markdown summary plus the two saved plot images.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Stock Rating Prediction Model Evaluation")
    gr.Markdown("Click the button below to train the model on synthetic stock data and evaluate its performance.")

    eval_btn = gr.Button("Run Model Evaluation")
    output_md = gr.Markdown()
    # type="filepath" because the callback returns paths to PNG files on disk.
    report_img = gr.Image(type="filepath", label="📊 Classification Report")
    cm_img = gr.Image(type="filepath", label="📉 Confusion Matrix")

    # Output order must match the (markdown, report path, matrix path) tuple
    # returned by the callback.
    eval_btn.click(fn=train_and_evaluate_model,
                   outputs=[output_md, report_img, cm_img])

# Start the server only when executed as a script, so importing this module
# does not launch the web app as a side effect.
if __name__ == "__main__":
    demo.launch()