"""Gradio demo: train an XGBoost classifier on synthetic stock data and report metrics."""

import gradio as gr
import matplotlib

# Event handlers may be executed outside the main thread by the web server;
# GUI backends are not safe there, so force the non-interactive Agg backend
# before pyplot is imported.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Output image locations (relative to the current working directory).
CLASSIFICATION_REPORT_PATH = "classification_report.png"
CONFUSION_MATRIX_PATH = "confusion_matrix.png"


def _make_synthetic_dataset(n_records=10000, seed=42):
    """Return a DataFrame of uniformly sampled stock features plus a random rating.

    The ``stock_rating`` column is drawn independently of the feature columns,
    so a model trained on this data has no real signal to learn from.
    """
    # Global seeding (rather than a local Generator) keeps the generated
    # values identical from run to run.
    np.random.seed(seed)
    data = {
        'pe_ratio': np.random.uniform(5, 50, n_records),
        'de_ratio': np.random.uniform(0.1, 3.0, n_records),
        'roe': np.random.uniform(5, 40, n_records),
        'market_cap': np.random.uniform(500, 100000, n_records),
        'dividend_yield': np.random.uniform(0.5, 5.0, n_records),
        'stock_rating': np.random.choice(
            ['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2]
        ),
    }
    return pd.DataFrame(data)


def _save_report_table(report_df, path):
    """Render ``report_df`` as a bordered matplotlib table and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(8, 4))
    try:
        ax.axis('off')
        tbl = ax.table(
            cellText=report_df.values,
            colLabels=report_df.columns,
            rowLabels=report_df.index,
            cellLoc='center',
            loc='center',
        )
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(10)
        tbl.scale(1.2, 1.2)
        for cell in tbl.get_celld().values():
            cell.set_linewidth(0.8)
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return path


def _save_confusion_matrix(cm, class_names, path):
    """Draw the confusion matrix ``cm`` as an annotated heatmap and save it to ``path``."""
    fig, ax = plt.subplots(figsize=(6, 5))
    try:
        sns.heatmap(
            cm,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax,
        )
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        fig.savefig(path, bbox_inches='tight')
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close(fig)
    return path


def train_and_evaluate_model():
    """Train an XGBoost classifier on synthetic data and evaluate it.

    Returns:
        A 3-tuple ``(markdown, report_path, cm_path)``: a Markdown summary of
        the weighted accuracy/precision/recall/F1 scores, the path of the
        classification-report image, and the path of the confusion-matrix
        image.
    """
    # Steps 1-2: generate the synthetic dataset and split features from target.
    df = _make_synthetic_dataset()
    X = df.drop('stock_rating', axis=1)
    y = df['stock_rating']

    # Step 3: encode the string labels as integers for the classifier.
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    # Step 4: stratified train/test split so class proportions are preserved.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
    )

    # Step 5: fit the scaler on the training split only, then apply to both.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Step 6: train the model.
    # NOTE(review): `use_label_encoder` is reported as deprecated in newer
    # XGBoost releases; confirm against the pinned xgboost version.
    model = xgb.XGBClassifier(
        random_state=42, use_label_encoder=False, eval_metric='mlogloss'
    )
    model.fit(X_train_scaled, y_train)

    # Steps 7-8: predict and decode both label vectors back to strings.
    y_pred = model.predict(X_test_scaled)
    y_test_labels = le.inverse_transform(y_test)
    y_pred_labels = le.inverse_transform(y_pred)

    # Step 9: aggregate metrics (weighted by class support).
    acc = accuracy_score(y_test_labels, y_pred_labels)
    prec = precision_score(
        y_test_labels, y_pred_labels, average='weighted', zero_division=0
    )
    rec = recall_score(
        y_test_labels, y_pred_labels, average='weighted', zero_division=0
    )
    f1 = f1_score(
        y_test_labels, y_pred_labels, average='weighted', zero_division=0
    )

    # Steps 10-11: per-class report as a DataFrame, rendered to an image.
    report_dict = classification_report(
        y_test_labels, y_pred_labels, output_dict=True, zero_division=0
    )
    report_df = pd.DataFrame(report_dict).transpose().round(2)
    cr_path = _save_report_table(report_df, CLASSIFICATION_REPORT_PATH)

    # Step 12: confusion matrix with rows/columns in the encoder's class order.
    cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)
    cm_path = _save_confusion_matrix(cm, le.classes_, CONFUSION_MATRIX_PATH)

    # Step 13: Markdown summary shown in the UI.
    output = "\n".join([
        "",
        "### ✅ Evaluation Metrics:",
        f"- **Accuracy:** {acc:.2f}",
        f"- **Precision:** {prec:.2f}",
        f"- **Recall:** {rec:.2f}",
        f"- **F1 Score:** {f1:.2f}",
        "",
    ])
    return output, cr_path, cm_path


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Stock Rating Prediction Model Evaluation")
    gr.Markdown("Click the button below to train the model on synthetic stock data and evaluate its performance.")
    eval_btn = gr.Button("Run Model Evaluation")
    output_md = gr.Markdown()
    report_img = gr.Image(type="filepath", label="📊 Classification Report")
    cm_img = gr.Image(type="filepath", label="📉 Confusion Matrix")
    eval_btn.click(fn=train_and_evaluate_model, outputs=[output_md, report_img, cm_img])

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse `demo` or `train_and_evaluate_model`) has no side effects.
if __name__ == "__main__":
    demo.launch()