"""Train a random-forest classifier on a synthetic stock-rating dataset.

The script generates random fundamental-ratio features with a randomly
drawn Buy/Hold/Sell label, fits a ``RandomForestClassifier`` on a
stratified 80/20 split, prints accuracy / precision / recall / F1 and a
per-class classification report, and displays a confusion-matrix heatmap.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Step 1: Generate synthetic dataset.
# The seed fixes NumPy's global RNG, so the draw order of the columns below
# determines the generated values -- do not reorder the dict entries.
np.random.seed(42)
n_records = 10000

data = {
    'pe_ratio': np.random.uniform(5, 50, n_records),
    'de_ratio': np.random.uniform(0.1, 3.0, n_records),
    'roe': np.random.uniform(5, 40, n_records),
    'market_cap': np.random.uniform(500, 100000, n_records),
    'dividend_yield': np.random.uniform(0.5, 5.0, n_records),
    # NOTE: the label is sampled independently of the feature columns, so
    # the features carry no information about it.
    'stock_rating': np.random.choice(
        ['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2]
    ),
}

df = pd.DataFrame(data)

# Step 2: Prepare data (features vs. target column).
X = df.drop('stock_rating', axis=1)
y = df['stock_rating']

# Step 3: Encode the string target as integer class ids.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Step 4: Train/test split (80/20, stratified on the encoded target).
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Step 5: Feature scaling.
# The scaler is fit on the training split only and then applied to the
# test split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 6: Train model.
model = RandomForestClassifier(random_state=42)
model.fit(X_train_scaled, y_train)

# Step 7: Predict using the trained model.
y_pred = model.predict(X_test_scaled)

# Step 8: Map integer class ids back to the original string labels using
# the same encoder that produced them.
y_test_labels = le.inverse_transform(y_test)
y_pred_labels = le.inverse_transform(y_pred)

# Step 9: Print basic metrics (precision/recall/F1 are support-weighted
# averages over the classes).
print("✅ Accuracy:", accuracy_score(y_test_labels, y_pred_labels))
print("✅ Precision:", precision_score(y_test_labels, y_pred_labels, average='weighted'))
print("✅ Recall:", recall_score(y_test_labels, y_pred_labels, average='weighted'))
print("✅ F1 Score:", f1_score(y_test_labels, y_pred_labels, average='weighted'))

# Step 10: Detailed breakdown per class.
print("\n📊 Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))

# Step 11: Confusion matrix.
# Passing labels=le.classes_ pins the row/column order so it matches the
# tick labels used on the heatmap.
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.title("📉 Confusion Matrix")
plt.show()