# Demo: train and evaluate a random-forest stock-rating classifier on synthetic data.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
# ---- Step 1: synthetic dataset ---------------------------------------------
# Fixed seed so every run draws the same 10,000 fake fundamentals.
np.random.seed(42)
num_rows = 10000
# Each feature is drawn uniformly from a range typical for that metric; the
# rating labels are drawn independently of the features (p = Buy/Hold/Sell
# class mix), so this exercises the pipeline rather than real predictability.
df = pd.DataFrame({
    'pe_ratio': np.random.uniform(5, 50, num_rows),
    'de_ratio': np.random.uniform(0.1, 3.0, num_rows),
    'roe': np.random.uniform(5, 40, num_rows),
    'market_cap': np.random.uniform(500, 100000, num_rows),
    'dividend_yield': np.random.uniform(0.5, 5.0, num_rows),
    'stock_rating': np.random.choice(['Buy', 'Hold', 'Sell'], num_rows, p=[0.4, 0.4, 0.2]),
})
# ---- Steps 2-4: feature matrix, target encoding, train/test split ----------
X = df.drop(columns='stock_rating')
y = df['stock_rating']

# Map string ratings onto integer codes (LabelEncoder sorts classes, so
# alphabetically: Buy/Hold/Sell -> 0/1/2).
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% for evaluation; stratify keeps the class mix identical in both
# partitions, and the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    stratify=y_encoded,
    random_state=42,
)
# ---- Steps 5-6: scaling and model fit ---------------------------------------
# Standardize using training-set statistics only, then reuse that same
# transform on the test set, so no test-set information leaks into scaling.
# NOTE(review): tree ensembles are scale-invariant, so this step is likely
# redundant for a RandomForest — kept to preserve the existing pipeline.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# fit() returns the estimator itself, so construct and train in one expression.
model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
# ---- Steps 7-10: prediction and evaluation ----------------------------------
# Predict on the held-out (scaled) features.
y_pred = model.predict(X_test_scaled)

# Translate integer codes back into the original string ratings so every
# report below reads in terms of Buy/Hold/Sell.
y_test_labels = le.inverse_transform(y_test)
y_pred_labels = le.inverse_transform(y_pred)

# Headline metrics; weighted averaging weights each class by its support,
# which matters here because Sell is only ~20% of the labels.
summary_metrics = [
    ("✅ Accuracy:", lambda t, p: accuracy_score(t, p)),
    ("✅ Precision:", lambda t, p: precision_score(t, p, average='weighted')),
    ("✅ Recall:", lambda t, p: recall_score(t, p, average='weighted')),
    ("✅ F1 Score:", lambda t, p: f1_score(t, p, average='weighted')),
]
for label, metric in summary_metrics:
    print(label, metric(y_test_labels, y_pred_labels))

# Per-class precision/recall/F1 breakdown.
print("\n📊 Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))
# ---- Step 11: confusion matrix ----------------------------------------------
# Pin the row/column order to the encoder's class order so the axis tick
# labels below line up with the matrix cells.
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)

plt.figure(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,  # write the raw count inside each cell
    fmt="d",
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
plt.title("📉 Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()