|
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
|
|
|
from sklearn.model_selection import train_test_split |
|
|
from sklearn.preprocessing import StandardScaler, LabelEncoder |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
|
|
|
|
|
|
# Fix the global NumPy RNG so every run draws the same synthetic dataset.
np.random.seed(42)

n_records = 10000

# (low, high) bounds for each uniformly-drawn fundamental indicator.
# Insertion order matters: the RNG is consumed column by column in this order.
feature_ranges = {
    'pe_ratio': (5, 50),
    'de_ratio': (0.1, 3.0),
    'roe': (5, 40),
    'market_cap': (500, 100000),
    'dividend_yield': (0.5, 5.0),
}

# Draw all numeric features, then append the categorical target with a
# 40/40/20 Buy/Hold/Sell split.
data = {
    column: np.random.uniform(low, high, n_records)
    for column, (low, high) in feature_ranges.items()
}
data['stock_rating'] = np.random.choice(['Buy', 'Hold', 'Sell'], n_records, p=[0.4, 0.4, 0.2])

df = pd.DataFrame(data)
|
|
|
|
|
|
|
|
# Separate the categorical target from the numeric predictors.
y = df['stock_rating']
X = df.drop('stock_rating', axis=1)

# Map the string ratings onto integer class ids; keep the encoder around so
# predictions can be mapped back to their original labels later.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Hold out 20% for evaluation. Stratifying on the encoded target keeps the
# Buy/Hold/Sell proportions identical in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)
|
|
|
|
|
|
|
|
# Standardize features to zero mean / unit variance. The scaler is fit on the
# training split only, then reused on the test split to avoid leakage.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the classifier; fit() returns the estimator itself, so the chained
# call leaves a fitted model bound to `model`.
model = RandomForestClassifier(random_state=42).fit(X_train_scaled, y_train)
|
|
|
|
|
|
|
|
# Score the held-out split and translate both truth and predictions back to
# their human-readable rating labels.
y_pred = model.predict(X_test_scaled)

y_test_labels = le.inverse_transform(y_test)
y_pred_labels = le.inverse_transform(y_pred)

# Weighted averaging accounts for the class imbalance (Sell is only ~20%).
metric_rows = [
    ("✅ Accuracy:", accuracy_score(y_test_labels, y_pred_labels)),
    ("✅ Precision:", precision_score(y_test_labels, y_pred_labels, average='weighted')),
    ("✅ Recall:", recall_score(y_test_labels, y_pred_labels, average='weighted')),
    ("✅ F1 Score:", f1_score(y_test_labels, y_pred_labels, average='weighted')),
]
for label, value in metric_rows:
    print(label, value)

print("\n📊 Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))
|
|
|
|
|
|
|
|
# Confusion matrix in a fixed row/column order (the encoder's class order)
# so the heatmap axes line up with the tick labels below.
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=le.classes_)

plt.figure(figsize=(6, 5))
ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=le.classes_,
    yticklabels=le.classes_,
)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("True Label")
ax.set_title("📉 Confusion Matrix")
plt.show()
|
|
|