# Stock_Advice_App / generate_stock_train_data.py
# Yatheshr's picture
# Upload 6 files
# 28bc08e verified
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import joblib
import os
# Train a stock-rating classifier on synthetic data and persist the fitted
# model, feature scaler and label encoder with joblib.

# Seed NumPy's global RNG so the synthetic dataset is identical on every run.
np.random.seed(42)

# 1. Generate synthetic stock data.
# The columns are drawn in the order they appear in the dict literal; keep
# that order, because reordering changes which random values each column gets.
# NOTE: "Rating" is sampled independently of the feature columns, so the
# labels carry no learnable signal. The resulting model is a pipeline
# placeholder, not a meaningful predictor.
n = 10000
data = {
    "PE": np.random.uniform(5, 50, n),  # Price to Earnings Ratio
    "DE": np.random.uniform(0, 3, n),  # Debt to Equity Ratio
    "ROE": np.random.uniform(-10, 40, n),  # Return on Equity (%)
    "MarketCap": np.random.uniform(1000, 100000, n),  # Market Cap (₹ Cr)
    "DividendYield": np.random.uniform(0, 10, n),  # Dividend Yield (%)
    "Rating": np.random.choice(["Buy", "Hold", "Sell"], n, p=[0.4, 0.4, 0.2]),  # Target label
}
df = pd.DataFrame(data)

# 2. Encode the string target label as integers. The fitted encoder is saved
# below so predictions can be mapped back to "Buy" / "Hold" / "Sell".
label_encoder = LabelEncoder()
df["RatingEncoded"] = label_encoder.fit_transform(df["Rating"])

# 3. Prepare features and target.
X = df[["PE", "DE", "ROE", "MarketCap", "DividendYield"]]
y = df["RatingEncoded"]

# 4. Split into train (80%) and hold-out test (20%) sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Scale the features. The scaler is fitted on the training split only so
# that no statistics from the hold-out split leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# 6. Train a Random Forest classifier.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# 7. Evaluate on the hold-out split. Previously the test split was created but
# never used, so the model was saved without any quality check. Because the
# labels are random, expect accuracy close to chance here.
X_test_scaled = scaler.transform(X_test)
test_accuracy = model.score(X_test_scaled, y_test)
print(f"Hold-out accuracy: {test_accuracy:.3f}")

# 8. Save the model, scaler, and encoder.
output_dir = "stock_recommendation_space"
os.makedirs(output_dir, exist_ok=True)
joblib.dump(model, os.path.join(output_dir, "stock_model.pkl"))
joblib.dump(scaler, os.path.join(output_dir, "scaler.pkl"))
joblib.dump(label_encoder, os.path.join(output_dir, "label_encoder.pkl"))
print("✅ Model, Scaler, and Encoder saved successfully.")