Spaces:

roshcheeku
/

STOCKBACK

Sleeping

App Files Files Community

STOCKBACK / lstm.py

roshcheeku

Upload 56 files

605fc75 verified 7 months ago

raw

history blame contribute delete

4.53 kB

	import pandas as pd
	import numpy as np
	from sklearn.preprocessing import MinMaxScaler
	from tensorflow.keras.models import Sequential, load_model
	from tensorflow.keras.layers import LSTM, Dense, Dropout
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Load the scraped combined stock data
	all_stock_data = pd.read_csv('scraped_combined_stock_data.csv')
	all_stock_data['Date'] = pd.to_datetime(all_stock_data['Date'])

	# Ensure 'Close' is numeric; values that cannot be parsed become NaN
	all_stock_data['Close'] = pd.to_numeric(all_stock_data['Close'], errors='coerce')

	# Drop rows with NaN values in 'Close'
	all_stock_data = all_stock_data.dropna(subset=['Close'])

	# Put rows in chronological order. Daily returns, the line plots and the
	# sliding windows built later all assume each stock's rows run oldest to
	# newest, which the CSV row order alone does not guarantee. A stable sort
	# keeps the original relative order of rows that share a date.
	all_stock_data = all_stock_data.sort_values('Date', kind='stable', ignore_index=True)

	# Check the dataset
	print("Stock Data Shape:", all_stock_data.shape)
	print(all_stock_data.head())

	# Summary statistics and visualization
	summary_stats = all_stock_data.groupby('Stock_Type')['Close'].describe()
	print(summary_stats)

	# One closing-price line per stock on a shared axis
	plt.figure(figsize=(12, 6))
	for stock in all_stock_data['Stock_Type'].unique():
	    stock_prices = all_stock_data[all_stock_data['Stock_Type'] == stock]
	    plt.plot(stock_prices['Date'], stock_prices['Close'], label=stock)
	plt.title('Stock Closing Price Trends (All Stocks)')
	plt.xlabel('Date')
	plt.ylabel('Closing Price')
	plt.legend()
	plt.show()

	# Volatility analysis: per-stock percentage change between consecutive rows
	# (the first row of every stock has no predecessor and is NaN)
	all_stock_data['Daily_Return'] = all_stock_data.groupby('Stock_Type')['Close'].pct_change()

	plt.figure(figsize=(12, 6))
	sns.boxplot(data=all_stock_data, x='Stock_Type', y='Daily_Return', palette='coolwarm')
	plt.title('Stock Price Volatility (Daily Returns)')
	plt.ylabel('Daily Return')
	plt.xlabel('Stock Type')
	plt.show()

	# Sequence creation for LSTM
	def create_sequences(data, time_steps=60):
	    """Build sliding-window samples for next-step prediction.

	    Each sample holds ``time_steps`` consecutive values from column 0 of
	    ``data``; its target is the column-0 value immediately after that window.

	    Args:
	        data: 2-D array-like of shape ``(n_rows, n_features)``. Only column 0
	            is used.
	        time_steps: Window length; must be at least 1.

	    Returns:
	        A tuple ``(X, y)`` where ``X`` has shape ``(n_samples, time_steps)``
	        and ``y`` has shape ``(n_samples,)``, with
	        ``n_samples = max(n_rows - time_steps, 0)``. When there are too few
	        rows the arrays are empty but keep those shapes, so callers can
	        still read ``X.shape[1]``.

	    Raises:
	        ValueError: If ``time_steps`` is smaller than 1.
	    """
	    if time_steps < 1:
	        raise ValueError(f"time_steps must be at least 1, got {time_steps}")

	    values = np.asarray(data)
	    # A completely empty input has no column 0 to select; treat it as an
	    # empty series instead of failing on the index.
	    series = values[:, 0] if values.size else values.reshape(-1)

	    n_samples = len(series) - time_steps
	    if n_samples <= 0:
	        return (
	            np.empty((0, time_steps), dtype=series.dtype),
	            np.empty((0,), dtype=series.dtype),
	        )

	    # The view yields len(series) - time_steps + 1 windows; the final window
	    # has no following value to predict, so it is dropped.
	    windows = np.lib.stride_tricks.sliding_window_view(series, time_steps)[:n_samples]
	    # Copy so the results are writable and independent of the input buffer.
	    return windows.copy(), series[time_steps:].copy()

	# LSTM Preparation Function
	def preprocess_for_lstm(stock_data, stock_type, time_steps=60):
	    """Scale one stock's closing prices and window them for LSTM training.

	    Args:
	        stock_data: DataFrame with at least 'Stock_Type' and 'Close' columns.
	            When a 'Date' column is present the selected rows are ordered by
	            it before windowing.
	        stock_type: Value of 'Stock_Type' selecting the rows to use.
	        time_steps: Number of consecutive prices in each input window.

	    Returns:
	        A tuple ``(X, y, scaler)``: ``X`` has shape
	        ``(n_samples, time_steps, 1)``, ``y`` has shape ``(n_samples,)``, and
	        ``scaler`` is the MinMaxScaler fitted on this stock's closing prices
	        (needed to map predictions back to price units).

	    Raises:
	        ValueError: If the stock has ``time_steps`` rows or fewer, since at
	            least one window plus its following target value is required.
	    """
	    stock_data = stock_data[stock_data['Stock_Type'] == stock_type]
	    if 'Date' in stock_data.columns:
	        # Windows must follow time order; a stable sort keeps the existing
	        # order of rows that share a date.
	        stock_data = stock_data.sort_values('Date', kind='stable')
	    prices = stock_data[['Close']].values

	    # Check the row count before fitting, so an empty or short selection
	    # reports this message rather than an error from the scaler.
	    n_rows = len(prices)
	    if n_rows <= time_steps:
	        raise ValueError(
	            f"{stock_type}: Insufficient data: {n_rows} rows available, "
	            f"more than {time_steps} required."
	        )

	    scaler = MinMaxScaler(feature_range=(0, 1))
	    prices_scaled = scaler.fit_transform(prices)

	    X, y = create_sequences(prices_scaled, time_steps)
	    print(f"{stock_type}: Shape of X: {X.shape}, Shape of y: {y.shape}")
	    # Add the trailing feature axis expected by the LSTM input layer.
	    X = X.reshape(X.shape[0], X.shape[1], 1)
	    return X, y, scaler

	# Train and save LSTM model for each stock
	for stock in all_stock_data['Stock_Type'].unique():
	    print(f"Processing {stock}...")

	    # Only the preprocessing step is guarded: it raises ValueError when a
	    # stock has too few rows, and such stocks are skipped. Errors raised
	    # while building, fitting or saving the model are left to propagate so
	    # they are not mistaken for a data-size problem.
	    try:
	        X, y, scaler = preprocess_for_lstm(all_stock_data, stock, time_steps=60)
	    except ValueError as e:
	        print(e)
	        continue

	    # Build LSTM model: two stacked 50-unit LSTM layers with dropout,
	    # followed by a small dense head producing one scaled price
	    model = Sequential([
	        LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
	        Dropout(0.2),
	        LSTM(50, return_sequences=False),
	        Dropout(0.2),
	        Dense(25),
	        Dense(1)
	    ])

	    model.compile(optimizer='adam', loss='mean_squared_error')
	    model.fit(X, y, batch_size=32, epochs=50, verbose=1)

	    # Save the model
	    model_filename = f'lstm_{stock}_model.h5'
	    model.save(model_filename)
	    print(f"LSTM model for {stock} saved as '{model_filename}'")

	# Predict for a specific stock (e.g., 'AAPL')
	stock_to_predict = 'AAPL'
	try:
	    # The "last 60 prices" are only meaningful in chronological order, so
	    # order the selected rows by date before slicing.
	    stock_rows = all_stock_data[all_stock_data['Stock_Type'] == stock_to_predict]
	    stock_prices = stock_rows.sort_values('Date', kind='stable')['Close'].values
	    if len(stock_prices) >= 60:
	        # Refit the scaler on the same closing prices that training scaled.
	        # Min-max scaling depends only on those values, so this matches the
	        # training scaler without rebuilding every training window.
	        scaler = MinMaxScaler(feature_range=(0, 1))
	        scaler.fit(stock_prices.reshape(-1, 1))
	        last_60_prices = stock_prices[-60:].reshape(-1, 1)
	        # Shape (1, 60, 1): one sample, 60 time steps, one feature
	        last_60_scaled = scaler.transform(last_60_prices).reshape(1, -1, 1)

	        # Load the saved model
	        model = load_model(f'lstm_{stock_to_predict}_model.h5')

	        predicted_price_scaled = model.predict(last_60_scaled)
	        # Map the scaled prediction back to price units
	        predicted_price = scaler.inverse_transform(predicted_price_scaled)

	        print(f"Predicted Next Day Price for {stock_to_predict}: {predicted_price[0][0]}")
	    else:
	        print(f"Insufficient data for prediction: {len(stock_prices)} rows available, 60 required.")
	except (NameError, ValueError, OSError) as e:
	    # ValueError / OSError cover a missing or unreadable model file (for
	    # example when training skipped this stock) and shape problems.
	    print(f"Error during prediction: {e}")