"""Explore scraped stock closing prices and train one LSTM model per stock.

The script reads ``scraped_combined_stock_data.csv``, prints summary
statistics, plots closing prices and daily returns, trains and saves an LSTM
for every ``Stock_Type`` that has enough rows, and finally prints a
next-step prediction for one stock.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential, load_model

# Number of consecutive closing prices fed to the LSTM for one prediction.
TIME_STEPS = 60


def create_sequences(data, time_steps=TIME_STEPS):
    """Slice a price column into overlapping input windows and targets.

    Args:
        data: 2-D array; only column 0 is read.
        time_steps: Length of each input window.

    Returns:
        A pair ``(X, y)`` of NumPy arrays. ``X[i]`` holds
        ``data[i:i + time_steps, 0]`` and ``y[i]`` is the value that
        immediately follows that window. Both arrays are empty when ``data``
        has ``time_steps`` rows or fewer.
    """
    n_windows = len(data) - time_steps
    X = [data[i:i + time_steps, 0] for i in range(n_windows)]
    y = [data[i + time_steps, 0] for i in range(n_windows)]
    return np.array(X), np.array(y)


def preprocess_for_lstm(stock_data, stock_type, time_steps=TIME_STEPS):
    """Build scaled LSTM inputs for a single stock.

    Args:
        stock_data: DataFrame with ``Stock_Type`` and ``Close`` columns.
        stock_type: Value of ``Stock_Type`` to select.
        time_steps: Length of each input window.

    Returns:
        ``(X, y, scaler)`` where ``X`` has shape
        ``(samples, time_steps, 1)``, ``y`` has shape ``(samples,)`` and
        ``scaler`` is the ``MinMaxScaler`` fitted on this stock's closing
        prices (needed to map predictions back to prices).

    Raises:
        ValueError: If the stock has ``time_steps`` rows or fewer, so not
            even one window/target pair can be formed.
    """
    is_selected = stock_data['Stock_Type'] == stock_type
    prices = stock_data.loc[is_selected, ['Close']].values

    # Validate before fitting so that an empty selection reports the same
    # "Insufficient data" error instead of a scaler-internal one.
    if len(prices) <= time_steps:
        raise ValueError(
            f"{stock_type}: Insufficient data: {len(prices)} rows available, "
            f"{time_steps} required."
        )

    scaler = MinMaxScaler(feature_range=(0, 1))
    prices_scaled = scaler.fit_transform(prices)

    X, y = create_sequences(prices_scaled, time_steps)
    print(f"{stock_type}: Shape of X: {X.shape}, Shape of y: {y.shape}")
    # Add a trailing feature axis: (samples, time_steps) -> (samples, time_steps, 1).
    X = X.reshape(X.shape[0], X.shape[1], 1)
    return X, y, scaler


# Load the scraped combined stock data
all_stock_data = pd.read_csv('scraped_combined_stock_data.csv')
all_stock_data['Date'] = pd.to_datetime(all_stock_data['Date'])

# Ensure 'Close' is numeric; values that cannot be parsed become NaN and
# those rows are dropped.
all_stock_data['Close'] = pd.to_numeric(all_stock_data['Close'], errors='coerce')
all_stock_data = all_stock_data.dropna(subset=['Close'])

# Daily returns and the LSTM windows both rely on rows being in chronological
# order within each stock, so enforce it instead of trusting the CSV order.
# A stable sort keeps the file order for rows that share a date.
all_stock_data = all_stock_data.sort_values('Date', kind='stable')

# Check the dataset
print("Stock Data Shape:", all_stock_data.shape)
print(all_stock_data.head())

# Summary statistics and visualization
summary_stats = all_stock_data.groupby('Stock_Type')['Close'].describe()
print(summary_stats)

stock_types = all_stock_data['Stock_Type'].unique()

plt.figure(figsize=(12, 6))
for stock in stock_types:
    stock_prices = all_stock_data[all_stock_data['Stock_Type'] == stock]
    plt.plot(stock_prices['Date'], stock_prices['Close'], label=stock)
plt.title('Stock Closing Price Trends (All Stocks)')
plt.xlabel('Date')
plt.ylabel('Closing Price')
plt.legend()
plt.show()

# Volatility analysis
all_stock_data['Daily_Return'] = (
    all_stock_data.groupby('Stock_Type')['Close'].pct_change()
)
plt.figure(figsize=(12, 6))
sns.boxplot(data=all_stock_data, x='Stock_Type', y='Daily_Return', palette='coolwarm')
plt.title('Stock Price Volatility (Daily Returns)')
plt.ylabel('Daily Return')
plt.xlabel('Stock Type')
plt.show()

# Train and save an LSTM model for each stock. The fitted scalers are kept so
# the prediction step can reuse them instead of re-running the preprocessing.
scalers = {}
for stock in stock_types:
    print(f"Processing {stock}...")
    # Only the preprocessing step is expected to fail (too few rows); keeping
    # the try body this small lets genuine training errors surface.
    try:
        X, y, scaler = preprocess_for_lstm(all_stock_data, stock, time_steps=TIME_STEPS)
    except ValueError as e:
        print(e)
        continue

    # Build LSTM model
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, y, batch_size=32, epochs=50, verbose=1)

    # Save the model
    model_filename = f'lstm_{stock}_model.h5'
    model.save(model_filename)
    print(f"LSTM model for {stock} saved as '{model_filename}'")
    scalers[stock] = scaler

# Predict for a specific stock (e.g., 'AAPL')
stock_to_predict = 'AAPL'
stock_prices = all_stock_data.loc[
    all_stock_data['Stock_Type'] == stock_to_predict, 'Close'
].values
scaler = scalers.get(stock_to_predict)

if scaler is None:
    # No scaler means no model was trained above, which happens when the
    # stock has TIME_STEPS rows or fewer (or is absent from the data).
    print(
        f"Insufficient data for prediction: {len(stock_prices)} rows available, "
        f"more than {TIME_STEPS} required."
    )
else:
    # Scale the most recent window with the scaler used during training and
    # shape it as a single sample: (1, TIME_STEPS, 1).
    last_window = stock_prices[-TIME_STEPS:].reshape(-1, 1)
    last_window_scaled = scaler.transform(last_window).reshape(1, -1, 1)

    try:
        # Load the saved model
        model = load_model(f'lstm_{stock_to_predict}_model.h5')
    except (OSError, ValueError) as e:
        # A missing or unreadable model file is reported, not fatal.
        print(f"Error during prediction: {e}")
    else:
        predicted_price_scaled = model.predict(last_window_scaled)
        predicted_price = scaler.inverse_transform(predicted_price_scaled)
        print(f"Predicted Next Day Price for {stock_to_predict}: {predicted_price[0][0]}")