Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import MinMaxScaler | |
| from tensorflow.keras.models import Sequential, load_model | |
| from tensorflow.keras.layers import LSTM, Dense, Dropout | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
# Load the scraped combined stock data. The rest of the script reads the
# 'Date', 'Close' and 'Stock_Type' columns from this frame.
all_stock_data = pd.read_csv('scraped_combined_stock_data.csv')
all_stock_data['Date'] = pd.to_datetime(all_stock_data['Date'])

# Ensure 'Close' is numeric; values that cannot be parsed become NaN ...
all_stock_data['Close'] = pd.to_numeric(all_stock_data['Close'], errors='coerce')
# ... and those rows are dropped.
all_stock_data = all_stock_data.dropna(subset=['Close'])

# Put each stock's rows in chronological order. Everything downstream is
# positional (pct_change compares a row with the previous one, the LSTM windows
# are consecutive rows, and prediction takes the last 60 rows), so the CSV's
# row order must not be relied upon.
all_stock_data = (
    all_stock_data
    .sort_values(['Stock_Type', 'Date'])
    .reset_index(drop=True)
)

# Check the dataset
print("Stock Data Shape:", all_stock_data.shape)
print(all_stock_data.head())
# Summary statistics: describe() of the closing price, one row per stock type.
close_by_stock = all_stock_data.groupby('Stock_Type')['Close']
summary_stats = close_by_stock.describe()
print(summary_stats)

# Visualization: one closing-price line per stock, all drawn on a single figure.
plt.figure(figsize=(12, 6))
axes = plt.gca()
for stock in all_stock_data['Stock_Type'].unique():
    belongs_to_stock = all_stock_data['Stock_Type'] == stock
    stock_prices = all_stock_data[belongs_to_stock]
    axes.plot(stock_prices['Date'], stock_prices['Close'], label=stock)
axes.set_title('Stock Closing Price Trends (All Stocks)')
axes.set_xlabel('Date')
axes.set_ylabel('Closing Price')
axes.legend()
plt.show()
# Volatility analysis: fractional change of 'Close' relative to the previous
# row of the same stock, stored as a new 'Daily_Return' column.
grouped_close = all_stock_data.groupby('Stock_Type')['Close']
all_stock_data['Daily_Return'] = grouped_close.pct_change()

# Box plot of the return distribution, one box per stock type.
plt.figure(figsize=(12, 6))
returns_axes = sns.boxplot(
    x='Stock_Type',
    y='Daily_Return',
    data=all_stock_data,
    palette='coolwarm',
)
returns_axes.set_title('Stock Price Volatility (Daily Returns)')
returns_axes.set_ylabel('Daily Return')
returns_axes.set_xlabel('Stock Type')
plt.show()
# Sequence creation for LSTM
def create_sequences(data, time_steps=60):
    """Build sliding-window samples from the first column of a 2-D series.

    Each sample is ``time_steps`` consecutive values of column 0; its target
    is the value that immediately follows the window.

    Args:
        data: 2-D array-like of shape (n_rows, n_features). Only column 0 is
            read.
        time_steps: Window length. Must be at least 1.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, time_steps)`` and ``y`` has shape ``(n_samples,)``,
        where ``n_samples = max(len(data) - time_steps, 0)``.

    Raises:
        ValueError: If ``time_steps`` is less than 1.
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be a positive integer, got {time_steps}.")

    values = np.asarray(data)
    n_samples = len(values) - time_steps
    if n_samples <= 0:
        # Keep the window axis even when there are no samples, so callers that
        # read X.shape[1] or reshape to (n, time_steps, 1) do not fail.
        return (
            np.empty((0, time_steps), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    series = values[:, 0]
    X = np.array([series[start:start + time_steps] for start in range(n_samples)])
    # The target of window `start` is series[start + time_steps], i.e. the
    # series shifted by one window length.
    y = series[time_steps:].copy()
    return X, y
# LSTM Preparation Function
def preprocess_for_lstm(stock_data, stock_type, time_steps=60):
    """Scale one stock's closing prices and turn them into LSTM training windows.

    Rows are used in the order they appear in ``stock_data``; this function
    does not sort them.

    Args:
        stock_data: DataFrame with at least 'Stock_Type' and 'Close' columns.
        stock_type: Value of 'Stock_Type' selecting the rows to use.
        time_steps: Number of consecutive prices in each input window.

    Returns:
        A tuple ``(X, y, scaler)``: ``X`` has shape
        ``(n_samples, time_steps, 1)``, ``y`` has shape ``(n_samples,)``, and
        ``scaler`` is the MinMaxScaler fitted on this stock's 'Close' values
        (feature range 0..1), needed to map predictions back to prices.

    Raises:
        ValueError: If the stock has ``time_steps`` rows or fewer, since at
            least one window plus its following target value is required.
    """
    stock_rows = stock_data[stock_data['Stock_Type'] == stock_type]
    prices = stock_rows[['Close']].values

    # Check the row count before fitting: fitting the scaler on zero rows
    # (e.g. an unknown stock_type) would fail inside scikit-learn with a
    # message that says nothing about which stock was short of data.
    if len(prices) <= time_steps:
        raise ValueError(
            f"{stock_type}: Insufficient data: {len(prices)} rows available, "
            f"more than {time_steps} required."
        )

    scaler = MinMaxScaler(feature_range=(0, 1))
    prices_scaled = scaler.fit_transform(prices)

    X, y = create_sequences(prices_scaled, time_steps)
    print(f"{stock_type}: Shape of X: {X.shape}, Shape of y: {y.shape}")
    # Add the trailing feature axis: (samples, time_steps) -> (samples, time_steps, 1).
    X = X.reshape(X.shape[0], X.shape[1], 1)
    return X, y, scaler
# Train and save LSTM model for each stock
for stock in all_stock_data['Stock_Type'].unique():
    print(f"Processing {stock}...")

    # Only the preprocessing step is guarded: its ValueError means "not enough
    # rows for this stock", which is reported and skipped. Errors raised while
    # building, fitting or saving the model are left to propagate so they are
    # not mistaken for a data shortage.
    try:
        X, y, scaler = preprocess_for_lstm(all_stock_data, stock, time_steps=60)
    except ValueError as e:
        print(e)
        continue

    # Build LSTM model: two stacked LSTM layers with dropout, then two dense
    # layers ending in a single output (the next scaled closing price).
    model = Sequential([
        LSTM(50, return_sequences=True, input_shape=(X.shape[1], 1)),
        Dropout(0.2),
        LSTM(50, return_sequences=False),
        Dropout(0.2),
        Dense(25),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X, y, batch_size=32, epochs=50, verbose=1)

    # Save the model; the prediction step loads it back by this file name.
    model_filename = f'lstm_{stock}_model.h5'
    model.save(model_filename)
    print(f"LSTM model for {stock} saved as '{model_filename}'")
# Predict for a specific stock (e.g., 'AAPL')
stock_to_predict = 'AAPL'
window = 60  # number of most recent closing prices fed to the model

try:
    stock_prices = all_stock_data[all_stock_data['Stock_Type'] == stock_to_predict]['Close'].values
    # preprocess_for_lstm needs strictly more rows than the window length, so
    # the guard uses '>' to agree with it (with exactly `window` rows it raises).
    if len(stock_prices) > window:
        # Re-run preprocessing only to obtain the scaler fitted on this stock's
        # closing prices; the training windows it returns are not needed here.
        _, _, scaler = preprocess_for_lstm(all_stock_data, stock_to_predict, time_steps=window)

        # Most recent `window` prices, scaled and shaped as one sample:
        # (1, window, 1).
        last_60_prices = stock_prices[-window:].reshape(-1, 1)
        last_60_scaled = scaler.transform(last_60_prices).reshape(1, -1, 1)

        # Load the saved model
        model = load_model(f'lstm_{stock_to_predict}_model.h5')
        predicted_price_scaled = model.predict(last_60_scaled)
        # Map the scaled prediction back to price units.
        predicted_price = scaler.inverse_transform(predicted_price_scaled)
        print(f"Predicted Next Day Price for {stock_to_predict}: {predicted_price[0][0]}")
    else:
        print(
            f"Insufficient data for prediction: {len(stock_prices)} rows available, "
            f"more than {window} required."
        )
except (NameError, ValueError, OSError) as e:
    # ValueError: raised by preprocess_for_lstm for insufficient data.
    # ValueError/OSError: presumably what load_model raises when the model file
    # is missing or unreadable, depending on the Keras version -- TODO confirm.
    # NameError is retained from the original handler.
    print(f"Error during prediction: {e}")