import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential


# Function definitions
def create_dataset(dataset, look_back=1):
    """Build supervised-learning samples from column 0 of ``dataset``.

    Each sample pairs ``look_back`` consecutive values with the value that
    immediately follows them.

    Args:
        dataset: 2-D array-like; only column 0 is read.
        look_back: Number of consecutive values in each input window.

    Returns:
        A tuple ``(data_x, data_y)``. ``data_x`` has shape
        ``(n_samples, look_back)`` and ``data_y`` has shape ``(n_samples,)``,
        where ``n_samples = max(len(dataset) - look_back, 0)``.
    """
    series = np.asarray(dataset)[:, 0]
    # Every start index whose target (start + look_back) is still inside the
    # series is a valid sample; the previous "- 1" dropped the last one.
    n_samples = max(len(series) - look_back, 0)
    windows = [series[start:start + look_back] for start in range(n_samples)]
    # The explicit reshape keeps the 2-D shape even when there are no samples.
    data_x = np.array(windows, dtype=series.dtype).reshape(n_samples, look_back)
    data_y = series[look_back:].copy()
    return data_x, data_y


def train_and_predict(file, *, look_back=1, epochs=50, train_fraction=0.67):
    """Train an LSTM on the second CSV column and predict train/test values.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        look_back: Number of past values fed to the model per prediction.
        epochs: Number of training epochs.
        train_fraction: Fraction of rows (from the start) used for training;
            the remaining rows form the test set.

    Returns:
        A tuple ``(original, train_plot, test_plot, train_rmse, test_rmse)``.
        ``original`` is the loaded column as a float32 array of shape
        ``(n_rows, 1)``. ``train_plot`` and ``test_plot`` have the same shape
        and hold the predictions at the row index they refer to, with NaN
        everywhere else. The two scores are RMSE values in original units.

    Raises:
        ValueError: If either split has too few rows to form one sample.
    """
    # Load and preprocess the data. Rows with a missing value are dropped
    # because NaNs would propagate through training and the RMSE computation.
    dataframe = pd.read_csv(file, usecols=[1], engine='python')
    values = dataframe.dropna().values.astype('float32')

    # Split into training and test sets (chronological, no shuffling).
    train_size = int(len(values) * train_fraction)
    train_raw, test_raw = values[:train_size], values[train_size:]
    if min(len(train_raw), len(test_raw)) <= look_back:
        raise ValueError(
            f"Not enough rows: the train and test splits each need more than "
            f"{look_back} rows, got {len(train_raw)} and {len(test_raw)}."
        )

    # Normalize. The scaler is fitted on the training split only so that the
    # test split does not influence the normalization.
    scaler = MinMaxScaler(feature_range=(0, 1))
    train = scaler.fit_transform(train_raw)
    test = scaler.transform(test_raw)

    # Reshape into X = previous look_back values, Y = next value.
    train_x, train_y = create_dataset(train, look_back)
    test_x, test_y = create_dataset(test, look_back)
    # Match the model's input_shape below: one time step of look_back features.
    train_x = train_x.reshape(train_x.shape[0], 1, look_back)
    test_x = test_x.reshape(test_x.shape[0], 1, look_back)

    # Create and train the LSTM network.
    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(train_x, train_y, epochs=epochs, batch_size=1, verbose=0)

    # Predict, then map predictions and targets back to original units.
    # Targets are passed as a single column, the shape the scaler was fitted on.
    train_predict = scaler.inverse_transform(model.predict(train_x))
    test_predict = scaler.inverse_transform(model.predict(test_x))
    train_true = scaler.inverse_transform(train_y.reshape(-1, 1))
    test_true = scaler.inverse_transform(test_y.reshape(-1, 1))

    # Compute RMSE.
    train_score = np.sqrt(
        mean_squared_error(train_true[:, 0], train_predict[:, 0])
    )
    test_score = np.sqrt(
        mean_squared_error(test_true[:, 0], test_predict[:, 0])
    )

    # Prepare plot data: prediction k of a split refers to the row look_back
    # positions after the split's k-th row.
    train_plot = np.full_like(values, np.nan)
    train_plot[look_back:train_size, :] = train_predict
    test_plot = np.full_like(values, np.nan)
    test_plot[train_size + look_back:, :] = test_predict

    return values, train_plot, test_plot, train_score, test_score


st.title("LSTM Stock Price Prediction")

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    # Read the file and run the prediction.
    original_data, train_predict, test_predict, train_score, test_score = (
        train_and_predict(uploaded_file)
    )

    # Show the results.
    st.subheader("Prediction Results")

    # Draw the chart.
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(original_data, label='Original Data', color='blue')
    ax.plot(train_predict, label='Training Predictions', linestyle='--', color='green')
    ax.plot(test_predict, label='Test Predictions', linestyle='--', color='red')
    ax.set_xlabel('Time')
    ax.set_ylabel('Stock Price')
    ax.set_title('Original Data and Predictions')
    ax.legend()
    ax.grid(True, linestyle='--', alpha=0.7)
    st.pyplot(fig)
    # pyplot keeps created figures registered until they are closed; close it
    # so figures do not accumulate across script reruns.
    plt.close(fig)

    # Show the scores.
    col1, col2 = st.columns(2)
    with col1:
        st.metric("Train Score (RMSE)", f"{train_score:.2f}")
    with col2:
        st.metric("Test Score (RMSE)", f"{test_score:.2f}")
else:
    st.info("Please upload a CSV file to start the prediction.")

st.markdown("""
This application uses an LSTM (Long Short-Term Memory) neural network to predict stock prices based on historical data.

To use:
1. Upload a CSV file containing historical stock price data.
2. The app will train an LSTM model on this data and make predictions.
3. You'll see a graph showing the original data and predictions, along with RMSE scores for training and test sets.

Note: The CSV file should have the stock prices in the second column.
""")