AlanRex committed on
Commit
043f849
·
verified ·
1 Parent(s): 9812b07

Upload 4 files

Browse files
.gitattributes CHANGED
@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
36
  stock_lstm_model_v2.keras filter=lfs diff=lfs merge=lfs -text
37
  9CE6ABB0E688BCE5A5B3E69920220912-20250909.xlsx filter=lfs diff=lfs merge=lfs -text
38
  期末專案輸入資料20220912-20250909.xlsx filter=lfs diff=lfs merge=lfs -text
 
 
36
  stock_lstm_model_v2.keras filter=lfs diff=lfs merge=lfs -text
37
  9CE6ABB0E688BCE5A5B3E69920220912-20250909.xlsx filter=lfs diff=lfs merge=lfs -text
38
  期末專案輸入資料20220912-20250909.xlsx filter=lfs diff=lfs merge=lfs -text
39
+ taiwan_stock_predictor.keras filter=lfs diff=lfs merge=lfs -text
model_predictor.py CHANGED
@@ -1,152 +1,572 @@
1
- # model_predictor.py
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from tensorflow.keras.models import load_model
6
- import joblib
7
- import yfinance as yf
8
-
9
- # --- 模型與設定檔 (未來訓練好後,請將檔案放在同目錄下) ---
10
- MODEL_FILE = 'stock_predictor_model.h5'
11
- SCALER_X_FILE = 'scaler_X.pkl'
12
- SCALER_Y_FILE = 'scaler_y.pkl'
13
- LOOKBACK_DAYS = 30 # 必須與訓練時的 LOOKBACK_DAYS 相同
14
-
15
- # --- 啟動時載入模型與縮放器 (只會載入一次) ---
16
- try:
17
- model = load_model(MODEL_FILE)
18
- scaler_X = joblib.load(SCALER_X_FILE)
19
- scaler_y = joblib.load(SCALER_Y_FILE)
20
- print("進階 LSTM 模型與縮放器載入成功。")
21
- except Exception as e:
22
- print(f"提示:未找到或無法載入進階模型檔案 ({e})。應用將使用簡易統計模型。")
23
- model = None
24
-
25
- # --- 從 app.py 複製過來的技術指標計算函式 ---
26
- # 確保資料準備的邏輯一致
27
- def calculate_technical_indicators(df):
28
- """計算技術指標"""
29
- if df.empty: return df
30
- df['MA5'] = df['Close'].rolling(window=5).mean()
31
- df['MA20'] = df['Close'].rolling(window=20).mean()
32
- delta = df['Close'].diff()
33
- gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
34
- loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
35
- rs = gain / loss
36
- df['RSI'] = 100 - (100 / (1 + rs))
37
- exp1 = df['Close'].ewm(span=12).mean()
38
- exp2 = df['Close'].ewm(span=26).mean()
39
- df['MACD'] = exp1 - exp2
40
- df['MACD_Signal'] = df['MACD'].ewm(span=9).mean()
41
- df['MACD_Histogram'] = df['MACD'] - df['MACD_Signal']
42
- low_min = df['Low'].rolling(window=9).min()
43
- high_max = df['High'].rolling(window=9).max()
44
- rsv = (df['Close'] - low_min) / (high_max - low_min) * 100
45
- df['K'] = rsv.ewm(com=2).mean()
46
- df['D'] = df['K'].ewm(com=2).mean()
47
- df['up_move'] = df['High'] - df['High'].shift(1)
48
- df['down_move'] = df['Low'].shift(1) - df['Low']
49
- df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
50
- df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
51
- df['TR'] = np.max([df['High'] - df['Low'], abs(df['High'] - df['Close'].shift(1)), abs(df['Low'] - df['Close'].shift(1))], axis=0)
52
- df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
53
- df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
54
- df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
55
- df['ADX'] = df['DX'].ewm(com=13, adjust=False).mean()
56
- return df
57
-
58
-
59
- def get_all_features_for_model(period="3y"):
60
- """獲取並整合模型需要的所有15個特徵。"""
61
- print("正在下載市場數據以準備進階模型輸入...")
62
- tickers = {'^TWII': 'TWII', '^SOX': 'SOX', 'TSM': 'TSM_ADR'}
63
- data_yf = yf.download(list(tickers.keys()), period=period, auto_adjust=True)
64
-
65
- twii_data = data_yf.loc[:, ('Open', 'High', 'Low', 'Close', 'Volume')]['TWII'].copy()
66
-
67
- print("正在計算技術指標...")
68
- df_main = calculate_technical_indicators(twii_data)
69
-
70
- print("正在合併外部市場與檔案數據...")
71
- df_main['費城 半導體'] = data_yf['Close']['SOX']
72
- df_main['台積電 ADR'] = data_yf['Close']['TSM_ADR']
73
-
74
- try:
75
- df_climate = pd.read_csv('business_climate.csv')
76
- df_climate['Date'] = pd.to_datetime(df_climate['Date'].astype(str) + '-01')
77
- df_climate = df_climate.set_index('Date').rename(columns={'Index': '景氣燈號'})
78
-
79
- df_pmi = pd.read_csv('taiwan_pmi.csv')
80
- df_pmi['Date'] = pd.to_datetime(df_pmi['DATE'].astype(str) + '-01')
81
- df_pmi = df_pmi.set_index('Date').rename(columns={'INDEX': 'PMI'})
82
-
83
- df_main = pd.merge(df_main, df_climate, left_index=True, right_index=True, how='left')
84
- df_main = pd.merge(df_main, df_pmi, left_index=True, right_index=True, how='left')
85
- except FileNotFoundError as e:
86
- print(f"警告: 找不到檔案 {e.filename},相關欄位將為空。")
87
- df_main['景氣燈號'] = np.nan
88
- df_main['PMI'] = np.nan
89
-
90
- df_main.fillna(method='ffill', inplace=True)
91
- df_main.dropna(inplace=True)
92
-
93
- df_final = df_main.rename(columns={
94
- 'Close': '加權指數', 'Volume': '成交量', 'K': 'K線', 'D': 'D線',
95
- 'MACD_Signal': 'MACD信號線', 'MACD_Histogram': 'MACD柱狀圖'
96
- })
97
-
98
- print("所有特徵整合完畢!")
99
- return df_final
100
-
101
-
102
- def advanced_lstm_predict(predict_days: int = 5):
103
- """使用訓練好的 LSTM 模型進行預測。"""
104
- if model is None:
105
- print("進階模型未載入,無法進行預測。")
106
- return None
107
-
108
- try:
109
- # 1. 獲取並整合所有最新資料
110
- all_features_df = get_all_features_for_model()
111
- if len(all_features_df) < LOOKBACK_DAYS:
112
- print("資料長度不足,無法進行進階預測。")
113
- return None
114
-
115
- # 2. 準備輸入資料
116
- FEATURES = [
117
- '加權指數', '成交量', '費城 半導體', '台積電 ADR', 'RSI', 'MACD',
118
- 'MACD信號線', 'MACD柱狀圖', 'K線', 'D線', '+DI', '-DI', 'ADX',
119
- '景氣燈號', 'PMI'
120
- ]
121
-
122
- last_sequence_df = all_features_df[FEATURES].tail(LOOKBACK_DAYS)
123
-
124
- if last_sequence_df.isnull().values.any():
125
- print("警告:輸入的序列資料中存在缺失值,無法預測。")
126
- return None
127
-
128
- input_scaled = scaler_X.transform(last_sequence_df)
129
- input_reshaped = np.reshape(input_scaled, (1, LOOKBACK_DAYS, len(FEATURES)))
130
-
131
- # 3. 執行預測
132
- prediction_scaled = model.predict(input_reshaped)
133
- prediction_unscaled = scaler_y.inverse_transform(prediction_scaled)
134
-
135
- # 4. 處理預測結果
136
- target_map = {1: 0, 5: 1, 10: 2}
137
- if predict_days not in target_map:
138
- predict_days = 5 # 預設值
139
-
140
- predicted_price = prediction_unscaled[0][target_map[predict_days]]
141
-
142
- last_price = all_features_df['加權指數'].iloc[-1]
143
- change_pct = ((predicted_price - last_price) / last_price) * 100
144
-
145
- return {
146
- 'predicted_price': predicted_price,
147
- 'change_pct': change_pct,
148
- 'confidence': 0.85 # 可設為固定值
149
- }
150
- except Exception as e:
151
- print(f"執行進階預測時發生錯誤: {e}")
152
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """model_predictor.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1CaAPRdPsp3Jt5tQ3BLVcK19euWZmFme5
8
+ """
9
+
10
+ # model_predictor.py
11
+ # 進階LSTM模型預測器,適用於HUGING_FACE_V4.2
12
+
13
+ import os
14
+ import numpy as np
15
+ import pandas as pd
16
+ import yfinance as yf
17
+ from datetime import datetime, timedelta
18
+ import joblib
19
+ from sklearn.preprocessing import StandardScaler, RobustScaler
20
+ from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
21
+ import warnings
22
+ warnings.filterwarnings('ignore')
23
+
24
+ # TensorFlow/Keras 相關
25
+ try:
26
+ import tensorflow as tf
27
+ from tensorflow.keras.models import Sequential, load_model
28
+ from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, GRU, Bidirectional
29
+ from tensorflow.keras.optimizers import Adam
30
+ from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
31
+ from tensorflow.keras.regularizers import l1_l2
32
+ print("TensorFlow 載入成功")
33
+ except ImportError:
34
+ print("警告:TensorFlow 未安裝,模型將無法正常運作")
35
+ tf = None
36
+
37
class AdvancedStockPredictor:
    """Multi-horizon LSTM forecaster for the TAIEX (^TWII) closing price.

    Features are built from TAIEX OHLCV data (technical indicators), the
    closing prices of several US indices and the TSMC ADR fetched through
    yfinance, and two optional CSV series (business climate and PMI).  The
    network has one output per entry in ``target_names``.
    """

    # yfinance ticker for every market series the model consumes.
    _SYMBOLS = {
        'TAIEX': '^TWII',    # Taiwan weighted index
        'DJI': '^DJI',       # Dow Jones Industrial Average
        'NAS': '^IXIC',      # NASDAQ Composite
        'SOX': '^SOX',       # PHLX Semiconductor index
        'SP500': '^GSPC',    # S&P 500
        'TSM_ADR': 'TSM',    # TSMC ADR
    }

    # Non-TAIEX series whose closing price becomes a feature column.
    _EXTERNAL_MARKETS = ('DJI', 'NAS', 'SOX', 'SP500', 'TSM_ADR')

    # Forecast horizon (rows ahead) -> column index of the model output.
    # The order matches ``target_names``.
    _HORIZON_TO_OUTPUT = {1: 0, 5: 1, 10: 2, 20: 3, 60: 4}

    def __init__(self, model_name='taiwan_stock_predictor'):
        """Create an untrained predictor.

        Args:
            model_name: Base name of the artifact files
                (``<name>.keras``, ``<name>_scaler_X.pkl``, ``<name>_scaler_y.pkl``).
        """
        self.model_name = model_name
        self.model = None
        self.scaler_X = RobustScaler()
        self.scaler_y = StandardScaler()
        self.sequence_length = 60  # number of history rows fed to the network
        self.feature_names = [
            'volume', 'rate', 'DJI', 'NAS', 'SOX', 'SP500', 'TSM_ADR',
            'RSI', 'MACD', 'MACDsign', 'MACDvol', 'K', 'D',
            '+DI', '-DI', 'ADX', 'business_climate', 'PMI'
        ]
        self.target_names = ['close_1d', 'close_5d', 'close_10d', 'close_20d', 'close_60d']
        self.is_trained = False

    def fetch_yfinance_data(self, start_date='2022-09-12', end_date='2025-09-08'):
        """Download daily history for every symbol in ``_SYMBOLS``.

        Args:
            start_date: Start date passed to yfinance, ``YYYY-MM-DD``.
            end_date: End date passed to yfinance, ``YYYY-MM-DD``.

        Returns:
            dict mapping the series name (e.g. ``'TAIEX'``) to the DataFrame
            returned by ``Ticker.history``.  Series that fail to download or
            come back empty are left out.
        """
        print("正在從 yfinance 獲取數據...")

        data_dict = {}
        for name, symbol in self._SYMBOLS.items():
            try:
                hist = yf.Ticker(symbol).history(start=start_date, end=end_date)
                if hist.empty:
                    print(f"警告:無法獲取 {name} 數據")
                    continue
                data_dict[name] = hist
                print(f"成功獲取 {name} 數據: {len(hist)} 筆記錄")
            except Exception as e:
                # Best effort: one failing symbol must not abort the others.
                print(f"錯誤:獲取 {name} 數據時發生錯誤: {e}")

        return data_dict

    @staticmethod
    def _load_indicator_csv(path, column, label):
        """Read a (date, value) CSV into a frame indexed by date.

        Args:
            path: CSV file to read.
            column: Name given to the value column.
            label: Text inserted into the progress / error messages.

        Returns:
            A date-indexed, date-sorted DataFrame with the single column
            ``column``, or None when the file is missing, has fewer than two
            columns, or cannot be read.
        """
        try:
            if not os.path.exists(path):
                return None
            frame = pd.read_csv(path)
            if len(frame.columns) < 2:
                return None
            # Only the first two columns are used; without this slice a file
            # with extra columns made the rename below fail.
            frame = frame.iloc[:, :2].copy()
            frame.columns = ['Date', column]
            # Drop any timezone so the index lines up with the market data.
            frame['Date'] = pd.to_datetime(frame['Date'], errors='coerce').dt.tz_localize(None)
            frame = frame.dropna(subset=['Date']).set_index('Date')
            # reindex(method='ffill') requires a monotonic index.
            frame = frame.sort_index()
            print(f"成功載入{label}數據: {len(frame)} 筆記錄")
            return frame
        except Exception as e:
            print(f"載入{label}數據時發生錯誤: {e}")
            return None

    def load_economic_data(self):
        """Load the optional business-climate and PMI CSV files.

        Returns:
            dict with the keys ``'business_climate'`` and/or ``'PMI'`` for the
            files that could be loaded; each value is a date-indexed frame
            holding one column of the same name.
        """
        sources = (
            ('business_climate', 'business_climate.csv', '景氣燈號'),
            ('PMI', 'taiwan_pmi.csv', ' PMI '),
        )
        economic_data = {}
        for column, path, label in sources:
            frame = self._load_indicator_csv(path, column, label)
            if frame is not None:
                economic_data[column] = frame
        return economic_data

    def calculate_technical_indicators(self, df):
        """Add the technical-indicator columns used as model features.

        Args:
            df: OHLCV frame with ``High``, ``Low``, ``Close`` and ``Volume``
                columns.  The input frame is not modified.

        Returns:
            A copy of ``df`` with the indicator columns added.  ``df`` itself
            is returned when it is empty; an empty DataFrame is returned when
            there are fewer than 60 rows or a calculation fails.
        """
        if df.empty:
            return df

        # Too little history to produce meaningful indicators.
        if len(df) < 60:
            return pd.DataFrame()

        df = df.copy()
        try:
            close, high, low = df['Close'], df['High'], df['Low']
            prev_close = close.shift(1)

            # Basic features
            df['volume'] = df['Volume']
            df['rate'] = close.pct_change()

            # RSI over a 14-row simple moving average of gains and losses
            delta = close.diff()
            gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
            loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
            rs = gain / loss
            df['RSI'] = 100 - (100 / (1 + rs))

            # MACD (12/26 EMA difference, 9-row signal line, histogram)
            df['MACD'] = close.ewm(span=12).mean() - close.ewm(span=26).mean()
            df['MACDsign'] = df['MACD'].ewm(span=9).mean()
            df['MACDvol'] = df['MACD'] - df['MACDsign']

            # Stochastic K/D over a 9-row window
            low_min = low.rolling(window=9).min()
            high_max = high.rolling(window=9).max()
            rsv = (close - low_min) / (high_max - low_min) * 100
            df['K'] = rsv.ewm(com=2).mean()
            df['D'] = df['K'].ewm(com=2).mean()

            # Directional movement index
            df['up_move'] = high - high.shift(1)
            df['down_move'] = low.shift(1) - low
            df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
            df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
            df['TR'] = np.max([high - low,
                               abs(high - prev_close),
                               abs(low - prev_close)], axis=0)

            smoothed_tr = df['TR'].ewm(com=13).mean()
            df['+DI'] = (df['+DM'].ewm(com=13).mean() / smoothed_tr) * 100
            df['-DI'] = (df['-DM'].ewm(com=13).mean() / smoothed_tr) * 100
            df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
            df['ADX'] = df['DX'].ewm(com=13).mean()

        except Exception as e:
            print(f"計算技術指標時發生錯誤: {e}")
            return pd.DataFrame()

        return df

    def _merge_external_data(self, main_df, market_data, economic_data):
        """Add external market closes and economic series to ``main_df`` in place.

        Market closes are aligned on the exact TAIEX dates (no filling);
        the economic series are forward-filled onto the TAIEX dates.
        """
        for name, data in market_data.items():
            if name not in self._EXTERNAL_MARKETS or data.empty:
                continue
            # Strip the exchange timezone so dates compare equal across markets.
            closes = data['Close'].tz_localize(None)
            main_df[name] = closes.reindex(main_df.index)

        for name in ('business_climate', 'PMI'):
            if name in economic_data:
                main_df[name] = economic_data[name][name].reindex(main_df.index, method='ffill')

    def prepare_training_data(self):
        """Build the flat feature and label matrices used for training.

        Returns:
            ``(X, y)`` as 2-D arrays with one row per usable date, or
            ``(None, None)`` when the TAIEX data is unavailable, the
            indicators cannot be computed, or too few rows remain.
        """
        print("開始準備訓練數據...")

        market_data = self.fetch_yfinance_data()
        economic_data = self.load_economic_data()

        if 'TAIEX' not in market_data:
            print("錯誤:無法獲取台股指數數據")
            return None, None

        # TAIEX is the backbone; everything else is aligned onto its dates.
        main_df = market_data['TAIEX'].copy()
        main_df.index = main_df.index.tz_localize(None)

        main_df = self.calculate_technical_indicators(main_df)
        if main_df.empty:
            print("錯誤:技術指標計算失敗")
            return None, None

        self._merge_external_data(main_df, market_data, economic_data)

        # Labels: the closing price ``days`` rows into the future.
        close_prices = main_df['Close']
        for days in self._HORIZON_TO_OUTPUT:
            main_df[f'close_{days}d'] = close_prices.shift(-days)

        # A feature whose source could not be loaded is filled with zeros.
        for feature in self.feature_names:
            if feature not in main_df.columns:
                print(f"警告:特徵 {feature} 不存在,使用預設值 0")
                main_df[feature] = 0
        feature_columns = list(self.feature_names)

        print(f"處理前數據量: {len(main_df)}")
        main_df = main_df.dropna()
        print(f"處理後數據量: {len(main_df)}")

        if len(main_df) < self.sequence_length + 60:
            print("錯誤:數據量不足以進行訓練")
            return None, None

        X = main_df[feature_columns].values
        y = main_df[self.target_names].values

        print(f"數據準備完成:X shape: {X.shape}, y shape: {y.shape}")
        return X, y

    def create_sequences(self, X, y):
        """Slice flat rows into overlapping training windows.

        Each window holds ``sequence_length`` consecutive rows of ``X`` and is
        paired with the label row of the LAST row inside the window.  That is
        the alignment ``predict`` relies on: it feeds the most recent window
        and reads the outputs as horizons counted from that last row.

        Args:
            X: 2-D array of features, one row per date.
            y: 2-D array of labels, row-aligned with ``X``.

        Returns:
            ``(X_seq, y_seq)`` with shapes ``(n, sequence_length, n_features)``
            and ``(n, n_targets)``; both are empty when there are fewer than
            ``sequence_length`` rows.
        """
        steps = self.sequence_length
        X_seq, y_seq = [], []

        # ``end`` is the exclusive end of the window, so the last row is end - 1.
        for end in range(steps, len(X) + 1):
            X_seq.append(X[end - steps:end])
            y_seq.append(y[end - 1])

        return np.array(X_seq), np.array(y_seq)

    def build_model(self, input_shape, output_shape):
        """Build and compile the stacked LSTM network.

        Args:
            input_shape: ``(sequence_length, n_features)``.
            output_shape: Number of regression outputs.

        Returns:
            The compiled Keras model.

        Raises:
            ImportError: If TensorFlow could not be imported.
        """
        if tf is None:
            raise ImportError("TensorFlow 未安裝,無法建立模型")

        model = Sequential([
            # Bidirectional LSTM over the full window
            Bidirectional(LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
                          input_shape=input_shape),
            BatchNormalization(),

            # Second recurrent layer, still emitting a sequence
            LSTM(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
            BatchNormalization(),

            # Third recurrent layer collapses the sequence to one vector
            LSTM(32, dropout=0.2, recurrent_dropout=0.2),
            BatchNormalization(),

            # Regularised dense head
            Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01)),
            Dropout(0.3),

            Dense(32, activation='relu', kernel_regularizer=l1_l2(l1=0.01, l2=0.01)),
            Dropout(0.2),

            # One linear output per forecast horizon
            Dense(output_shape, activation='linear')
        ])

        model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='huber',
            metrics=['mae', 'mse']
        )

        return model

    def train(self, epochs=100, batch_size=32, validation_split=0.2):
        """Fetch data, fit the scalers and the network, then save everything.

        Args:
            epochs: Maximum number of training epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction of the sequences held out for validation.

        Returns:
            True when training finished, False when TensorFlow is missing or
            no usable training data could be prepared.
        """
        print("開始訓練模型...")

        if tf is None:
            # Fail before downloading anything instead of raising from build_model.
            print("錯誤:TensorFlow 未安裝,無法訓練模型")
            return False

        X, y = self.prepare_training_data()
        if X is None or y is None:
            print("錯誤:無法準備訓練數據")
            return False

        # Scale per feature / per target on the flat rows, then window.
        X_scaled = self.scaler_X.fit_transform(X)
        y_scaled = self.scaler_y.fit_transform(y)

        X_seq, y_seq = self.create_sequences(X_scaled, y_scaled)
        if len(X_seq) == 0:
            print("錯誤:無法創建有效序列")
            return False

        print(f"訓練數據形狀:X_seq: {X_seq.shape}, y_seq: {y_seq.shape}")

        self.model = self.build_model(
            input_shape=(X_seq.shape[1], X_seq.shape[2]),
            output_shape=y_seq.shape[1]
        )

        print("模型架構:")
        self.model.summary()

        callbacks = [
            EarlyStopping(patience=15, restore_best_weights=True, monitor='val_loss'),
            ReduceLROnPlateau(factor=0.5, patience=8, min_lr=0.0001, monitor='val_loss'),
            ModelCheckpoint(f'{self.model_name}.keras', save_best_only=True, monitor='val_loss')
        ]

        self.model.fit(
            X_seq, y_seq,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=callbacks,
            verbose=1
        )

        self.save_model()
        self.evaluate_model(X_seq, y_seq, validation_split)

        self.is_trained = True
        print("模型訓練完成!")
        return True

    def evaluate_model(self, X_seq, y_seq, validation_split):
        """Print MAE / MSE / R2 per target on the trailing validation slice.

        The slice is the last ``validation_split`` fraction of the sequences,
        computed with the same ``int(len * (1 - split))`` cut used here only;
        metrics are reported in original (unscaled) price units.
        """
        print("\n模型評估結果:")

        split_idx = int(len(X_seq) * (1 - validation_split))
        X_val, y_val = X_seq[split_idx:], y_seq[split_idx:]

        y_pred = self.model.predict(X_val)

        # Back to price units before computing the metrics.
        y_val_orig = self.scaler_y.inverse_transform(y_val)
        y_pred_orig = self.scaler_y.inverse_transform(y_pred)

        for i, target in enumerate(self.target_names):
            mae = mean_absolute_error(y_val_orig[:, i], y_pred_orig[:, i])
            mse = mean_squared_error(y_val_orig[:, i], y_pred_orig[:, i])
            r2 = r2_score(y_val_orig[:, i], y_pred_orig[:, i])
            print(f"{target}: MAE={mae:.2f}, MSE={mse:.2f}, R2={r2:.4f}")

    def save_model(self):
        """Write the model (if any) and both scalers next to the script.

        Errors are reported on stdout and not raised.
        """
        try:
            if self.model is not None:
                self.model.save(f'{self.model_name}.keras')
                print(f"模型已儲存: {self.model_name}.keras")

            joblib.dump(self.scaler_X, f'{self.model_name}_scaler_X.pkl')
            joblib.dump(self.scaler_y, f'{self.model_name}_scaler_y.pkl')
            print("縮放器已儲存")

        except Exception as e:
            print(f"儲存模型時發生錯誤: {e}")

    def load_model(self):
        """Load the saved network and both scalers from disk.

        All three artifacts are required: a model without the scalers it was
        trained with cannot produce usable prices.  State is only updated
        once everything has loaded.

        Returns:
            True when the model and both scalers were loaded, otherwise False
            (``is_trained`` is then left untouched).
        """
        model_path = f'{self.model_name}.keras'
        scaler_x_path = f'{self.model_name}_scaler_X.pkl'
        scaler_y_path = f'{self.model_name}_scaler_y.pkl'

        missing = [p for p in (model_path, scaler_x_path, scaler_y_path) if not os.path.exists(p)]
        if missing:
            print(f"找不到模型檔案: {', '.join(missing)}")
            return False

        try:
            if tf is None:
                print("錯誤:TensorFlow 未安裝,無法載入模型")
                return False

            model = load_model(model_path)
            print("模型載入成功")

            # NOTE: joblib/pickle files execute code on load; only load trusted artifacts.
            scaler_X = joblib.load(scaler_x_path)
            print("X 縮放器載入成功")

            scaler_y = joblib.load(scaler_y_path)
            print("y 縮放器載入成功")

        except Exception as e:
            print(f"載入模型時發生錯誤: {e}")
            return False

        self.model = model
        self.scaler_X = scaler_X
        self.scaler_y = scaler_y
        self.is_trained = True
        return True

    def _scale_window(self, window):
        """Scale one ``(rows, n_features)`` window and add the batch axis.

        The scaler was fitted on rows of ``n_features`` values, so the window
        must be transformed row-wise; flattening it first makes the feature
        count mismatch and the transform fail.
        """
        scaled = np.asarray(self.scaler_X.transform(window))
        return scaled.reshape(1, len(window), -1)

    def predict(self, predict_days=5):
        """Forecast the TAIEX close ``predict_days`` rows ahead.

        Args:
            predict_days: One of 1, 5, 10, 20 or 60.

        Returns:
            dict with ``predicted_price``, ``change_pct``, ``confidence``,
            ``current_price`` and ``prediction_days``, or None when the model
            is unavailable, the horizon is unsupported, data is insufficient,
            or any step fails.
        """
        if not self.is_trained and not self.load_model():
            print("錯誤:模型未訓練且無法載入已訓練的模型")
            return None

        if self.model is None:
            print("錯誤:模型未載入")
            return None

        # Reject unsupported horizons before spending time on downloads.
        if predict_days not in self._HORIZON_TO_OUTPUT:
            print(f"不支援的預測天數:{predict_days}")
            return None

        try:
            print("正在獲取最新數據進行預測...")
            now = datetime.now()
            market_data = self.fetch_yfinance_data(
                start_date=(now - timedelta(days=120)).strftime('%Y-%m-%d'),
                end_date=now.strftime('%Y-%m-%d')
            )
            economic_data = self.load_economic_data()

            if 'TAIEX' not in market_data:
                print("錯誤:無法獲取最新台股數據")
                return None

            # Same preparation pipeline as training.
            main_df = market_data['TAIEX'].copy()
            main_df.index = main_df.index.tz_localize(None)

            main_df = self.calculate_technical_indicators(main_df)
            if main_df.empty or len(main_df) < self.sequence_length:
                print("錯誤:數據不足以進行預測")
                return None

            self._merge_external_data(main_df, market_data, economic_data)

            # Features whose source is unavailable are filled with zeros.
            for feature in self.feature_names:
                if feature not in main_df.columns:
                    main_df[feature] = 0

            # Carry the last known value forward, then zero-fill leading gaps.
            main_df = main_df.ffill().fillna(0)

            X = main_df[self.feature_names].values
            if len(X) < self.sequence_length:
                print("錯誤:歷史數據不足")
                return None

            # Most recent window, scaled row-wise and shaped (1, steps, features).
            X_scaled = self._scale_window(X[-self.sequence_length:])

            y_pred_scaled = self.model.predict(X_scaled)
            y_pred = self.scaler_y.inverse_transform(y_pred_scaled)

            current_price = main_df['Close'].iloc[-1]
            predicted_price = y_pred[0][self._HORIZON_TO_OUTPUT[predict_days]]
            change_pct = ((predicted_price - current_price) / current_price) * 100

            # Heuristic only: larger predicted moves get lower confidence,
            # clamped to the range [0.6, 0.9].
            confidence = min(0.9, max(0.6, 1 - abs(change_pct) / 100))

            result = {
                'predicted_price': float(predicted_price),
                'change_pct': float(change_pct),
                'confidence': float(confidence),
                'current_price': float(current_price),
                'prediction_days': predict_days
            }

            print(f"預測結果:{predict_days}天後價格 = {predicted_price:.2f}, 變化 = {change_pct:+.2f}%")
            return result

        except Exception as e:
            print(f"預測時發生錯誤: {e}")
            return None
528
+
529
# Module-wide predictor instance, created lazily on first request.
_predictor = None


def get_predictor():
    """Return the shared AdvancedStockPredictor, creating it on the first call."""
    global _predictor
    if _predictor is not None:
        return _predictor
    _predictor = AdvancedStockPredictor()
    return _predictor
538
+
539
def advanced_lstm_predict(predict_days=5):
    """Prediction entry point called by HUGING_FACE_V4.2.

    Delegates to the shared predictor returned by ``get_predictor``.

    Args:
        predict_days (int): Forecast horizon (1, 5, 10, 20, 60).

    Returns:
        dict or None: The predictor's result dictionary (includes
        predicted_price, change_pct, confidence), or None when the
        prediction raised an exception.
    """
    outcome = None
    try:
        outcome = get_predictor().predict(predict_days)
    except Exception as e:
        # Never let a prediction failure propagate into the calling app.
        print(f"advanced_lstm_predict 錯誤: {e}")
    return outcome
555
+
556
def train_model():
    """Train a fresh AdvancedStockPredictor (50 epochs, batch size 16).

    Returns:
        bool: True when training succeeded, False otherwise.
    """
    print("開始訓練進階LSTM模型...")
    trainer = AdvancedStockPredictor()
    succeeded = trainer.train(epochs=50, batch_size=16)

    if not succeeded:
        print("模型訓練失敗!")
        return False

    print("模型訓練成功!")
    return True


if __name__ == "__main__":
    # Running the module as a script trains the model.
    train_model()
taiwan_stock_predictor.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d07d2fae4191bf2bf62428fdb11f4350198209e9dea796d2e19afc06a496a10b
3
+ size 3088553
taiwan_stock_predictor_scaler_X.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dca04b01833f094cd3c49d51169274dcd35442683bf9a5ed504888b0030fcb69
3
+ size 791
taiwan_stock_predictor_scaler_y.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcdbf13c4e4ba400326b2611b2620ebc8a5437adc25e89d99c8b9a63dd6ef9fb
3
+ size 719