AlanRex committed on
Commit
0c9a985
·
verified ·
1 Parent(s): 5c15c56

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +50 -106
  2. model_predictor.py +237 -159
app.py CHANGED
@@ -854,66 +854,43 @@ def simple_statistical_predict(data, predict_days=5):
854
 
855
  def calculate_new_features(df):
856
  """
857
- 【修正版】使用與 XGBoost 訓練時完全相同的特徵工程
858
- 完全對應 xgboost_for_stock 中的 create_new_features 函數
859
  """
860
  if df.empty:
861
  return df
862
 
863
- # 1. return_t-1 前一日報酬率
864
  df['return_t-1'] = df['Close'].pct_change()
865
 
866
- # 2. return_t-5 過去 5 日累積報酬率
867
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
868
 
869
- # 3. MA5_close 5 日移動平均價
870
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
871
 
872
- # 4. volatility_5d 5 日報酬標準差(短期波動)
 
 
 
873
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
874
 
875
- # 5. volume_ratio_5d 今日成交量 ÷ 5 日均量
876
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
877
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
878
 
879
- # 6. MACD_diff MACD - signal(趨勢強弱)
880
- if 'MACD' in df.columns and 'MACD_Signal' in df.columns:
881
- df['MACD_diff'] = df['MACD'] - df['MACD_Signal']
882
- else:
883
- # 計算 MACD
884
- exp1 = df['Close'].ewm(span=12).mean()
885
- exp2 = df['Close'].ewm(span=26).mean()
886
- macd_line = exp1 - exp2
887
- signal_line = macd_line.ewm(span=9).mean()
888
- df['MACD_diff'] = macd_line - signal_line
889
-
890
- # 7. MACDvol — 【修正】對應訓練資料中的 MACDvol 欄位
891
- if 'MACDvol' in df.columns:
892
- df['MACDvol'] = df['MACDvol']
893
- else:
894
- df['MACDvol'] = df['MACD_diff'] # 使用 MACD_diff 作為 MACDvol
895
-
896
- # 8. RSI_14 — 14 日 RSI 指標
897
- if 'RSI' in df.columns:
898
- df['RSI_14'] = df['RSI']
899
- else:
900
- # 計算 RSI
901
- delta = df['Close'].diff()
902
- gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
903
- loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
904
- rs = gain / loss
905
- df['RSI_14'] = 100 - (100 / (1 + rs))
906
-
907
- # 9. ADX 指標(從現有技術指標中獲取)
908
- if 'ADX' not in df.columns:
909
- # 如果沒有ADX,計算簡化版本或設置預設值
910
- df['ADX'] = 25 # 預設中性值
911
-
912
- # 10. volume_weighted_return — 當日報酬率絕對值 × 當日成交量
913
- df['volume_weighted_return'] = abs(df['return_t-1']) * df['Volume']
914
 
915
- # 11. close(當前收盤價)
916
- df['close'] = df['Close']
 
 
 
 
917
 
918
  # 移除輔助欄位
919
  if 'volume_5d_avg' in df.columns:
@@ -922,27 +899,23 @@ def calculate_new_features(df):
922
  return df
923
 
924
  def advanced_xgboost_predict(predict_days=5):
925
- """
926
- 【修正版】使用 XGBoost 模型進行預測 - 與訓練模型完全一致
927
- """
928
  try:
929
  print(f"開始XGBoost預測 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
930
 
931
  xgb_model = XGBoostModel()
932
 
933
- # 強制重新獲取台指數據
934
  print("正在獲取最新台指數據...")
935
  taiex_data = get_stock_data('^TWII', '2y')
936
  if taiex_data.empty or len(taiex_data) < 60:
937
  print("台指期數據不足,無法進行XGBoost預測")
938
  return None
939
 
940
- # 計算技術指標
941
  taiex_data = calculate_technical_indicators(taiex_data)
942
- # 【修正】使用新的特徵工程函數
943
  taiex_data = calculate_new_features(taiex_data)
944
 
945
- # 獲取美股數據
946
  print("正在獲取美股數據...")
947
  us_market_data = get_us_market_data()
948
 
@@ -962,22 +935,14 @@ def advanced_xgboost_predict(predict_days=5):
962
  latest_data = taiex_data.iloc[-1]
963
  yesterday_close = latest_data['Close']
964
 
965
- # 【修正】使用與訓練時完全相同的特徵列表
966
  new_feature_columns = [
967
- 'close', # 前一日收盤價
968
- 'return_t-1', # 前一日漲跌率
969
- 'return_t-5', # 過去 5 日累積漲跌率
970
- 'MA5_close', # 5 日移動平均價
971
- 'volatility_5d', # 5 日漲跌標準差
972
- 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
973
- 'MACD_diff', # MACD - signal
974
- 'dji_return_t-1', # 前一日道瓊指數漲跌率
975
- 'sox_return_t-1', # 前一日費半指數漲跌率
976
- 'NEWS', # 新聞情緒分數
977
- 'MACDvol', # MACD成交量
978
- 'RSI_14', # 14日RSI
979
- 'ADX', # ADX指標
980
- 'volume_weighted_return' # 成交量加權報酬率
981
  ]
982
 
983
  # 獲取美股報酬率
@@ -1005,49 +970,31 @@ def advanced_xgboost_predict(predict_days=5):
1005
  feature_names = []
1006
 
1007
  for feature in new_feature_columns:
1008
- if feature == 'dji_return_t-1':
1009
- features_list.append(dji_return)
1010
- elif feature == 'sox_return_t-1':
1011
- features_list.append(sox_return)
1012
- elif feature == 'NEWS':
1013
- features_list.append(sentiment_score_raw)
1014
- elif feature in latest_data.index:
1015
  value = latest_data[feature]
1016
  if pd.isna(value):
1017
- # 設置預設值
1018
- if 'return' in feature:
1019
- default_value = 0.0
1020
- elif 'MA' in feature:
1021
- default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
1022
- elif 'volatility' in feature:
1023
- default_value = 0.02
1024
- elif 'volume_ratio' in feature:
1025
- default_value = 1.0
1026
- elif 'MACD' in feature:
1027
- default_value = 0.0
1028
- elif feature == 'RSI_14':
1029
- default_value = 50.0
1030
- elif feature == 'ADX':
1031
- default_value = 25.0
1032
- elif feature == 'close':
1033
- default_value = yesterday_close
1034
- else:
1035
- default_value = 0.0
1036
 
1037
  features_list.append(default_value)
1038
  else:
1039
  features_list.append(value)
1040
- else:
1041
- # 特徵不存在,設置預設值
1042
- print(f"警告:特徵 {feature} 不存在,使用預設值")
1043
- features_list.append(0.0)
1044
 
1045
- feature_names.append(feature)
 
 
 
 
1046
 
1047
  # 轉換為 DataFrame
1048
  input_df = pd.DataFrame([features_list], columns=feature_names)
1049
 
1050
- print(f"特徵向量長度: {len(features_list)}")
 
1051
  print("\n=== 📊 本次預測輸入特徵 DataFrame ===")
1052
  print(input_df)
1053
  print("=== ✅ 檢查以上特徵是否每次都有變 ===\n")
@@ -1058,7 +1005,7 @@ def advanced_xgboost_predict(predict_days=5):
1058
  if predictions is None:
1059
  return None
1060
 
1061
- # 【修正】處理預測結果 - 現在返回的已經是漲幅百分比
1062
  pred_mapping = {
1063
  1: 'Change_pct_t1_pred',
1064
  5: 'Change_pct_t5_pred',
@@ -1078,7 +1025,7 @@ def advanced_xgboost_predict(predict_days=5):
1078
 
1079
  return {
1080
  'predicted_price': predicted_price,
1081
- 'change_pct': predicted_change_pct, # 這已經是百分比格式
1082
  'confidence': 0.75
1083
  }
1084
 
@@ -1126,9 +1073,8 @@ def get_prediction(data, predict_days=5):
1126
  return simple_statistical_predict(data, predict_days)
1127
 
1128
  def calculate_technical_indicators(df):
1129
- """【修正版】計算技術指標,確保與模型訓練時一致"""
1130
- if df.empty:
1131
- return df
1132
 
1133
  # 移動平均線
1134
  df['MA5'] = df['Close'].rolling(window=5).mean()
@@ -1171,9 +1117,7 @@ def calculate_technical_indicators(df):
1171
  df['down_move'] = df['Low'].shift(1) - df['Low']
1172
  df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
1173
  df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
1174
- df['TR'] = np.max([df['High'] - df['Low'],
1175
- abs(df['High'] - df['Close'].shift(1)),
1176
- abs(df['Low'] - df['Close'].shift(1))], axis=0)
1177
  df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1178
  df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1179
  df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
 
854
 
855
  def calculate_new_features(df):
856
  """
857
+ 計算新的技術指標特徵 - 針對新特徵需求
 
858
  """
859
  if df.empty:
860
  return df
861
 
862
+ # 1. return_t-1 前一日報酬率
863
  df['return_t-1'] = df['Close'].pct_change()
864
 
865
+ # 2. return_t-5 過去 5 日累積報酬率
866
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
867
 
868
+ # 3. MA5_close 5 日移動平均價
869
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
870
 
871
+ # 4. MA20_close 20 日移動平均價
872
+ df['MA20_close'] = df['Close'].rolling(window=20).mean()
873
+
874
+ # 5. volatility_5d – 5 日報酬標準差(短期波動)
875
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
876
 
877
+ # 6. volume_ratio_5d 今日成交量 ÷ 5 日均量
878
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
879
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
880
 
881
+ # 7. RSI_14 14 RSI 指標
882
+ delta = df['Close'].diff()
883
+ gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
884
+ loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
885
+ rs = gain / loss
886
+ df['RSI_14'] = 100 - (100 / (1 + rs))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
887
 
888
+ # 8. MACD_diff – MACD - signal(趨勢強弱)
889
+ exp1 = df['Close'].ewm(span=12).mean()
890
+ exp2 = df['Close'].ewm(span=26).mean()
891
+ macd_line = exp1 - exp2
892
+ signal_line = macd_line.ewm(span=9).mean()
893
+ df['MACD_diff'] = macd_line - signal_line
894
 
895
  # 移除輔助欄位
896
  if 'volume_5d_avg' in df.columns:
 
899
  return df
900
 
901
  def advanced_xgboost_predict(predict_days=5):
902
+ """使用 XGBoost 模型進行預測 - 強制刷新數據版本"""
 
 
903
  try:
904
  print(f"開始XGBoost預測 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
905
 
906
  xgb_model = XGBoostModel()
907
 
908
+ # 強制重新獲取台指數據 - 不使用緩存
909
  print("正在獲取最新台指數據...")
910
  taiex_data = get_stock_data('^TWII', '2y')
911
  if taiex_data.empty or len(taiex_data) < 60:
912
  print("台指期數據不足,無法進行XGBoost預測")
913
  return None
914
 
 
915
  taiex_data = calculate_technical_indicators(taiex_data)
 
916
  taiex_data = calculate_new_features(taiex_data)
917
 
918
+ # 強制重新獲取美股數據
919
  print("正在獲取美股數據...")
920
  us_market_data = get_us_market_data()
921
 
 
935
  latest_data = taiex_data.iloc[-1]
936
  yesterday_close = latest_data['Close']
937
 
938
+ # 特徵列表保持不變
939
  new_feature_columns = [
940
+ 'return_t-1',
941
+ 'return_t-5',
942
+ 'MA5_close',
943
+ 'volatility_5d',
944
+ 'volume_ratio_5d',
945
+ 'MACD_diff',
 
 
 
 
 
 
 
 
946
  ]
947
 
948
  # 獲取美股報酬率
 
970
  feature_names = []
971
 
972
  for feature in new_feature_columns:
973
+ if feature in latest_data.index:
 
 
 
 
 
 
974
  value = latest_data[feature]
975
  if pd.isna(value):
976
+ if 'return' in feature: default_value = 0.0
977
+ elif 'MA' in feature: default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
978
+ elif 'volatility' in feature: default_value = 0.02
979
+ elif 'volume_ratio' in feature: default_value = 1.0
980
+ elif 'MACD' in feature: default_value = 0.0
981
+ else: default_value = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
982
 
983
  features_list.append(default_value)
984
  else:
985
  features_list.append(value)
 
 
 
 
986
 
987
+ feature_names.append(feature)
988
+
989
+ # 添加其他特徵
990
+ features_list.extend([dji_return, sox_return, yesterday_close, sentiment_score_raw])
991
+ feature_names.extend(['dji_return_t-1', 'sox_return_t-1', 'close', 'NEWS'])
992
 
993
  # 轉換為 DataFrame
994
  input_df = pd.DataFrame([features_list], columns=feature_names)
995
 
996
+ print(f"特徵向量: {[f'{f:.4f}' for f in features_list[:5]]}...") # 只顯示前5個
997
+ # 🔍 新增這段:完整印出本次預測輸入資料
998
  print("\n=== 📊 本次預測輸入特徵 DataFrame ===")
999
  print(input_df)
1000
  print("=== ✅ 檢查以上特徵是否每次都有變 ===\n")
 
1005
  if predictions is None:
1006
  return None
1007
 
1008
+ # 處理預測結果
1009
  pred_mapping = {
1010
  1: 'Change_pct_t1_pred',
1011
  5: 'Change_pct_t5_pred',
 
1025
 
1026
  return {
1027
  'predicted_price': predicted_price,
1028
+ 'change_pct': predicted_change_pct,
1029
  'confidence': 0.75
1030
  }
1031
 
 
1073
  return simple_statistical_predict(data, predict_days)
1074
 
1075
  def calculate_technical_indicators(df):
1076
+ """計算技術指標"""
1077
+ if df.empty: return df
 
1078
 
1079
  # 移動平均線
1080
  df['MA5'] = df['Close'].rolling(window=5).mean()
 
1117
  df['down_move'] = df['Low'].shift(1) - df['Low']
1118
  df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
1119
  df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
1120
+ df['TR'] = np.max([df['High'] - df['Low'], abs(df['High'] - df['Close'].shift(1)), abs(df['Low'] - df['Close'].shift(1))], axis=0)
 
 
1121
  df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1122
  df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1123
  df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
model_predictor.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import os
2
  import pandas as pd
3
  import numpy as np
@@ -11,30 +14,26 @@ class XGBoostModel:
11
  """
12
  初始化 XGBoost 模型預測器
13
 
14
- 【修正】與訓練檔案 xgboost_for_stock 完全一致的特徵欄位
 
 
15
  """
16
  self.model = None
17
  self.scaler = None
18
-
19
- # 【修正】使用與訓練時完全相同的特徵欄位順序
20
  self.feature_columns = [
21
- 'close', # 前一日收盤價
22
- 'return_t-1', # 前一日漲跌率
23
- 'return_t-5', # 過去 5 日累積漲跌率
24
- 'MA5_close', # 5 日移動平均價
25
- 'volatility_5d', # 5 日漲跌標準差
26
- 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
27
- 'MACD_diff', # MACD - signal
28
- 'dji_return_t-1', # 前一日道瓊指數漲跌率
29
- 'sox_return_t-1', # 前一日費半指數漲跌率
30
- 'NEWS', # 新聞情緒分數
31
- 'MACDvol', # MACD 成交量
32
- 'RSI_14', # 14日RSI
33
- 'ADX', # ADX指標
34
- 'volume_weighted_return' # 成交量加權報酬率
35
  ]
36
 
37
- # 輸出目標對應表(漲幅百分比)
38
  self.output_targets = {
39
  1: 'Change_pct_t1_pred', # 1天後漲幅%
40
  5: 'Change_pct_t5_pred', # 5天後漲幅%
@@ -43,125 +42,64 @@ class XGBoostModel:
43
  }
44
 
45
  print("XGBoost 模型預測器初始化完成")
46
- print(f"特徵數量:{len(self.feature_columns)}")
47
  print("輸出格式:漲幅百分比 (1日, 5日, 10日, 20日)")
48
 
49
  def load_model(self, model_path):
50
- """載入預訓練的 XGBoost 模型"""
 
 
 
 
 
 
 
 
51
  try:
 
52
  if not os.path.exists(model_path):
53
  print(f"錯誤:找不到模型檔案 {model_path}")
54
  return False
55
 
 
56
  self.model = xgb.XGBRegressor()
57
  self.model.load_model(model_path)
58
 
59
  print(f"成功載入模型:{model_path}")
 
 
60
  return True
61
 
62
  except Exception as e:
63
  print(f"載入模型時發生錯誤:{e}")
64
  return False
65
 
66
- def create_new_features(self, df):
67
- """
68
- 【修正】使用與訓練時完全相同的特徵工程函數
69
- 完全對應 xgboost_for_stock 中的 create_new_features 函數
70
- """
71
- # 1. return_t-1 — 前一日報酬率
72
- df['return_t-1'] = df['Close'].pct_change()
73
-
74
- # 2. return_t-5 — 過去 5 日累積報酬率
75
- df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
76
-
77
- # 3. MA5_close — 5 日移動平均價
78
- df['MA5_close'] = df['Close'].rolling(window=5).mean()
79
-
80
- # 4. volatility_5d — 5 日報酬標準差(短期波動)
81
- df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
82
-
83
- # 5. volume_ratio_5d — 今日成交量 ÷ 5 日均量
84
- df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
85
- df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
86
-
87
- # 6. MACD_diff — MACD - signal(趨勢強弱)
88
- if 'MACD' in df.columns and 'MACD_Signal' in df.columns:
89
- df['MACD_diff'] = df['MACD'] - df['MACD_Signal']
90
- elif 'MACD' in df.columns and 'MACDsign' in df.columns:
91
- # 【修正】支援訓練資料中的欄位名稱
92
- df['MACD_diff'] = df['MACD'] - df['MACDsign']
93
- else:
94
- # 計算 MACD
95
- exp1 = df['Close'].ewm(span=12).mean()
96
- exp2 = df['Close'].ewm(span=26).mean()
97
- macd_line = exp1 - exp2
98
- signal_line = macd_line.ewm(span=9).mean()
99
- df['MACD_diff'] = macd_line - signal_line
100
-
101
- # 7. dji_return_t-1 — 前一日道瓊指數報酬率(需外部提供)
102
- if 'dji_return_t-1' not in df.columns:
103
- df['dji_return_t-1'] = 0 # 預設值,實際使用時由外部傳入
104
-
105
- # 8. sox_return_t-1 — 前一日費半指數報酬率(需外部提供)
106
- if 'sox_return_t-1' not in df.columns:
107
- df['sox_return_t-1'] = 0 # 預設值,實際使用時由外部傳入
108
-
109
- # 9. NEWS — 新聞情緒分數(需外部提供)
110
- if 'NEWS' not in df.columns:
111
- df['NEWS'] = 0 # 預設值,實際使用時由外部傳入
112
-
113
- # 10. MACDvol — 【修正】對應訓練資料中的 MACDvol 欄位
114
- if 'MACDvol' in df.columns:
115
- df['MACDvol'] = df['MACDvol']
116
- elif 'MACD_Histogram' in df.columns:
117
- df['MACDvol'] = df['MACD_Histogram']
118
- else:
119
- df['MACDvol'] = df['MACD_diff'] # 使用 MACD_diff 作為替代
120
-
121
- # 11. RSI_14 — 14 日 RSI 指標
122
- if 'RSI' in df.columns:
123
- df['RSI_14'] = df['RSI']
124
- else:
125
- # 計算 RSI
126
- delta = df['Close'].diff()
127
- gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
128
- loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
129
- rs = gain / loss
130
- df['RSI_14'] = 100 - (100 / (1 + rs))
131
-
132
- # 12. ADX(需要從技術指標中獲取)
133
- if 'ADX' not in df.columns:
134
- df['ADX'] = 50 # 預設值
135
-
136
- # 13. volume_weighted_return — 當日報酬率絕對值 × 當日成交量
137
- df['volume_weighted_return'] = abs(df['return_t-1']) * df['Volume']
138
-
139
- # 14. close(當前收盤價)
140
- df['close'] = df['Close']
141
-
142
- # 移除輔助欄位
143
- if 'volume_5d_avg' in df.columns:
144
- df.drop('volume_5d_avg', axis=1, inplace=True)
145
-
146
- return df
147
 
148
  def preprocess_features(self, input_df):
149
- """預處理特徵數據"""
150
  # 確保特徵齊全
151
  missing_features = [f for f in self.feature_columns if f not in input_df.columns]
152
  if missing_features:
153
  print(f"警告:缺少以下特徵:{missing_features}")
154
  for feature in missing_features:
155
  input_df[feature] = 0
156
-
157
- # 選擇並排序特徵(確保順序與訓練時一致)
158
  input_df = input_df[self.feature_columns].fillna(0)
 
 
159
  return input_df
160
 
161
  def predict(self, model_name, input_df):
162
  """
163
  進行股價漲幅預測
164
 
 
 
 
 
165
  Returns:
166
  dict: 預測結果,包含各時間點的漲幅百分比
167
  """
@@ -172,42 +110,36 @@ class XGBoostModel:
172
  if not self.load_model(model_path):
173
  return None
174
 
 
 
 
 
 
175
  # 預處理特徵
176
  processed_df = self.preprocess_features(input_df.copy())
177
 
178
- print("=== 模型輸入特徵檢查 ===")
179
- print(f"輸入形狀: {processed_df.shape}")
180
- print("前5個特徵值:")
181
- for i, col in enumerate(processed_df.columns[:5]):
182
- print(f" {col}: {processed_df[col].iloc[0]:.6f}")
183
-
184
  # 進行預測
185
  predictions = self.model.predict(processed_df)
186
- print(f"原始預測輸出形狀: {predictions.shape}")
187
- print(f"原始預測值: {predictions}")
188
 
189
- # 【修正】處理多輸出預測結果
190
  if predictions.ndim == 1:
191
- # 單輸出情況 - 只有一個時間點的預測
192
  result = {
193
  'Change_pct_t1_pred': float(predictions[0])
194
  }
195
  else:
196
- # 多輸出情況:[t1, t5, t10, t20] - 對應訓練模型的四個輸出
197
- result = {}
198
- target_keys = ['Change_pct_t1_pred', 'Change_pct_t5_pred',
199
- 'Change_pct_t10_pred', 'Change_pct_t20_pred']
200
-
201
- for i, key in enumerate(target_keys):
202
- if i < predictions.shape[1]:
203
- result[key] = float(predictions[0][i])
204
- else:
205
- result[key] = 0.0
206
 
207
  # 輸出預測結果摘要
208
  print("=== 漲幅預測結果 ===")
209
  for key, value in result.items():
210
- days = key.split('_')[2][1:]
211
  direction = "上漲" if value > 0 else "下跌"
212
  print(f" {days}日後預測: {value:+.2f}% ({direction})")
213
 
@@ -219,19 +151,24 @@ class XGBoostModel:
219
  traceback.print_exc()
220
  return None
221
 
222
- except Exception as e:
223
- print(f"預測過程中發生錯誤:{e}")
224
- import traceback
225
- traceback.print_exc()
226
- return None
227
-
228
  def predict_single_timeframe(self, model_name, input_df, days):
229
- """預測特定時間框架的漲幅"""
 
 
 
 
 
 
 
 
 
 
230
  try:
231
  predictions = self.predict(model_name, input_df)
232
  if predictions is None:
233
  return None
234
 
 
235
  target_key = f'Change_pct_t{days}_pred'
236
 
237
  if target_key in predictions:
@@ -244,54 +181,195 @@ class XGBoostModel:
244
  print(f"單一時間框架預測時發生錯誤:{e}")
245
  return None
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  def get_feature_importance(self):
248
- """獲取特徵重要性"""
 
 
 
 
 
249
  try:
250
  if self.model is None:
251
  return None
252
 
 
253
  importance_scores = self.model.feature_importances_
 
 
254
  importance_dict = {}
255
  for i, feature in enumerate(self.feature_columns):
256
  if i < len(importance_scores):
257
  importance_dict[feature] = float(importance_scores[i])
258
 
259
- return dict(sorted(importance_dict.items(),
260
- key=lambda x: x[1],
261
- reverse=True))
 
 
 
262
 
263
  except Exception as e:
264
  print(f"獲取特徵重要性時發生錯誤:{e}")
265
  return None
266
 
267
- # 測試函數
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  if __name__ == "__main__":
 
269
  model = XGBoostModel()
270
 
271
- # 創建測試數據
272
  test_data = pd.DataFrame({
273
- 'Close': [150.0] * 30, # 需要足夠的歷史數據來計算技術指標
274
- 'Volume': [1000000] * 30,
275
- 'High': [152.0] * 30,
276
- 'Low': [148.0] * 30,
277
- 'Open': [149.0] * 30
 
 
 
 
 
278
  })
279
 
280
- # 創建特徵
281
- test_data = model.create_new_features(test_data)
 
282
 
283
- # 手動設置外部特徵
284
- test_data['dji_return_t-1'] = 0.01
285
- test_data['sox_return_t-1'] = 0.015
286
- test_data['NEWS'] = 0.1
287
 
288
- # 取最後一行作為輸入
289
- input_data = test_data.tail(1)
290
-
291
- print("測試特徵工程...")
292
- print("可用特徵:")
293
- for col in model.feature_columns:
294
- if col in input_data.columns:
295
- print(f"✓ {col}: {input_data[col].iloc[0]:.4f}")
296
- else:
297
- print(f"✗ {col}: 缺失")
 
 
 
 
1
+ # model_predictor.py - 支援漲幅百分比輸出的XGBoost模型預測器
2
+ # 修改版本:輸出改為漲幅百分比而非絕對價格
3
+
4
  import os
5
  import pandas as pd
6
  import numpy as np
 
14
  """
15
  初始化 XGBoost 模型預測器
16
 
17
+ 【重要更新】
18
+ - 模型現在輸出漲幅百分比而非絕對價格
19
+ - 支援 1日、5日、10日、20日的漲幅預測
20
  """
21
  self.model = None
22
  self.scaler = None
 
 
23
  self.feature_columns = [
24
+ 'close', # 前一日收盤價
25
+ 'return_t-1', # 前一日報酬率
26
+ 'return_t-5', # 過去 5 日累積報酬率
27
+ 'MA5_close', # 5 日移動平均價
28
+ 'volatility_5d', # 5 日報酬標準差
29
+ 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
30
+ 'MACD_diff', # MACD - signal
31
+ 'dji_return_t-1', # 前一日道瓊指數報酬率
32
+ 'sox_return_t-1', # 前一日費半指數報酬率
33
+ 'NEWS' # 新聞情緒分數
 
 
 
 
34
  ]
35
 
36
+ # 【新增】輸出目標對應表
37
  self.output_targets = {
38
  1: 'Change_pct_t1_pred', # 1天後漲幅%
39
  5: 'Change_pct_t5_pred', # 5天後漲幅%
 
42
  }
43
 
44
  print("XGBoost 模型預測器初始化完成")
 
45
  print("輸出格式:漲幅百分比 (1日, 5日, 10日, 20日)")
46
 
47
  def load_model(self, model_path):
48
+ """
49
+ 載入預訓練的 XGBoost 模型
50
+
51
+ Args:
52
+ model_path (str): 模型檔案路徑 (.json 格式)
53
+
54
+ Returns:
55
+ bool: 是否成功載入
56
+ """
57
  try:
58
+ # 檢查模型檔案是否存在
59
  if not os.path.exists(model_path):
60
  print(f"錯誤:找不到模型檔案 {model_path}")
61
  return False
62
 
63
+ # 載入 XGBoost 模型
64
  self.model = xgb.XGBRegressor()
65
  self.model.load_model(model_path)
66
 
67
  print(f"成功載入模型:{model_path}")
68
+ print(f"預期特徵數量:{len(self.feature_columns)}")
69
+
70
  return True
71
 
72
  except Exception as e:
73
  print(f"載入模型時發生錯誤:{e}")
74
  return False
75
 
76
+ def load_scaler(self, scaler_path):
77
+ """停用標準化流程"""
78
+ print("⚠️ 已停用標準化:模型使用原始特徵進行預測。")
79
+ self.scaler = None
80
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def preprocess_features(self, input_df):
 
83
  # 確保特徵齊全
84
  missing_features = [f for f in self.feature_columns if f not in input_df.columns]
85
  if missing_features:
86
  print(f"警告:缺少以下特徵:{missing_features}")
87
  for feature in missing_features:
88
  input_df[feature] = 0
89
+
 
90
  input_df = input_df[self.feature_columns].fillna(0)
91
+
92
+ # ✅ 直接回傳原始特徵
93
  return input_df
94
 
95
  def predict(self, model_name, input_df):
96
  """
97
  進行股價漲幅預測
98
 
99
+ Args:
100
+ model_name (str): 模型名稱(用於載入對應模型)
101
+ input_df (pd.DataFrame): 輸入特徵
102
+
103
  Returns:
104
  dict: 預測結果,包含各時間點的漲幅百分比
105
  """
 
110
  if not self.load_model(model_path):
111
  return None
112
 
113
+ # 載入標準化器(如果存在)
114
+ if self.scaler is None:
115
+ scaler_path = f"{model_name}_scaler.pkl"
116
+ self.load_scaler(scaler_path)
117
+
118
  # 預處理特徵
119
  processed_df = self.preprocess_features(input_df.copy())
120
 
 
 
 
 
 
 
121
  # 進行預測
122
  predictions = self.model.predict(processed_df)
 
 
123
 
124
+ # 【重要修改】將預測結果格式化為漲幅百分比
125
  if predictions.ndim == 1:
126
+ # 如果只有一個輸出,假設是 1 日預測
127
  result = {
128
  'Change_pct_t1_pred': float(predictions[0])
129
  }
130
  else:
131
+ # 多輸出情況:1日, 5日, 10日, 20日
132
+ result = {
133
+ 'Change_pct_t1_pred': float(predictions[0][0]) if len(predictions[0]) > 0 else 0.0,
134
+ 'Change_pct_t5_pred': float(predictions[0][1]) if len(predictions[0]) > 1 else 0.0,
135
+ 'Change_pct_t10_pred': float(predictions[0][2]) if len(predictions[0]) > 2 else 0.0,
136
+ 'Change_pct_t20_pred': float(predictions[0][3]) if len(predictions[0]) > 3 else 0.0
137
+ }
 
 
 
138
 
139
  # 輸出預測結果摘要
140
  print("=== 漲幅預測結果 ===")
141
  for key, value in result.items():
142
+ days = key.split('_')[2][1:] # 提取天數
143
  direction = "上漲" if value > 0 else "下跌"
144
  print(f" {days}日後預測: {value:+.2f}% ({direction})")
145
 
 
151
  traceback.print_exc()
152
  return None
153
 
 
 
 
 
 
 
154
  def predict_single_timeframe(self, model_name, input_df, days):
155
+ """
156
+ 預測特定時間框架的漲幅
157
+
158
+ Args:
159
+ model_name (str): 模型名稱
160
+ input_df (pd.DataFrame): 輸入特徵
161
+ days (int): 預測天數 (1, 5, 10, 20)
162
+
163
+ Returns:
164
+ float: 預測的漲幅百分比
165
+ """
166
  try:
167
  predictions = self.predict(model_name, input_df)
168
  if predictions is None:
169
  return None
170
 
171
+ # 根據天數選擇對應的預測結果
172
  target_key = f'Change_pct_t{days}_pred'
173
 
174
  if target_key in predictions:
 
181
  print(f"單一時間框架預測時發生錯誤:{e}")
182
  return None
183
 
184
+ def get_prediction_confidence(self, input_df):
185
+ """
186
+ 評估預測的信心度
187
+
188
+ Args:
189
+ input_df (pd.DataFrame): 輸入特徵
190
+
191
+ Returns:
192
+ float: 信心度 (0-1)
193
+ """
194
+ try:
195
+ # 基於特徵完整性和質量評估信心度
196
+ feature_completeness = 0
197
+ total_features = len(self.feature_columns)
198
+
199
+ for feature in self.feature_columns:
200
+ if feature in input_df.columns:
201
+ value = input_df[feature].iloc[0]
202
+ if not pd.isna(value) and value != 0:
203
+ feature_completeness += 1
204
+
205
+ completeness_ratio = feature_completeness / total_features
206
+
207
+ # 基於數據質量調整信心度
208
+ base_confidence = max(0.5, completeness_ratio)
209
+
210
+ # 如果重要特徵缺失,降低信心度
211
+ important_features = ['close', 'return_t-1', 'MA5_close']
212
+ missing_important = 0
213
+ for feature in important_features:
214
+ if feature not in input_df.columns or pd.isna(input_df[feature].iloc[0]):
215
+ missing_important += 1
216
+
217
+ if missing_important > 0:
218
+ base_confidence *= (1 - missing_important * 0.1)
219
+
220
+ return min(0.9, max(0.3, base_confidence))
221
+
222
+ except Exception as e:
223
+ print(f"計算信心度時發生錯誤:{e}")
224
+ return 0.5
225
+
226
+ def validate_input(self, input_df):
227
+ """
228
+ 驗證輸入數據的有效性
229
+
230
+ Args:
231
+ input_df (pd.DataFrame): 輸入特徵
232
+
233
+ Returns:
234
+ tuple: (是否有效, 錯誤訊息列表)
235
+ """
236
+ errors = []
237
+
238
+ try:
239
+ # 檢查是否為空
240
+ if input_df.empty:
241
+ errors.append("輸入數據為空")
242
+
243
+ # 檢查必要特徵
244
+ required_features = ['close', 'return_t-1']
245
+ for feature in required_features:
246
+ if feature not in input_df.columns:
247
+ errors.append(f"缺少必要特徵:{feature}")
248
+ elif pd.isna(input_df[feature].iloc[0]):
249
+ errors.append(f"必要特徵包含空值:{feature}")
250
+
251
+ # 檢查數據合理性
252
+ if 'close' in input_df.columns:
253
+ close_price = input_df['close'].iloc[0]
254
+ if close_price <= 0:
255
+ errors.append(f"收盤價不合理:{close_price}")
256
+
257
+ if 'return_t-1' in input_df.columns:
258
+ return_val = input_df['return_t-1'].iloc[0]
259
+ if abs(return_val) > 0.5: # 單日漲跌幅超過50%可能有問題
260
+ errors.append(f"報酬率異常:{return_val:.3f}")
261
+
262
+ return len(errors) == 0, errors
263
+
264
+ except Exception as e:
265
+ errors.append(f"驗證過程發生錯誤:{e}")
266
+ return False, errors
267
+
268
  def get_feature_importance(self):
269
+ """
270
+ 獲取特徵重要性
271
+
272
+ Returns:
273
+ dict: 特徵重要性字典
274
+ """
275
  try:
276
  if self.model is None:
277
  return None
278
 
279
+ # 獲取特徵重要性
280
  importance_scores = self.model.feature_importances_
281
+
282
+ # 創建特徵重要性字典
283
  importance_dict = {}
284
  for i, feature in enumerate(self.feature_columns):
285
  if i < len(importance_scores):
286
  importance_dict[feature] = float(importance_scores[i])
287
 
288
+ # 按重要性排序
289
+ sorted_importance = dict(sorted(importance_dict.items(),
290
+ key=lambda x: x[1],
291
+ reverse=True))
292
+
293
+ return sorted_importance
294
 
295
  except Exception as e:
296
  print(f"獲取特徵重要性時發生錯誤:{e}")
297
  return None
298
 
299
+ def explain_prediction(self, input_df, predictions):
300
+ """
301
+ 解釋預測結果
302
+
303
+ Args:
304
+ input_df (pd.DataFrame): 輸入特徵
305
+ predictions (dict): 預測結果
306
+
307
+ Returns:
308
+ str: 解釋文本
309
+ """
310
+ try:
311
+ explanation = []
312
+ explanation.append("=== 預測解釋 ===")
313
+
314
+ # 分析主要驅動因素
315
+ feature_importance = self.get_feature_importance()
316
+ if feature_importance:
317
+ explanation.append("主要影響因素:")
318
+ top_features = list(feature_importance.keys())[:3]
319
+ for feature in top_features:
320
+ if feature in input_df.columns:
321
+ value = input_df[feature].iloc[0]
322
+ importance = feature_importance[feature]
323
+ explanation.append(f" - {feature}: {value:.4f} (重要性: {importance:.3f})")
324
+
325
+ # 分析預測趨勢
326
+ explanation.append("\n預測趨勢分析:")
327
+ for key, value in predictions.items():
328
+ days = key.split('_')[2][1:]
329
+ trend = "看漲" if value > 1 else "看跌" if value < -1 else "持平"
330
+ explanation.append(f" - {days}日: {value:+.2f}% ({trend})")
331
+
332
+ return "\n".join(explanation)
333
+
334
+ except Exception as e:
335
+ return f"解釋生成失敗: {e}"
336
+
337
+ # 範例使用方式
338
  if __name__ == "__main__":
339
+ # 初始化模型
340
  model = XGBoostModel()
341
 
342
+ # 準備測試數據
343
  test_data = pd.DataFrame({
344
+ 'close': [150.0],
345
+ 'return_t-1': [0.02],
346
+ 'return_t-5': [0.05],
347
+ 'MA5_close': [148.0],
348
+ 'volatility_5d': [0.025],
349
+ 'volume_ratio_5d': [1.2],
350
+ 'MACD_diff': [0.5],
351
+ 'dji_return_t-1': [0.01],
352
+ 'sox_return_t-1': [0.015],
353
+ 'NEWS': [0.1]
354
  })
355
 
356
+ print("測試模型預測器...")
357
+ print("輸入特徵:")
358
+ print(test_data)
359
 
360
+ # 進行預測
361
+ predictions = model.predict('xgboost_model', test_data)
 
 
362
 
363
+ if predictions:
364
+ print("\n預測成功!")
365
+ print("結果說明:輸出為相對於當前價格的漲幅百分比")
366
+
367
+ # 解釋預測
368
+ explanation = model.explain_prediction(test_data, predictions)
369
+ print(f"\n{explanation}")
370
+
371
+ # 計算信心度
372
+ confidence = model.get_prediction_confidence(test_data)
373
+ print(f"\n預測信心度: {confidence:.2%}")
374
+ else:
375
+ print("預測失敗!")