AlanRex committed on
Commit
fea8870
·
verified ·
1 Parent(s): 59065bb

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +106 -50
  2. model_predictor.py +153 -237
app.py CHANGED
@@ -854,43 +854,66 @@ def simple_statistical_predict(data, predict_days=5):
854
 
855
  def calculate_new_features(df):
856
  """
857
- 計算新的技術指標特徵 - 針對新特徵需求
 
858
  """
859
  if df.empty:
860
  return df
861
 
862
- # 1. return_t-1 前一日報酬率
863
  df['return_t-1'] = df['Close'].pct_change()
864
 
865
- # 2. return_t-5 過去 5 日累積報酬率
866
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
867
 
868
- # 3. MA5_close 5 日移動平均價
869
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
870
 
871
- # 4. MA20_close 20 日移動平均價
872
- df['MA20_close'] = df['Close'].rolling(window=20).mean()
873
-
874
- # 5. volatility_5d – 5 日報酬標準差(短期波動)
875
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
876
 
877
- # 6. volume_ratio_5d 今日成交量 ÷ 5 日均量
878
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
879
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
880
 
881
- # 7. RSI_14 14 RSI 指標
882
- delta = df['Close'].diff()
883
- gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
884
- loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
885
- rs = gain / loss
886
- df['RSI_14'] = 100 - (100 / (1 + rs))
 
 
 
 
 
 
 
 
 
 
887
 
888
- # 8. MACD_diff MACD - signal(趨勢強弱)
889
- exp1 = df['Close'].ewm(span=12).mean()
890
- exp2 = df['Close'].ewm(span=26).mean()
891
- macd_line = exp1 - exp2
892
- signal_line = macd_line.ewm(span=9).mean()
893
- df['MACD_diff'] = macd_line - signal_line
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
894
 
895
  # 移除輔助欄位
896
  if 'volume_5d_avg' in df.columns:
@@ -899,23 +922,27 @@ def calculate_new_features(df):
899
  return df
900
 
901
  def advanced_xgboost_predict(predict_days=5):
902
- """使用 XGBoost 模型進行預測 - 強制刷新數據版本"""
 
 
903
  try:
904
  print(f"開始XGBoost預測 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
905
 
906
  xgb_model = XGBoostModel()
907
 
908
- # 強制重新獲取台指數據 - 不使用緩存
909
  print("正在獲取最新台指數據...")
910
  taiex_data = get_stock_data('^TWII', '2y')
911
  if taiex_data.empty or len(taiex_data) < 60:
912
  print("台指期數據不足,無法進行XGBoost預測")
913
  return None
914
 
 
915
  taiex_data = calculate_technical_indicators(taiex_data)
 
916
  taiex_data = calculate_new_features(taiex_data)
917
 
918
- # 強制重新獲取美股數據
919
  print("正在獲取美股數據...")
920
  us_market_data = get_us_market_data()
921
 
@@ -935,14 +962,22 @@ def advanced_xgboost_predict(predict_days=5):
935
  latest_data = taiex_data.iloc[-1]
936
  yesterday_close = latest_data['Close']
937
 
938
- # 特徵列表保持不變
939
  new_feature_columns = [
940
- 'return_t-1',
941
- 'return_t-5',
942
- 'MA5_close',
943
- 'volatility_5d',
944
- 'volume_ratio_5d',
945
- 'MACD_diff',
 
 
 
 
 
 
 
 
946
  ]
947
 
948
  # 獲取美股報酬率
@@ -970,31 +1005,49 @@ def advanced_xgboost_predict(predict_days=5):
970
  feature_names = []
971
 
972
  for feature in new_feature_columns:
973
- if feature in latest_data.index:
 
 
 
 
 
 
974
  value = latest_data[feature]
975
  if pd.isna(value):
976
- if 'return' in feature: default_value = 0.0
977
- elif 'MA' in feature: default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
978
- elif 'volatility' in feature: default_value = 0.02
979
- elif 'volume_ratio' in feature: default_value = 1.0
980
- elif 'MACD' in feature: default_value = 0.0
981
- else: default_value = 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
982
 
983
  features_list.append(default_value)
984
  else:
985
  features_list.append(value)
 
 
 
 
986
 
987
- feature_names.append(feature)
988
-
989
- # 添加其他特徵
990
- features_list.extend([dji_return, sox_return, yesterday_close, sentiment_score_raw])
991
- feature_names.extend(['dji_return_t-1', 'sox_return_t-1', 'close', 'NEWS'])
992
 
993
  # 轉換為 DataFrame
994
  input_df = pd.DataFrame([features_list], columns=feature_names)
995
 
996
- print(f"特徵向量: {[f'{f:.4f}' for f in features_list[:5]]}...") # 只顯示前5個
997
- # 🔍 新增這段:完整印出本次預測輸入資料
998
  print("\n=== 📊 本次預測輸入特徵 DataFrame ===")
999
  print(input_df)
1000
  print("=== ✅ 檢查以上特徵是否每次都有變 ===\n")
@@ -1005,7 +1058,7 @@ def advanced_xgboost_predict(predict_days=5):
1005
  if predictions is None:
1006
  return None
1007
 
1008
- # 處理預測結果
1009
  pred_mapping = {
1010
  1: 'Change_pct_t1_pred',
1011
  5: 'Change_pct_t5_pred',
@@ -1025,7 +1078,7 @@ def advanced_xgboost_predict(predict_days=5):
1025
 
1026
  return {
1027
  'predicted_price': predicted_price,
1028
- 'change_pct': predicted_change_pct,
1029
  'confidence': 0.75
1030
  }
1031
 
@@ -1073,8 +1126,9 @@ def get_prediction(data, predict_days=5):
1073
  return simple_statistical_predict(data, predict_days)
1074
 
1075
  def calculate_technical_indicators(df):
1076
- """計算技術指標"""
1077
- if df.empty: return df
 
1078
 
1079
  # 移動平均線
1080
  df['MA5'] = df['Close'].rolling(window=5).mean()
@@ -1117,7 +1171,9 @@ def calculate_technical_indicators(df):
1117
  df['down_move'] = df['Low'].shift(1) - df['Low']
1118
  df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
1119
  df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
1120
- df['TR'] = np.max([df['High'] - df['Low'], abs(df['High'] - df['Close'].shift(1)), abs(df['Low'] - df['Close'].shift(1))], axis=0)
 
 
1121
  df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1122
  df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1123
  df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
 
854
 
855
  def calculate_new_features(df):
856
  """
857
+ 【修正版】使用與 XGBoost 訓練時完全相同的特徵工程
858
+ 完全對應 xgboost_for_stock 中的 create_new_features 函數
859
  """
860
  if df.empty:
861
  return df
862
 
863
+ # 1. return_t-1 前一日報酬率
864
  df['return_t-1'] = df['Close'].pct_change()
865
 
866
+ # 2. return_t-5 過去 5 日累積報酬率
867
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
868
 
869
+ # 3. MA5_close 5 日移動平均價
870
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
871
 
872
+ # 4. volatility_5d 5 日報酬標準差(短期波動)
 
 
 
873
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
874
 
875
+ # 5. volume_ratio_5d 今日成交量 ÷ 5 日均量
876
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
877
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
878
 
879
+ # 6. MACD_diff MACD - signal(趨勢強弱)
880
+ if 'MACD' in df.columns and 'MACD_Signal' in df.columns:
881
+ df['MACD_diff'] = df['MACD'] - df['MACD_Signal']
882
+ else:
883
+ # 計算 MACD
884
+ exp1 = df['Close'].ewm(span=12).mean()
885
+ exp2 = df['Close'].ewm(span=26).mean()
886
+ macd_line = exp1 - exp2
887
+ signal_line = macd_line.ewm(span=9).mean()
888
+ df['MACD_diff'] = macd_line - signal_line
889
+
890
+ # 7. MACDvol — 【修正】對應訓練資料中的 MACDvol 欄位
891
+ if 'MACDvol' in df.columns:
892
+ df['MACDvol'] = df['MACDvol']
893
+ else:
894
+ df['MACDvol'] = df['MACD_diff'] # 使用 MACD_diff 作為 MACDvol
895
 
896
+ # 8. RSI_14 — 14 日 RSI 指標
897
+ if 'RSI' in df.columns:
898
+ df['RSI_14'] = df['RSI']
899
+ else:
900
+ # 計算 RSI
901
+ delta = df['Close'].diff()
902
+ gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
903
+ loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
904
+ rs = gain / loss
905
+ df['RSI_14'] = 100 - (100 / (1 + rs))
906
+
907
+ # 9. ADX 指標(從現有技術指標中獲取)
908
+ if 'ADX' not in df.columns:
909
+ # 如果沒有ADX,計算簡化版本或設置預設值
910
+ df['ADX'] = 25 # 預設中性值
911
+
912
+ # 10. volume_weighted_return — 當日報酬率絕對值 × 當日成交量
913
+ df['volume_weighted_return'] = abs(df['return_t-1']) * df['Volume']
914
+
915
+ # 11. close(當前收盤價)
916
+ df['close'] = df['Close']
917
 
918
  # 移除輔助欄位
919
  if 'volume_5d_avg' in df.columns:
 
922
  return df
923
 
924
  def advanced_xgboost_predict(predict_days=5):
925
+ """
926
+ 【修正版】使用 XGBoost 模型進行預測 - 與訓練模型完全一致
927
+ """
928
  try:
929
  print(f"開始XGBoost預測 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
930
 
931
  xgb_model = XGBoostModel()
932
 
933
+ # 強制重新獲取台指數據
934
  print("正在獲取最新台指數據...")
935
  taiex_data = get_stock_data('^TWII', '2y')
936
  if taiex_data.empty or len(taiex_data) < 60:
937
  print("台指期數據不足,無法進行XGBoost預測")
938
  return None
939
 
940
+ # 計算技術指標
941
  taiex_data = calculate_technical_indicators(taiex_data)
942
+ # 【修正】使用新的特徵工程函數
943
  taiex_data = calculate_new_features(taiex_data)
944
 
945
+ # 獲取美股數據
946
  print("正在獲取美股數據...")
947
  us_market_data = get_us_market_data()
948
 
 
962
  latest_data = taiex_data.iloc[-1]
963
  yesterday_close = latest_data['Close']
964
 
965
+ # 【修正】使用與訓練時完全相同的特徵列表
966
  new_feature_columns = [
967
+ 'close', # 前一日收盤價
968
+ 'return_t-1', # 前一日漲跌率
969
+ 'return_t-5', # 過去 5 日累積漲跌率
970
+ 'MA5_close', # 5 日移動平均價
971
+ 'volatility_5d', # 5 日漲跌標準差
972
+ 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
973
+ 'MACD_diff', # MACD - signal
974
+ 'dji_return_t-1', # 前一日道瓊指數漲跌率
975
+ 'sox_return_t-1', # 前一日費半指數漲跌率
976
+ 'NEWS', # 新聞情緒分數
977
+ 'MACDvol', # MACD成交量
978
+ 'RSI_14', # 14日RSI
979
+ 'ADX', # ADX指標
980
+ 'volume_weighted_return' # 成交量加權報酬率
981
  ]
982
 
983
  # 獲取美股報酬率
 
1005
  feature_names = []
1006
 
1007
  for feature in new_feature_columns:
1008
+ if feature == 'dji_return_t-1':
1009
+ features_list.append(dji_return)
1010
+ elif feature == 'sox_return_t-1':
1011
+ features_list.append(sox_return)
1012
+ elif feature == 'NEWS':
1013
+ features_list.append(sentiment_score_raw)
1014
+ elif feature in latest_data.index:
1015
  value = latest_data[feature]
1016
  if pd.isna(value):
1017
+ # 設置預設值
1018
+ if 'return' in feature:
1019
+ default_value = 0.0
1020
+ elif 'MA' in feature:
1021
+ default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
1022
+ elif 'volatility' in feature:
1023
+ default_value = 0.02
1024
+ elif 'volume_ratio' in feature:
1025
+ default_value = 1.0
1026
+ elif 'MACD' in feature:
1027
+ default_value = 0.0
1028
+ elif feature == 'RSI_14':
1029
+ default_value = 50.0
1030
+ elif feature == 'ADX':
1031
+ default_value = 25.0
1032
+ elif feature == 'close':
1033
+ default_value = yesterday_close
1034
+ else:
1035
+ default_value = 0.0
1036
 
1037
  features_list.append(default_value)
1038
  else:
1039
  features_list.append(value)
1040
+ else:
1041
+ # 特徵不存在,設置預設值
1042
+ print(f"警告:特徵 {feature} 不存在,使用預設值")
1043
+ features_list.append(0.0)
1044
 
1045
+ feature_names.append(feature)
 
 
 
 
1046
 
1047
  # 轉換為 DataFrame
1048
  input_df = pd.DataFrame([features_list], columns=feature_names)
1049
 
1050
+ print(f"特徵向量長度: {len(features_list)}")
 
1051
  print("\n=== 📊 本次預測輸入特徵 DataFrame ===")
1052
  print(input_df)
1053
  print("=== ✅ 檢查以上特徵是否每次都有變 ===\n")
 
1058
  if predictions is None:
1059
  return None
1060
 
1061
+ # 【修正】處理預測結果 - 現在返回的已經是漲幅百分比
1062
  pred_mapping = {
1063
  1: 'Change_pct_t1_pred',
1064
  5: 'Change_pct_t5_pred',
 
1078
 
1079
  return {
1080
  'predicted_price': predicted_price,
1081
+ 'change_pct': predicted_change_pct, # 這已經是百分比格式
1082
  'confidence': 0.75
1083
  }
1084
 
 
1126
  return simple_statistical_predict(data, predict_days)
1127
 
1128
  def calculate_technical_indicators(df):
1129
+ """【修正版】計算技術指標,確保與模型訓練時一致"""
1130
+ if df.empty:
1131
+ return df
1132
 
1133
  # 移動平均線
1134
  df['MA5'] = df['Close'].rolling(window=5).mean()
 
1171
  df['down_move'] = df['Low'].shift(1) - df['Low']
1172
  df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
1173
  df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
1174
+ df['TR'] = np.max([df['High'] - df['Low'],
1175
+ abs(df['High'] - df['Close'].shift(1)),
1176
+ abs(df['Low'] - df['Close'].shift(1))], axis=0)
1177
  df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1178
  df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
1179
  df['DX'] = abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
model_predictor.py CHANGED
@@ -1,6 +1,3 @@
1
- # model_predictor.py - 支援漲幅百分比輸出的XGBoost模型預測器
2
- # 修改版本:輸出改為漲幅百分比而非絕對價格
3
-
4
  import os
5
  import pandas as pd
6
  import numpy as np
@@ -14,26 +11,30 @@ class XGBoostModel:
14
  """
15
  初始化 XGBoost 模型預測器
16
 
17
- 【重要更新】
18
- - 模型現在輸出漲幅百分比而非絕對價格
19
- - 支援 1日、5日、10日、20日的漲幅預測
20
  """
21
  self.model = None
22
  self.scaler = None
 
 
23
  self.feature_columns = [
24
- 'close', # 前一日收盤價
25
- 'return_t-1', # 前一日報酬率
26
- 'return_t-5', # 過去 5 日累積報酬率
27
- 'MA5_close', # 5 日移動平均價
28
- 'volatility_5d', # 5 日報酬標準差
29
- 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
30
- 'MACD_diff', # MACD - signal
31
- 'dji_return_t-1', # 前一日道瓊指數報酬率
32
- 'sox_return_t-1', # 前一日費半指數報酬率
33
- 'NEWS' # 新聞情緒分數
 
 
 
 
34
  ]
35
 
36
- # 【新增】輸出目標對應表
37
  self.output_targets = {
38
  1: 'Change_pct_t1_pred', # 1天後漲幅%
39
  5: 'Change_pct_t5_pred', # 5天後漲幅%
@@ -42,64 +43,125 @@ class XGBoostModel:
42
  }
43
 
44
  print("XGBoost 模型預測器初始化完成")
 
45
  print("輸出格式:漲幅百分比 (1日, 5日, 10日, 20日)")
46
 
47
  def load_model(self, model_path):
48
- """
49
- 載入預訓練的 XGBoost 模型
50
-
51
- Args:
52
- model_path (str): 模型檔案路徑 (.json 格式)
53
-
54
- Returns:
55
- bool: 是否成功載入
56
- """
57
  try:
58
- # 檢查模型檔案是否存在
59
  if not os.path.exists(model_path):
60
  print(f"錯誤:找不到模型檔案 {model_path}")
61
  return False
62
 
63
- # 載入 XGBoost 模型
64
  self.model = xgb.XGBRegressor()
65
  self.model.load_model(model_path)
66
 
67
  print(f"成功載入模型:{model_path}")
68
- print(f"預期特徵數量:{len(self.feature_columns)}")
69
-
70
  return True
71
 
72
  except Exception as e:
73
  print(f"載入模型時發生錯誤:{e}")
74
  return False
75
 
76
- def load_scaler(self, scaler_path):
77
- """停用標準化流程"""
78
- print("⚠️ 已停用標準化:模型使用原始特徵進行預測。")
79
- self.scaler = None
80
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def preprocess_features(self, input_df):
 
83
  # 確保特徵齊全
84
  missing_features = [f for f in self.feature_columns if f not in input_df.columns]
85
  if missing_features:
86
  print(f"警告:缺少以下特徵:{missing_features}")
87
  for feature in missing_features:
88
  input_df[feature] = 0
89
-
 
90
  input_df = input_df[self.feature_columns].fillna(0)
91
-
92
- # ✅ 直接回傳原始特徵
93
  return input_df
94
 
95
  def predict(self, model_name, input_df):
96
  """
97
  進行股價漲幅預測
98
 
99
- Args:
100
- model_name (str): 模型名稱(用於載入對應模型)
101
- input_df (pd.DataFrame): 輸入特徵
102
-
103
  Returns:
104
  dict: 預測結果,包含各時間點的漲幅百分比
105
  """
@@ -110,36 +172,42 @@ class XGBoostModel:
110
  if not self.load_model(model_path):
111
  return None
112
 
113
- # 載入標準化器(如果存在)
114
- if self.scaler is None:
115
- scaler_path = f"{model_name}_scaler.pkl"
116
- self.load_scaler(scaler_path)
117
-
118
  # 預處理特徵
119
  processed_df = self.preprocess_features(input_df.copy())
120
 
 
 
 
 
 
 
121
  # 進行預測
122
  predictions = self.model.predict(processed_df)
 
 
123
 
124
- # 【重要修改】將預測結果格式化為漲幅百分比
125
  if predictions.ndim == 1:
126
- # 如果只有一個輸出,假設是 1 日預測
127
  result = {
128
  'Change_pct_t1_pred': float(predictions[0])
129
  }
130
  else:
131
- # 多輸出情況:1日, 5日, 10日, 20日
132
- result = {
133
- 'Change_pct_t1_pred': float(predictions[0][0]) if len(predictions[0]) > 0 else 0.0,
134
- 'Change_pct_t5_pred': float(predictions[0][1]) if len(predictions[0]) > 1 else 0.0,
135
- 'Change_pct_t10_pred': float(predictions[0][2]) if len(predictions[0]) > 2 else 0.0,
136
- 'Change_pct_t20_pred': float(predictions[0][3]) if len(predictions[0]) > 3 else 0.0
137
- }
 
 
 
138
 
139
  # 輸出預測結果摘要
140
  print("=== 漲幅預測結果 ===")
141
  for key, value in result.items():
142
- days = key.split('_')[2][1:] # 提取天數
143
  direction = "上漲" if value > 0 else "下跌"
144
  print(f" {days}日後預測: {value:+.2f}% ({direction})")
145
 
@@ -152,23 +220,12 @@ class XGBoostModel:
152
  return None
153
 
154
  def predict_single_timeframe(self, model_name, input_df, days):
155
- """
156
- 預測特定時間框架的漲幅
157
-
158
- Args:
159
- model_name (str): 模型名稱
160
- input_df (pd.DataFrame): 輸入特徵
161
- days (int): 預測天數 (1, 5, 10, 20)
162
-
163
- Returns:
164
- float: 預測的漲幅百分比
165
- """
166
  try:
167
  predictions = self.predict(model_name, input_df)
168
  if predictions is None:
169
  return None
170
 
171
- # 根據天數選擇對應的預測結果
172
  target_key = f'Change_pct_t{days}_pred'
173
 
174
  if target_key in predictions:
@@ -181,195 +238,54 @@ class XGBoostModel:
181
  print(f"單一時間框架預測時發生錯誤:{e}")
182
  return None
183
 
184
- def get_prediction_confidence(self, input_df):
185
- """
186
- 評估預測的信心度
187
-
188
- Args:
189
- input_df (pd.DataFrame): 輸入特徵
190
-
191
- Returns:
192
- float: 信心度 (0-1)
193
- """
194
- try:
195
- # 基於特徵完整性和質量評估信心度
196
- feature_completeness = 0
197
- total_features = len(self.feature_columns)
198
-
199
- for feature in self.feature_columns:
200
- if feature in input_df.columns:
201
- value = input_df[feature].iloc[0]
202
- if not pd.isna(value) and value != 0:
203
- feature_completeness += 1
204
-
205
- completeness_ratio = feature_completeness / total_features
206
-
207
- # 基於數據質量調整信心度
208
- base_confidence = max(0.5, completeness_ratio)
209
-
210
- # 如果重要特徵缺失,降低信心度
211
- important_features = ['close', 'return_t-1', 'MA5_close']
212
- missing_important = 0
213
- for feature in important_features:
214
- if feature not in input_df.columns or pd.isna(input_df[feature].iloc[0]):
215
- missing_important += 1
216
-
217
- if missing_important > 0:
218
- base_confidence *= (1 - missing_important * 0.1)
219
-
220
- return min(0.9, max(0.3, base_confidence))
221
-
222
- except Exception as e:
223
- print(f"計算信心度時發生錯誤:{e}")
224
- return 0.5
225
-
226
- def validate_input(self, input_df):
227
- """
228
- 驗證輸入數據的有效性
229
-
230
- Args:
231
- input_df (pd.DataFrame): 輸入特徵
232
-
233
- Returns:
234
- tuple: (是否有效, 錯誤訊息列表)
235
- """
236
- errors = []
237
-
238
- try:
239
- # 檢查是否為空
240
- if input_df.empty:
241
- errors.append("輸入數據為空")
242
-
243
- # 檢查必要特徵
244
- required_features = ['close', 'return_t-1']
245
- for feature in required_features:
246
- if feature not in input_df.columns:
247
- errors.append(f"缺少必要特徵:{feature}")
248
- elif pd.isna(input_df[feature].iloc[0]):
249
- errors.append(f"必要特徵包含空值:{feature}")
250
-
251
- # 檢查數據合理性
252
- if 'close' in input_df.columns:
253
- close_price = input_df['close'].iloc[0]
254
- if close_price <= 0:
255
- errors.append(f"收盤價不合理:{close_price}")
256
-
257
- if 'return_t-1' in input_df.columns:
258
- return_val = input_df['return_t-1'].iloc[0]
259
- if abs(return_val) > 0.5: # 單日漲跌幅超過50%可能有問題
260
- errors.append(f"報酬率異常:{return_val:.3f}")
261
-
262
- return len(errors) == 0, errors
263
-
264
- except Exception as e:
265
- errors.append(f"驗證過程發生錯誤:{e}")
266
- return False, errors
267
-
268
  def get_feature_importance(self):
269
- """
270
- 獲取特徵重要性
271
-
272
- Returns:
273
- dict: 特徵重要性字典
274
- """
275
  try:
276
  if self.model is None:
277
  return None
278
 
279
- # 獲取特徵重要性
280
  importance_scores = self.model.feature_importances_
281
-
282
- # 創建特徵重要性字典
283
  importance_dict = {}
284
  for i, feature in enumerate(self.feature_columns):
285
  if i < len(importance_scores):
286
  importance_dict[feature] = float(importance_scores[i])
287
 
288
- # 按重要性排序
289
- sorted_importance = dict(sorted(importance_dict.items(),
290
- key=lambda x: x[1],
291
- reverse=True))
292
-
293
- return sorted_importance
294
 
295
  except Exception as e:
296
  print(f"獲取特徵重要性時發生錯誤:{e}")
297
  return None
298
 
299
- def explain_prediction(self, input_df, predictions):
300
- """
301
- 解釋預測結果
302
-
303
- Args:
304
- input_df (pd.DataFrame): 輸入特徵
305
- predictions (dict): 預測結果
306
-
307
- Returns:
308
- str: 解釋文本
309
- """
310
- try:
311
- explanation = []
312
- explanation.append("=== 預測解釋 ===")
313
-
314
- # 分析主要驅動因素
315
- feature_importance = self.get_feature_importance()
316
- if feature_importance:
317
- explanation.append("主要影響因素:")
318
- top_features = list(feature_importance.keys())[:3]
319
- for feature in top_features:
320
- if feature in input_df.columns:
321
- value = input_df[feature].iloc[0]
322
- importance = feature_importance[feature]
323
- explanation.append(f" - {feature}: {value:.4f} (重要性: {importance:.3f})")
324
-
325
- # 分析預測趨勢
326
- explanation.append("\n預測趨勢分析:")
327
- for key, value in predictions.items():
328
- days = key.split('_')[2][1:]
329
- trend = "看漲" if value > 1 else "看跌" if value < -1 else "持平"
330
- explanation.append(f" - {days}日: {value:+.2f}% ({trend})")
331
-
332
- return "\n".join(explanation)
333
-
334
- except Exception as e:
335
- return f"解釋生成失敗: {e}"
336
-
337
- # 範例使用方式
338
  if __name__ == "__main__":
339
- # 初始化模型
340
  model = XGBoostModel()
341
 
342
- # 準備測試數據
343
  test_data = pd.DataFrame({
344
- 'close': [150.0],
345
- 'return_t-1': [0.02],
346
- 'return_t-5': [0.05],
347
- 'MA5_close': [148.0],
348
- 'volatility_5d': [0.025],
349
- 'volume_ratio_5d': [1.2],
350
- 'MACD_diff': [0.5],
351
- 'dji_return_t-1': [0.01],
352
- 'sox_return_t-1': [0.015],
353
- 'NEWS': [0.1]
354
  })
355
 
356
- print("測試模型預測器...")
357
- print("輸入特徵:")
358
- print(test_data)
359
 
360
- # 進行預測
361
- predictions = model.predict('xgboost_model', test_data)
 
 
362
 
363
- if predictions:
364
- print("\n預測成功!")
365
- print("結果說明:輸出為相對於當前價格的漲幅百分比")
366
-
367
- # 解釋預測
368
- explanation = model.explain_prediction(test_data, predictions)
369
- print(f"\n{explanation}")
370
-
371
- # 計算信心度
372
- confidence = model.get_prediction_confidence(test_data)
373
- print(f"\n預測信心度: {confidence:.2%}")
374
- else:
375
- print("預測失敗!")
 
 
 
 
1
  import os
2
  import pandas as pd
3
  import numpy as np
 
11
  """
12
  初始化 XGBoost 模型預測器
13
 
14
+ 【修正】與訓練檔案 xgboost_for_stock 完全一致的特徵欄位
 
 
15
  """
16
  self.model = None
17
  self.scaler = None
18
+
19
+ # 【修正】使用與訓練時完全相同的特徵欄位順序
20
  self.feature_columns = [
21
+ 'close', # 前一日收盤價
22
+ 'return_t-1', # 前一日漲跌率
23
+ 'return_t-5', # 過去 5 日累積漲跌率
24
+ 'MA5_close', # 5 日移動平均價
25
+ 'volatility_5d', # 5 日漲跌標準差
26
+ 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
27
+ 'MACD_diff', # MACD - signal
28
+ 'dji_return_t-1', # 前一日道瓊指數漲跌率
29
+ 'sox_return_t-1', # 前一日費半指數漲跌率
30
+ 'NEWS', # 新聞情緒分數
31
+ 'MACDvol', # MACD 成交量
32
+ 'RSI_14', # 14日RSI
33
+ 'ADX', # ADX指標
34
+ 'volume_weighted_return' # 成交量加權報酬率
35
  ]
36
 
37
+ # 輸出目標對應表(漲幅百分比)
38
  self.output_targets = {
39
  1: 'Change_pct_t1_pred', # 1天後漲幅%
40
  5: 'Change_pct_t5_pred', # 5天後漲幅%
 
43
  }
44
 
45
  print("XGBoost 模型預測器初始化完成")
46
+ print(f"特徵數量:{len(self.feature_columns)}")
47
  print("輸出格式:漲幅百分比 (1日, 5日, 10日, 20日)")
48
 
49
  def load_model(self, model_path):
50
+ """載入預訓練的 XGBoost 模型"""
 
 
 
 
 
 
 
 
51
  try:
 
52
  if not os.path.exists(model_path):
53
  print(f"錯誤:找不到模型檔案 {model_path}")
54
  return False
55
 
 
56
  self.model = xgb.XGBRegressor()
57
  self.model.load_model(model_path)
58
 
59
  print(f"成功載入模型:{model_path}")
 
 
60
  return True
61
 
62
  except Exception as e:
63
  print(f"載入模型時發生錯誤:{e}")
64
  return False
65
 
66
+ def create_new_features(self, df):
67
+ """
68
+ 【修正】使用與訓練時完全相同的特徵工程函數
69
+ 完全對應 xgboost_for_stock 中的 create_new_features 函數
70
+ """
71
+ # 1. return_t-1 — 前一日報酬率
72
+ df['return_t-1'] = df['Close'].pct_change()
73
+
74
+ # 2. return_t-5 — 過去 5 日累積報酬率
75
+ df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
76
+
77
+ # 3. MA5_close — 5 日移動平均價
78
+ df['MA5_close'] = df['Close'].rolling(window=5).mean()
79
+
80
+ # 4. volatility_5d — 5 日報酬標準差(短期波動)
81
+ df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
82
+
83
+ # 5. volume_ratio_5d — 今日成交量 ÷ 5 日均量
84
+ df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
85
+ df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
86
+
87
+ # 6. MACD_diff — MACD - signal(趨勢強弱)
88
+ if 'MACD' in df.columns and 'MACD_Signal' in df.columns:
89
+ df['MACD_diff'] = df['MACD'] - df['MACD_Signal']
90
+ elif 'MACD' in df.columns and 'MACDsign' in df.columns:
91
+ # 【修正】支援訓練資料中的欄位名稱
92
+ df['MACD_diff'] = df['MACD'] - df['MACDsign']
93
+ else:
94
+ # 計算 MACD
95
+ exp1 = df['Close'].ewm(span=12).mean()
96
+ exp2 = df['Close'].ewm(span=26).mean()
97
+ macd_line = exp1 - exp2
98
+ signal_line = macd_line.ewm(span=9).mean()
99
+ df['MACD_diff'] = macd_line - signal_line
100
+
101
+ # 7. dji_return_t-1 — 前一日道瓊指數報酬率(需外部提供)
102
+ if 'dji_return_t-1' not in df.columns:
103
+ df['dji_return_t-1'] = 0 # 預設值,實際使用時由外部傳入
104
+
105
+ # 8. sox_return_t-1 — 前一日費半指數報酬率(需外部提供)
106
+ if 'sox_return_t-1' not in df.columns:
107
+ df['sox_return_t-1'] = 0 # 預設值,實際使用時由外部傳入
108
+
109
+ # 9. NEWS — 新聞情緒分數(需外部提供)
110
+ if 'NEWS' not in df.columns:
111
+ df['NEWS'] = 0 # 預設值,實際使用時由外部傳入
112
+
113
+ # 10. MACDvol — 【修正】對應訓練資料中的 MACDvol 欄位
114
+ if 'MACDvol' in df.columns:
115
+ df['MACDvol'] = df['MACDvol']
116
+ elif 'MACD_Histogram' in df.columns:
117
+ df['MACDvol'] = df['MACD_Histogram']
118
+ else:
119
+ df['MACDvol'] = df['MACD_diff'] # 使用 MACD_diff 作為替代
120
+
121
+ # 11. RSI_14 — 14 日 RSI 指標
122
+ if 'RSI' in df.columns:
123
+ df['RSI_14'] = df['RSI']
124
+ else:
125
+ # 計算 RSI
126
+ delta = df['Close'].diff()
127
+ gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
128
+ loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
129
+ rs = gain / loss
130
+ df['RSI_14'] = 100 - (100 / (1 + rs))
131
+
132
+ # 12. ADX(需要從技術指標中獲取)
133
+ if 'ADX' not in df.columns:
134
+ df['ADX'] = 50 # 預設值
135
+
136
+ # 13. volume_weighted_return — 當日報酬率絕對值 × 當日成交量
137
+ df['volume_weighted_return'] = abs(df['return_t-1']) * df['Volume']
138
+
139
+ # 14. close(當前收盤價)
140
+ df['close'] = df['Close']
141
+
142
+ # 移除輔助欄位
143
+ if 'volume_5d_avg' in df.columns:
144
+ df.drop('volume_5d_avg', axis=1, inplace=True)
145
+
146
+ return df
147
 
148
  def preprocess_features(self, input_df):
149
+ """預處理特徵數據"""
150
  # 確保特徵齊全
151
  missing_features = [f for f in self.feature_columns if f not in input_df.columns]
152
  if missing_features:
153
  print(f"警告:缺少以下特徵:{missing_features}")
154
  for feature in missing_features:
155
  input_df[feature] = 0
156
+
157
+ # 選擇並排序特徵(確保順序與訓練時一致)
158
  input_df = input_df[self.feature_columns].fillna(0)
 
 
159
  return input_df
160
 
161
  def predict(self, model_name, input_df):
162
  """
163
  進行股價漲幅預測
164
 
 
 
 
 
165
  Returns:
166
  dict: 預測結果,包含各時間點的漲幅百分比
167
  """
 
172
  if not self.load_model(model_path):
173
  return None
174
 
 
 
 
 
 
175
  # 預處理特徵
176
  processed_df = self.preprocess_features(input_df.copy())
177
 
178
+ print("=== 模型輸入特徵檢查 ===")
179
+ print(f"輸入形狀: {processed_df.shape}")
180
+ print("前5個特徵值:")
181
+ for i, col in enumerate(processed_df.columns[:5]):
182
+ print(f" {col}: {processed_df[col].iloc[0]:.6f}")
183
+
184
  # 進行預測
185
  predictions = self.model.predict(processed_df)
186
+ print(f"原始預測輸出形狀: {predictions.shape}")
187
+ print(f"原始預測值: {predictions}")
188
 
189
+ # 【修正】處理多輸出預測結果
190
  if predictions.ndim == 1:
191
+ # 單輸出情況 - 只有一個時間點的預測
192
  result = {
193
  'Change_pct_t1_pred': float(predictions[0])
194
  }
195
  else:
196
+ # 多輸出情況:[t1, t5, t10, t20] - 對應訓練模型的四個輸出
197
+ result = {}
198
+ target_keys = ['Change_pct_t1_pred', 'Change_pct_t5_pred',
199
+ 'Change_pct_t10_pred', 'Change_pct_t20_pred']
200
+
201
+ for i, key in enumerate(target_keys):
202
+ if i < predictions.shape[1]:
203
+ result[key] = float(predictions[0][i])
204
+ else:
205
+ result[key] = 0.0
206
 
207
  # 輸出預測結果摘要
208
  print("=== 漲幅預測結果 ===")
209
  for key, value in result.items():
210
+ days = key.split('_')[2][1:]
211
  direction = "上漲" if value > 0 else "下跌"
212
  print(f" {days}日後預測: {value:+.2f}% ({direction})")
213
 
 
220
  return None
221
 
222
  def predict_single_timeframe(self, model_name, input_df, days):
223
+ """預測特定時間框架的漲幅"""
 
 
 
 
 
 
 
 
 
 
224
  try:
225
  predictions = self.predict(model_name, input_df)
226
  if predictions is None:
227
  return None
228
 
 
229
  target_key = f'Change_pct_t{days}_pred'
230
 
231
  if target_key in predictions:
 
238
  print(f"單一時間框架預測時發生錯誤:{e}")
239
  return None
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def get_feature_importance(self):
242
+ """獲取特徵重要性"""
 
 
 
 
 
243
  try:
244
  if self.model is None:
245
  return None
246
 
 
247
  importance_scores = self.model.feature_importances_
 
 
248
  importance_dict = {}
249
  for i, feature in enumerate(self.feature_columns):
250
  if i < len(importance_scores):
251
  importance_dict[feature] = float(importance_scores[i])
252
 
253
+ return dict(sorted(importance_dict.items(),
254
+ key=lambda x: x[1],
255
+ reverse=True))
 
 
 
256
 
257
  except Exception as e:
258
  print(f"獲取特徵重要性時發生錯誤:{e}")
259
  return None
260
 
261
+ # 測試函數
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  if __name__ == "__main__":
 
263
  model = XGBoostModel()
264
 
265
+ # 創建測試數據
266
  test_data = pd.DataFrame({
267
+ 'Close': [150.0] * 30, # 需要足夠的歷史數據來計算技術指標
268
+ 'Volume': [1000000] * 30,
269
+ 'High': [152.0] * 30,
270
+ 'Low': [148.0] * 30,
271
+ 'Open': [149.0] * 30
 
 
 
 
 
272
  })
273
 
274
+ # 創建特徵
275
+ test_data = model.create_new_features(test_data)
 
276
 
277
+ # 手動設置外部特徵
278
+ test_data['dji_return_t-1'] = 0.01
279
+ test_data['sox_return_t-1'] = 0.015
280
+ test_data['NEWS'] = 0.1
281
 
282
+ # 取最後一行作為輸入
283
+ input_data = test_data.tail(1)
284
+
285
+ print("測試特徵工程...")
286
+ print("可用特徵:")
287
+ for col in model.feature_columns:
288
+ if col in input_data.columns:
289
+ print(f"✓ {col}: {input_data[col].iloc[0]:.4f}")
290
+ else:
291
+ print(f"✗ {col}: 缺失")