AlanRex committed on
Commit
3a5ca62
·
verified ·
1 Parent(s): 06ae8e0

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +200 -161
  2. model_predictor.py +2 -102
app.py CHANGED
@@ -23,7 +23,7 @@ warnings.filterwarnings('ignore')
23
  # 引用您組員的預測器程式
24
  from Bert_predict import BertPredictor
25
 
26
- # 引用新的模型預測器 (已修正特徵欄位)
27
  from model_predictor import XGBoostModel
28
  # ========================== 引用外部模組 END ==========================
29
  # ========================= 新增:交易回測模組 START =========================
@@ -418,35 +418,45 @@ class TradingBacktester:
418
  buy_trades = trades_df[trades_df['signal'] == 1]
419
  sell_trades = trades_df[trades_df['signal'] == -1]
420
 
421
- fig.add_trace(
422
- go.Scatter(
423
- x=buy_trades['date'],
424
- y=[stock_data.loc[d, 'Close'] for d in buy_trades['date']],
425
- mode='markers',
426
- name='買入點',
427
- marker=dict(color='green', size=10, symbol='triangle-up')
428
- ),
429
- row=3, col=1
430
- )
431
-
432
- fig.add_trace(
433
- go.Scatter(
434
- x=sell_trades['date'],
435
- y=[stock_data.loc[d, 'Close'] for d in sell_trades['date']],
436
- mode='markers',
437
- name='賣出點',
438
- marker=dict(color='red', size=10, symbol='triangle-down')
439
- ),
440
- row=3, col=1
441
- )
 
 
442
 
 
443
  fig.update_layout(
444
- title='交易回測結果',
445
  height=800,
446
- showlegend=True
 
447
  )
 
 
 
 
 
448
  return fig
449
 
 
450
  def create_backtest_summary_card(results):
451
  """創建回測摘要卡片"""
452
  if not results:
@@ -797,98 +807,47 @@ def simple_statistical_predict(data, predict_days=5):
797
 
798
  def calculate_new_features(df):
799
  """
800
- 計算新的技術指標特徵 - 完整對應訓練模型的特徵
801
  """
802
  if df.empty:
803
  return df
804
 
805
- # 確保基礎欄位存在
806
- if 'Close' not in df.columns and 'close' in df.columns:
807
- df['Close'] = df['close']
808
- if 'Volume' not in df.columns and 'volume' in df.columns:
809
- df['Volume'] = df['volume']
810
- if 'High' not in df.columns and 'high' in df.columns:
811
- df['High'] = df['high']
812
- if 'Low' not in df.columns and 'low' in df.columns:
813
- df['Low'] = df['low']
814
- if 'Open' not in df.columns and 'open' in df.columns:
815
- df['Open'] = df['open']
816
-
817
- # 1. close - 收盤價
818
- df['close'] = df['Close']
819
-
820
- # 2. return_t-1 — 前一日報酬率 (***FIXED: Corrected to use hyphen to match the model***)
821
  df['return_t-1'] = df['Close'].pct_change()
822
 
823
- # 3. return_t-5 過去 5 日累積報酬率 (***FIXED: Corrected to use hyphen to match the model***)
824
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
825
 
826
- # 4. MA5_close 5 日移動平均價
827
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
828
 
829
- # 5. volatility_5d 5 日報酬標準差(短期波動)
 
 
 
830
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
831
 
832
- # 6. volume_ratio_5d 今日成交量 ÷ 5 日均量
833
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
834
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
835
 
836
- # 7. MACD_diff MACD - signal(趨勢強弱)
837
- exp1 = df['Close'].ewm(span=12).mean()
838
- exp2 = df['Close'].ewm(span=26).mean()
839
- macd_line = exp1 - exp2
840
- signal_line = macd_line.ewm(span=9).mean()
841
- df['MACD_diff'] = macd_line - signal_line
842
-
843
- # 8. dji_return_t-1 — 前一日道瓊指數報酬率(預設為0,需外部數據)
844
- df['dji_return_t-1'] = 0.0
845
-
846
- # 9. sox_return_t-1 — 前一日費半指數報酬率(預設為0,需外部數據)
847
- df['sox_return_t-1'] = 0.0
848
-
849
- # 10. NEWS — 新聞情緒分數(預設為0,需外部數據)
850
- df['NEWS'] = 0.0
851
-
852
- # 11. MACDvol — MACD柱狀圖
853
- df['MACDvol'] = 0.0
854
-
855
- # 12. RSI_14 — 14日RSI
856
  delta = df['Close'].diff()
857
  gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
858
  loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
859
  rs = gain / loss
860
  df['RSI_14'] = 100 - (100 / (1 + rs))
861
 
862
- # 13. ADX 平均趨向指標
863
- try:
864
- df['up_move'] = df['High'] - df['High'].shift(1)
865
- df['down_move'] = df['Low'].shift(1) - df['Low']
866
- df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
867
- df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
868
-
869
- high_low = df['High'] - df['Low']
870
- high_close_prev = np.abs(df['High'] - df['Close'].shift(1))
871
- low_close_prev = np.abs(df['Low'] - df['Close'].shift(1))
872
- df['TR'] = np.maximum.reduce([high_low, high_close_prev, low_close_prev])
873
-
874
- df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
875
- df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
876
- df['DX'] = np.abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
877
- df['ADX'] = df['DX'].ewm(com=13, adjust=False).mean()
878
- except Exception:
879
- print("警告:ADX計算失敗,使用預設值")
880
- df['ADX'] = 25.0
881
-
882
- # 14. volume_weighted_return — 成交量加權報酬率
883
- df['volume_weighted_return'] = np.abs(df['return_t-1']) * df['Volume']
884
 
885
  # 移除輔助欄位
886
- auxiliary_columns = ['volume_5d_avg', 'up_move', 'down_move', '+DM', '-DM', 'TR', '+DI', '-DI', 'DX']
887
- df.drop(columns=[col for col in auxiliary_columns if col in df.columns], inplace=True)
888
-
889
- # 填補 NaN 值
890
- df.fillna(method='ffill', inplace=True)
891
- df.fillna(0, inplace=True)
892
 
893
  return df
894
 
@@ -904,92 +863,171 @@ def advanced_xgboost_predict(predict_days=5):
904
  xgb_model = XGBoostModel()
905
 
906
  # 獲取台指期數據 (作為主要標的)
907
- taiex_data_raw = get_stock_data('^TWII', '2y')
908
- if taiex_data_raw.empty or len(taiex_data_raw) < 60:
909
  print("台指期數據不足,無法進行XGBoost預測")
910
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
911
 
912
- # ***FIX START: Simplified and Corrected Feature Engineering and Assembly***
913
-
914
- # 1. Calculate all technical features
915
- taiex_data = calculate_new_features(taiex_data_raw)
916
-
917
- # 2. Get the latest row of data which contains most of our features
918
- latest_features = taiex_data.iloc[-1:].copy()
919
-
920
- # 3. Update external features (US markets, News)
921
- # Get US market returns
 
922
  dji_return = 0
923
  sox_return = 0
 
 
924
  try:
925
  dji_data = get_stock_data('^DJI', '5d')
926
  if not dji_data.empty and len(dji_data) >= 2:
927
  dji_return = (dji_data['Close'].iloc[-1] / dji_data['Close'].iloc[-2] - 1)
928
- except Exception as e:
929
- print(f"Could not fetch DJI data: {e}")
930
 
931
  try:
932
  sox_data = get_stock_data('^SOX', '5d')
933
  if not sox_data.empty and len(sox_data) >= 2:
934
  sox_return = (sox_data['Close'].iloc[-1] / sox_data['Close'].iloc[-2] - 1)
935
- except Exception as e:
936
- print(f"Could not fetch SOX data: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
937
 
938
- # Get news sentiment score
939
- sentiment_score_raw = 0
940
- try:
941
- if predictor is not None:
942
- score = predictor.get_news_index()
943
- if score is not None:
944
- sentiment_score_raw = score
945
- except Exception as e:
946
- print(f"Could not get news score: {e}")
947
-
948
- # Update the values in our feature set
949
- latest_features['dji_return_t-1'] = dji_return
950
- latest_features['sox_return_t-1'] = sox_return
951
- latest_features['NEWS'] = sentiment_score_raw
952
-
953
- # 4. Define the exact feature list from the training script
954
- model_feature_columns = [
955
- 'close',
956
- 'return_t-1',
957
- 'return_t-5',
958
- 'MA5_close',
959
- 'volatility_5d',
960
- 'volume_ratio_5d',
961
- 'MACD_diff',
962
- 'dji_return_t-1',
963
- 'sox_return_t-1',
964
- 'NEWS',
965
- 'MACDvol',
966
- 'RSI_14',
967
- 'ADX',
968
- 'volume_weighted_return'
969
- ]
970
 
971
- # 5. Create the final input DataFrame with columns in the correct order
972
- # This replaces the entire flawed feature assembly loop from the original code.
973
- input_df = latest_features[model_feature_columns]
 
 
 
 
 
974
 
975
- # Log the final features for verification
 
 
 
 
 
 
 
 
976
  print("=" * 60)
977
- print("XGBoost Model Input Features Verification")
978
  print("=" * 60)
979
- print(input_df.iloc[0])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980
  print("=" * 60)
981
 
982
- # ***FIX END***
983
-
984
  # 進行預測
985
  predictions = xgb_model.predict('xgboost_model', input_df)
986
 
987
- # 處理新的漲幅百分比輸出格式
988
  pred_mapping = {
989
- 1: 'Change_pct_t1_pred',
990
- 5: 'Change_pct_t5_pred',
991
- 10: 'Change_pct_t10_pred',
992
- 20: 'Change_pct_t20_pred'
993
  }
994
 
995
  # 找到最接近的預測天數
@@ -997,24 +1035,25 @@ def advanced_xgboost_predict(predict_days=5):
997
  closest_day = min(available_days, key=lambda x: abs(x - predict_days))
998
  pred_key = pred_mapping[closest_day]
999
 
 
1000
  predicted_change_pct = predictions[pred_key]
1001
 
1002
- current_price = taiex_data['Close'].iloc[-1]
 
1003
  predicted_price = current_price * (1 + predicted_change_pct / 100)
1004
 
1005
- # Use a simple confidence score for now
1006
- confidence = 0.8
1007
-
1008
  print(f"XGBoost 預測完成:")
1009
  print(f"- 預測天數: {predict_days} (使用 {closest_day} 天模型)")
1010
  print(f"- 當前價格: {current_price:.2f}")
1011
  print(f"- 預測漲幅: {predicted_change_pct:+.2f}%")
1012
  print(f"- 預測價格: {predicted_price:.2f} (參考)")
 
 
1013
 
1014
  return {
1015
- 'predicted_price': predicted_price,
1016
- 'change_pct': predicted_change_pct,
1017
- 'confidence': confidence
1018
  }
1019
 
1020
  except Exception as e:
 
23
  # 引用您組員的預測器程式
24
  from Bert_predict import BertPredictor
25
 
26
+ # 引用新的模型預測器
27
  from model_predictor import XGBoostModel
28
  # ========================== 引用外部模組 END ==========================
29
  # ========================= 新增:交易回測模組 START =========================
 
418
  buy_trades = trades_df[trades_df['signal'] == 1]
419
  sell_trades = trades_df[trades_df['signal'] == -1]
420
 
421
+ if not buy_trades.empty:
422
+ fig.add_trace(
423
+ go.Scatter(
424
+ x=buy_trades['date'],
425
+ y=buy_trades['price'],
426
+ mode='markers',
427
+ name='買入',
428
+ marker=dict(color='red', size=8, symbol='triangle-up')
429
+ ),
430
+ row=3, col=1
431
+ )
432
+
433
+ if not sell_trades.empty:
434
+ fig.add_trace(
435
+ go.Scatter(
436
+ x=sell_trades['date'],
437
+ y=sell_trades['price'],
438
+ mode='markers',
439
+ name='賣出',
440
+ marker=dict(color='green', size=8, symbol='triangle-down')
441
+ ),
442
+ row=3, col=1
443
+ )
444
 
445
+ # 更新布局
446
  fig.update_layout(
447
+ title=f"交易策略回測結果",
448
  height=800,
449
+ showlegend=True,
450
+ xaxis3_title="日期"
451
  )
452
+
453
+ fig.update_yaxes(title_text="價值 (TWD)", row=1, col=1)
454
+ fig.update_yaxes(title_text="股數", row=2, col=1)
455
+ fig.update_yaxes(title_text="股價 (TWD)", row=3, col=1)
456
+
457
  return fig
458
 
459
+
460
  def create_backtest_summary_card(results):
461
  """創建回測摘要卡片"""
462
  if not results:
 
807
 
808
  def calculate_new_features(df):
809
  """
810
+ 計算新的技術指標特徵 - 針對新特徵需求
811
  """
812
  if df.empty:
813
  return df
814
 
815
+ # 1. return_t-1 – 前一日報酬率
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
816
  df['return_t-1'] = df['Close'].pct_change()
817
 
818
+ # 2. return_t-5 過去 5 日累積報酬率
819
  df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
820
 
821
+ # 3. MA5_close 5 日移動平均價
822
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
823
 
824
+ # 4. MA20_close 20 日移動平均價
825
+ df['MA20_close'] = df['Close'].rolling(window=20).mean()
826
+
827
+ # 5. volatility_5d – 5 日報酬標準差(短期波動)
828
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
829
 
830
+ # 6. volume_ratio_5d 今日成交量 ÷ 5 日均量
831
  df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
832
  df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
833
 
834
+ # 7. RSI_14 14 RSI 指標
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
835
  delta = df['Close'].diff()
836
  gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
837
  loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
838
  rs = gain / loss
839
  df['RSI_14'] = 100 - (100 / (1 + rs))
840
 
841
+ # 8. MACD_diff MACD - signal(趨勢強弱)
842
+ exp1 = df['Close'].ewm(span=12).mean()
843
+ exp2 = df['Close'].ewm(span=26).mean()
844
+ macd_line = exp1 - exp2
845
+ signal_line = macd_line.ewm(span=9).mean()
846
+ df['MACD_diff'] = macd_line - signal_line
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
847
 
848
  # 移除輔助欄位
849
+ if 'volume_5d_avg' in df.columns:
850
+ df = df.drop('volume_5d_avg', axis=1)
 
 
 
 
851
 
852
  return df
853
 
 
863
  xgb_model = XGBoostModel()
864
 
865
  # 獲取台指期數據 (作為主要標的)
866
+ taiex_data = get_stock_data('^TWII', '2y')
867
+ if taiex_data.empty or len(taiex_data) < 60:
868
  print("台指期數據不足,無法進行XGBoost預測")
869
  return None
870
+
871
+ # 計算技術指標(包含舊的指標)
872
+ taiex_data = calculate_technical_indicators(taiex_data)
873
+
874
+ # 計算新特徵
875
+ taiex_data = calculate_new_features(taiex_data)
876
+
877
+ # 獲取美股指數數據來計算外部指標
878
+ us_market_data = get_us_market_data()
879
+
880
+ # 獲取新聞情緒分數
881
+ try:
882
+ if predictor is not None:
883
+ sentiment_score_raw = predictor.get_news_index()
884
+ if sentiment_score_raw is None:
885
+ sentiment_score_raw = 0
886
+ else:
887
+ sentiment_score_raw = 0
888
+ except:
889
+ sentiment_score_raw = 0
890
+
891
+ # 準備特徵數據 (使用最新的數據點)
892
+ latest_data = taiex_data.iloc[-1]
893
+
894
+ # 取得昨日收盤價
895
+ yesterday_close = latest_data['Close']
896
 
897
+ # 特徵列表,確保與模型訓練時完全一致
898
+ new_feature_columns = [
899
+ 'return_t-1',
900
+ 'return_t-5',
901
+ 'MA5_close',
902
+ 'volatility_5d',
903
+ 'volume_ratio_5d',
904
+ 'MACD_diff',
905
+ ]
906
+
907
+ # 添加美股指標(如果有數據的話)
908
  dji_return = 0
909
  sox_return = 0
910
+
911
+ # 嘗試獲取美股前一日報酬率
912
  try:
913
  dji_data = get_stock_data('^DJI', '5d')
914
  if not dji_data.empty and len(dji_data) >= 2:
915
  dji_return = (dji_data['Close'].iloc[-1] / dji_data['Close'].iloc[-2] - 1)
916
+ except:
917
+ pass
918
 
919
  try:
920
  sox_data = get_stock_data('^SOX', '5d')
921
  if not sox_data.empty and len(sox_data) >= 2:
922
  sox_return = (sox_data['Close'].iloc[-1] / sox_data['Close'].iloc[-2] - 1)
923
+ except:
924
+ pass
925
+
926
+ # 檢查並處理 NaN 值,建立特徵狀態記錄
927
+ feature_status = {}
928
+ features_list = []
929
+ feature_names = []
930
+
931
+ # 處理本地計算的技術指標特徵
932
+ for feature in new_feature_columns:
933
+ if feature in latest_data.index:
934
+ value = latest_data[feature]
935
+ if pd.isna(value):
936
+ # 使用合理的預設值
937
+ if 'return' in feature: default_value = 0.0
938
+ elif 'MA' in feature: default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
939
+ elif 'volatility' in feature: default_value = 0.02
940
+ elif 'volume_ratio' in feature: default_value = 1.0
941
+ elif 'MACD' in feature: default_value = 0.0
942
+ else: default_value = 0.0
943
+
944
+ features_list.append(default_value)
945
+ feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
946
+ else:
947
+ features_list.append(value)
948
+ feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
949
+
950
+ feature_names.append(feature)
951
+
952
+ # 按照模型訓練的順序添加剩餘特徵
953
+ # 7. dji_return_t-1
954
+ features_list.append(dji_return)
955
+ feature_names.append('dji_return_t-1')
956
+ feature_status['dji_return_t-1'] = {
957
+ 'value': dji_return,
958
+ 'is_real': dji_return != 0,
959
+ 'source': 'calculated' if dji_return != 0 else 'default'
960
+ }
961
 
962
+ # 8. sox_return_t-1
963
+ features_list.append(sox_return)
964
+ feature_names.append('sox_return_t-1')
965
+ feature_status['sox_return_t-1'] = {
966
+ 'value': sox_return,
967
+ 'is_real': sox_return != 0,
968
+ 'source': 'calculated' if sox_return != 0 else 'default'
969
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
970
 
971
+ # 9. close
972
+ if not pd.isna(yesterday_close):
973
+ features_list.append(yesterday_close)
974
+ feature_status['close'] = {'value': yesterday_close, 'is_real': True, 'source': 'calculated'}
975
+ else:
976
+ features_list.append(10000) # Fallback value for price
977
+ feature_status['close'] = {'value': 10000, 'is_real': False, 'source': 'default'}
978
+ feature_names.append('close')
979
 
980
+ # 10. NEWS
981
+ features_list.append(sentiment_score_raw)
982
+ feature_status['NEWS'] = {'value': sentiment_score_raw, 'is_real': True, 'source': 'calculated'}
983
+ feature_names.append('NEWS')
984
+
985
+ # 轉換為 DataFrame (XGBoost 模型期望的格式)
986
+ input_df = pd.DataFrame([features_list], columns=feature_names)
987
+
988
+ # 詳細的資料驗證日誌
989
  print("=" * 60)
990
+ print("XGBoost 模型輸入特徵檢查報告 (漲幅百分比版��)")
991
  print("=" * 60)
992
+
993
+ print(f"總特徵數量: {len(features_list)} 個")
994
+ print(f"新聞情緒分數: {sentiment_score_raw:.6f}")
995
+
996
+ # 特徵詳細狀態
997
+ print("\n特徵狀態詳情:")
998
+ for i, (name, value) in enumerate(zip(feature_names, features_list)):
999
+ status = feature_status.get(name, {})
1000
+ status_symbol = "✓正常" if status.get('is_real', False) else "⚠ 預設值"
1001
+ print(f" [{i+1:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
1002
+
1003
+ # 統計完整性
1004
+ real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
1005
+ total_features = len(feature_status)
1006
+ completeness = (real_features / total_features) * 100 if total_features > 0 else 0
1007
+
1008
+ print(f"\n特徵完整性:")
1009
+ print(f" 實際計算特徵: {real_features}/{total_features} ({completeness:.1f}%)")
1010
+ if completeness < 70:
1011
+ print(" 警告: 超過30%的特徵使用預設值,可能影響預測準確性")
1012
+ else:
1013
+ print(" 特徵完整性良好")
1014
+
1015
+ # 顯示完整特徵向量
1016
+ print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
1017
+ for i, (name, value) in enumerate(zip(feature_names, features_list)):
1018
+ print(f" [{i+1:2d}] {name:18s}: {value:12.6f}")
1019
+
1020
  print("=" * 60)
1021
 
 
 
1022
  # 進行預測
1023
  predictions = xgb_model.predict('xgboost_model', input_df)
1024
 
1025
+ # 【重要更新】處理新的漲幅百分比輸出格式
1026
  pred_mapping = {
1027
+ 1: 'Change_pct_t1_pred', # 1天後漲幅%
1028
+ 5: 'Change_pct_t5_pred', # 5天後漲幅%
1029
+ 10: 'Change_pct_t10_pred', # 10天後漲幅%
1030
+ 20: 'Change_pct_t20_pred' # 20天後漲幅%
1031
  }
1032
 
1033
  # 找到最接近的預測天數
 
1035
  closest_day = min(available_days, key=lambda x: abs(x - predict_days))
1036
  pred_key = pred_mapping[closest_day]
1037
 
1038
+ # 【關鍵修改】現在直接取得漲幅百分比
1039
  predicted_change_pct = predictions[pred_key]
1040
 
1041
+ # 【新增】為了兼容性,計算預測價格(僅供參考)
1042
+ current_price = latest_data['Close']
1043
  predicted_price = current_price * (1 + predicted_change_pct / 100)
1044
 
 
 
 
1045
  print(f"XGBoost 預測完成:")
1046
  print(f"- 預測天數: {predict_days} (使用 {closest_day} 天模型)")
1047
  print(f"- 當前價格: {current_price:.2f}")
1048
  print(f"- 預測漲幅: {predicted_change_pct:+.2f}%")
1049
  print(f"- 預測價格: {predicted_price:.2f} (參考)")
1050
+ print(f"- 使用特徵數: {len(features_list)} 個")
1051
+ print(f"- 特徵完整性: {completeness:.1f}%")
1052
 
1053
  return {
1054
+ 'predicted_price': predicted_price, # 為了兼容現有代碼
1055
+ 'change_pct': predicted_change_pct, # 【新增】直接的漲幅百分比
1056
+ 'confidence': max(0.6, min(0.85, completeness / 100)) # 根據特徵完整性調整信心度
1057
  }
1058
 
1059
  except Exception as e:
model_predictor.py CHANGED
@@ -30,11 +30,7 @@ class XGBoostModel:
30
  'MACD_diff', # MACD - signal
31
  'dji_return_t-1', # 前一日道瓊指數報酬率
32
  'sox_return_t-1', # 前一日費半指數報酬率
33
- 'NEWS', # 新聞情緒分數
34
- 'MACDvol', # MACD 柱狀圖
35
- 'RSI_14', # 14 日 RSI
36
- 'ADX', # 平均趨向指標
37
- 'volume_weighted_return' # 成交量加權報酬率
38
  ]
39
 
40
  # 【新增】輸出目標對應表
@@ -103,98 +99,6 @@ class XGBoostModel:
103
  self.scaler = StandardScaler()
104
  return False
105
 
106
- def create_features_from_stock_data(self, stock_data):
107
- """
108
- 從股票資料創建所需的特徵
109
- 完全對應訓練腳本中的 create_new_features 函數
110
-
111
- Args:
112
- stock_data: yfinance 格式的股票資料 DataFrame
113
-
114
- Returns:
115
- processed_df: 包含所有特徵的 DataFrame
116
- """
117
- df = stock_data.copy()
118
-
119
- # 確保必要的基礎欄位存在
120
- required_base_columns = ['Close', 'Volume', 'High', 'Low']
121
- for col in required_base_columns:
122
- if col not in df.columns:
123
- raise ValueError(f"缺少必要的基礎欄位: {col}")
124
-
125
- # 統一欄位名稱(yfinance 使用大寫)
126
- df['close'] = df['Close']
127
- df['volume'] = df['Volume']
128
-
129
- # 1. return_t-1 — 前一日報酬率
130
- df['return_t-1'] = df['close'].pct_change()
131
-
132
- # 2. return_t-5 — 過去 5 日累積報酬率
133
- df['return_t-5'] = (df['close'] / df['close'].shift(5) - 1)
134
-
135
- # 3. MA5_close — 5 日移動平均價
136
- df['MA5_close'] = df['close'].rolling(window=5).mean()
137
-
138
- # 4. volatility_5d — 5 日報酬標準差
139
- df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
140
-
141
- # 5. volume_ratio_5d — 今日成交量 ÷ 5 日均量
142
- df['volume_5d_avg'] = df['volume'].rolling(window=5).mean()
143
- df['volume_ratio_5d'] = df['volume'] / df['volume_5d_avg']
144
-
145
- # 6. MACD_diff — MACD - signal
146
- exp1 = df['close'].ewm(span=12).mean()
147
- exp2 = df['close'].ewm(span=26).mean()
148
- macd_line = exp1 - exp2
149
- signal_line = macd_line.ewm(span=9).mean()
150
- df['MACD_diff'] = macd_line - signal_line
151
-
152
- # 7-8. 美股指數報酬率(需要外部資料,暫設為0)
153
- df['dji_return_t-1'] = 0.0
154
- df['sox_return_t-1'] = 0.0
155
-
156
- # 9. NEWS — 新聞情緒分數(需要外部資料,暫設為0)
157
- df['NEWS'] = 0.0
158
-
159
- # 10. MACDvol — MACD柱狀圖(需要外部資料,暫設為0)
160
- df['MACDvol'] = 0.0
161
-
162
- # 11. RSI_14 — 14日RSI
163
- delta = df['close'].diff()
164
- gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
165
- loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
166
- rs = gain / loss
167
- df['RSI_14'] = 100 - (100 / (1 + rs))
168
-
169
- # 12. ADX — 平均趨向指標
170
- df['up_move'] = df['High'] - df['High'].shift(1)
171
- df['down_move'] = df['Low'].shift(1) - df['Low']
172
- df['+DM'] = np.where((df['up_move'] > df['down_move']) & (df['up_move'] > 0), df['up_move'], 0)
173
- df['-DM'] = np.where((df['down_move'] > df['up_move']) & (df['down_move'] > 0), df['down_move'], 0)
174
-
175
- high_low = df['High'] - df['Low']
176
- high_close_prev = np.abs(df['High'] - df['close'].shift(1))
177
- low_close_prev = np.abs(df['Low'] - df['close'].shift(1))
178
- df['TR'] = np.maximum.reduce([high_low, high_close_prev, low_close_prev])
179
-
180
- df['+DI'] = (df['+DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
181
- df['-DI'] = (df['-DM'].ewm(com=13, adjust=False).mean() / df['TR'].ewm(com=13, adjust=False).mean()) * 100
182
- df['DX'] = np.abs(df['+DI'] - df['-DI']) / (df['+DI'] + df['-DI']) * 100
183
- df['ADX'] = df['DX'].ewm(com=13, adjust=False).mean()
184
-
185
- # 13. volume_weighted_return — 成交量加權報酬率
186
- df['volume_weighted_return'] = np.abs(df['return_t-1']) * df['volume']
187
-
188
- # 清理輔助欄位
189
- cleanup_columns = ['volume_5d_avg', 'up_move', 'down_move', '+DM', '-DM', 'TR', '+DI', '-DI', 'DX']
190
- df.drop(columns=[col for col in cleanup_columns if col in df.columns], inplace=True)
191
-
192
- # 填補 NaN 值
193
- df.fillna(method='ffill', inplace=True)
194
- df.fillna(0, inplace=True) # 剩餘的 NaN 用 0 填補
195
-
196
- return df
197
-
198
  def preprocess_features(self, input_df):
199
  """
200
  預處理輸入特徵
@@ -496,11 +400,7 @@ if __name__ == "__main__":
496
  'MACD_diff': [0.5],
497
  'dji_return_t-1': [0.01],
498
  'sox_return_t-1': [0.015],
499
- 'NEWS': [0.1],
500
- 'MACDvol': [0.2],
501
- 'RSI_14': [55.0],
502
- 'ADX': [25.0],
503
- 'volume_weighted_return': [1000.0]
504
  })
505
 
506
  print("測試模型預測器...")
 
30
  'MACD_diff', # MACD - signal
31
  'dji_return_t-1', # 前一日道瓊指數報酬率
32
  'sox_return_t-1', # 前一日費半指數報酬率
33
+ 'NEWS' # 新聞情緒分數
 
 
 
 
34
  ]
35
 
36
  # 【新增】輸出目標對應表
 
99
  self.scaler = StandardScaler()
100
  return False
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def preprocess_features(self, input_df):
103
  """
104
  預處理輸入特徵
 
400
  'MACD_diff': [0.5],
401
  'dji_return_t-1': [0.01],
402
  'sox_return_t-1': [0.015],
403
+ 'NEWS': [0.1]
 
 
 
 
404
  })
405
 
406
  print("測試模型預測器...")