Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -133,25 +133,37 @@ class TradingBacktester:
|
|
| 133 |
return 0
|
| 134 |
|
| 135 |
def simulate_predictions(self, data, predictor_func):
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
# 為每個交易日生成預測
|
| 149 |
-
for i in range(60, len(data)): # 從第60天開始,確保有足夠歷史資料,
|
| 150 |
-
current_date = data.index[i]
|
| 151 |
-
historical_data = data.iloc[:i+1] # 到當前日期的歷史資料
|
| 152 |
-
|
| 153 |
try:
|
| 154 |
-
# 呼叫預測函數
|
| 155 |
predictions = {}
|
| 156 |
for days in [1, 5, 10, 20]:
|
| 157 |
pred_result = predictor_func(historical_data, days)
|
|
@@ -159,15 +171,15 @@ class TradingBacktester:
|
|
| 159 |
predictions[f'{days}d'] = pred_result.get('change_pct', 0)
|
| 160 |
else:
|
| 161 |
predictions[f'{days}d'] = 0
|
| 162 |
-
|
| 163 |
predictions_history[current_date] = predictions
|
| 164 |
-
|
| 165 |
except Exception as e:
|
| 166 |
# print(f"預測失敗 {current_date}: {e}")
|
| 167 |
predictions_history[current_date] = {
|
| 168 |
'1d': 0, '5d': 0, '10d': 0, '20d': 0
|
| 169 |
}
|
| 170 |
-
|
| 171 |
return predictions_history
|
| 172 |
|
| 173 |
def run_backtest(self, stock_data, predictor_func, start_date=None, end_date=None):
|
|
@@ -807,47 +819,98 @@ def simple_statistical_predict(data, predict_days=5):
|
|
| 807 |
|
| 808 |
def calculate_new_features(df):
|
| 809 |
"""
|
| 810 |
-
計算新的技術指標特徵 -
|
| 811 |
"""
|
| 812 |
if df.empty:
|
| 813 |
return df
|
| 814 |
|
| 815 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
df['return_t-1'] = df['Close'].pct_change()
|
| 817 |
|
| 818 |
-
#
|
| 819 |
df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)
|
| 820 |
|
| 821 |
-
#
|
| 822 |
df['MA5_close'] = df['Close'].rolling(window=5).mean()
|
| 823 |
|
| 824 |
-
#
|
| 825 |
-
df['MA20_close'] = df['Close'].rolling(window=20).mean()
|
| 826 |
-
|
| 827 |
-
# 5. volatility_5d – 5 日報酬標準差(短期波動)
|
| 828 |
df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
|
| 829 |
|
| 830 |
-
# 6. volume_ratio_5d
|
| 831 |
df['volume_5d_avg'] = df['Volume'].rolling(window=5).mean()
|
| 832 |
df['volume_ratio_5d'] = df['Volume'] / df['volume_5d_avg']
|
| 833 |
|
| 834 |
-
# 7.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 835 |
delta = df['Close'].diff()
|
| 836 |
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
|
| 837 |
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
|
| 838 |
rs = gain / loss
|
| 839 |
df['RSI_14'] = 100 - (100 / (1 + rs))
|
| 840 |
|
| 841 |
-
#
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 847 |
|
| 848 |
# 移除輔助欄位
|
| 849 |
-
|
| 850 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 851 |
|
| 852 |
return df
|
| 853 |
|
|
@@ -871,8 +934,34 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 871 |
# 計算技術指標(包含舊的指標)
|
| 872 |
taiex_data = calculate_technical_indicators(taiex_data)
|
| 873 |
|
|
|
|
| 874 |
# 計算新特徵
|
| 875 |
taiex_data = calculate_new_features(taiex_data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
|
| 877 |
# 獲取美股指數數據來計算外部指標
|
| 878 |
us_market_data = get_us_market_data()
|
|
@@ -895,14 +984,22 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 895 |
yesterday_close = latest_data['Close']
|
| 896 |
|
| 897 |
# 特徵列表,確保與模型訓練時完全一致
|
| 898 |
-
|
| 899 |
-
'
|
| 900 |
-
'return_t-
|
| 901 |
-
'
|
| 902 |
-
'
|
| 903 |
-
'
|
| 904 |
-
'
|
| 905 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 906 |
|
| 907 |
# 添加美股指標(如果有數據的話)
|
| 908 |
dji_return = 0
|
|
@@ -929,25 +1026,241 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 929 |
feature_names = []
|
| 930 |
|
| 931 |
# 處理本地計算的技術指標特徵
|
| 932 |
-
for feature in
|
| 933 |
-
if feature in
|
| 934 |
-
|
| 935 |
-
if
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
| 942 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 943 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 944 |
features_list.append(default_value)
|
| 945 |
-
feature_status[feature] = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 946 |
else:
|
| 947 |
-
|
| 948 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 949 |
|
| 950 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 951 |
|
| 952 |
# 按照模型訓練的順序添加剩餘特徵
|
| 953 |
# 7. dji_return_t-1
|
|
|
|
| 133 |
return 0
|
| 134 |
|
| 135 |
def simulate_predictions(self, data, predictor_func):
|
| 136 |
+
"""
|
| 137 |
+
模擬歷史預測結果
|
| 138 |
+
|
| 139 |
+
Args:
|
| 140 |
+
data: 股價歷史資料
|
| 141 |
+
predictor_func: 預測函數
|
| 142 |
|
| 143 |
+
Returns:
|
| 144 |
+
predictions_history: 歷史預測結果字典
|
| 145 |
+
"""
|
| 146 |
+
predictions_history = {}
|
| 147 |
+
|
| 148 |
+
# 為每個交易日生成預測
|
| 149 |
+
for i in range(60, len(data)): # 從第60天開始,確保有足夠歷史資料
|
| 150 |
+
current_date = data.index[i]
|
| 151 |
+
historical_data = data.iloc[:i+1].copy() # 到當前日期的歷史資料
|
| 152 |
+
|
| 153 |
+
# 確保歷史資料有必要的欄位結構 (yfinance格式)
|
| 154 |
+
if 'Close' not in historical_data.columns and 'close' in historical_data.columns:
|
| 155 |
+
historical_data['Close'] = historical_data['close']
|
| 156 |
+
if 'Volume' not in historical_data.columns and 'volume' in historical_data.columns:
|
| 157 |
+
historical_data['Volume'] = historical_data['volume']
|
| 158 |
+
if 'High' not in historical_data.columns and 'high' in historical_data.columns:
|
| 159 |
+
historical_data['High'] = historical_data['high']
|
| 160 |
+
if 'Low' not in historical_data.columns and 'low' in historical_data.columns:
|
| 161 |
+
historical_data['Low'] = historical_data['low']
|
| 162 |
+
if 'Open' not in historical_data.columns and 'open' in historical_data.columns:
|
| 163 |
+
historical_data['Open'] = historical_data['open']
|
| 164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
try:
|
| 166 |
+
# 呼叫預測函數 - get_prediction會處理特徵計算
|
| 167 |
predictions = {}
|
| 168 |
for days in [1, 5, 10, 20]:
|
| 169 |
pred_result = predictor_func(historical_data, days)
|
|
|
|
| 171 |
predictions[f'{days}d'] = pred_result.get('change_pct', 0)
|
| 172 |
else:
|
| 173 |
predictions[f'{days}d'] = 0
|
| 174 |
+
|
| 175 |
predictions_history[current_date] = predictions
|
| 176 |
+
|
| 177 |
except Exception as e:
|
| 178 |
# print(f"預測失敗 {current_date}: {e}")
|
| 179 |
predictions_history[current_date] = {
|
| 180 |
'1d': 0, '5d': 0, '10d': 0, '20d': 0
|
| 181 |
}
|
| 182 |
+
|
| 183 |
return predictions_history
|
| 184 |
|
| 185 |
def run_backtest(self, stock_data, predictor_func, start_date=None, end_date=None):
|
|
|
|
| 819 |
|
| 820 |
def calculate_new_features(df):
    """
    Add the technical-indicator feature columns used by the prediction model.

    The frame is modified in place and also returned. An empty frame is
    returned untouched.

    Args:
        df: price history with a ``Close`` and ``Volume`` column (or their
            lowercase equivalents). ``High`` and ``Low`` are needed only for
            ADX; when they are unavailable ADX falls back to a constant 25.0.

    Returns:
        The same DataFrame with these columns added or overwritten:
        ``close``, ``return_t-1``, ``return_t-5``, ``MA5_close``,
        ``volatility_5d``, ``volume_ratio_5d``, ``MACD_diff``,
        ``dji_return_t-1``, ``sox_return_t-1``, ``NEWS``, ``MACDvol``,
        ``RSI_14``, ``ADX`` and ``volume_weighted_return``. Remaining NaN
        values are forward-filled and then replaced by 0.

    Raises:
        KeyError: if neither ``Close``/``close`` or ``Volume``/``volume``
            is present.
    """
    if df.empty:
        return df

    # Accept lowercase OHLCV column names by aliasing them to the
    # capitalised names the calculations below read.
    for name in ('Close', 'Volume', 'High', 'Low', 'Open'):
        lowercase = name.lower()
        if name not in df.columns and lowercase in df.columns:
            df[name] = df[lowercase]

    # 1. close - closing price
    df['close'] = df['Close']

    # 2. return_t-1 - one-day return
    df['return_t-1'] = df['Close'].pct_change()

    # 3. return_t-5 - cumulative return over the past 5 rows
    df['return_t-5'] = (df['Close'] / df['Close'].shift(5) - 1)

    # 4. MA5_close - 5-row moving average of the close
    df['MA5_close'] = df['Close'].rolling(window=5).mean()

    # 5. volatility_5d - 5-row standard deviation of the one-day return
    df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()

    # 6. volume_ratio_5d - volume divided by its 5-row average
    volume_5d_avg = df['Volume'].rolling(window=5).mean()
    df['volume_ratio_5d'] = df['Volume'] / volume_5d_avg

    # 7. MACD_diff - MACD line minus its signal line
    exp1 = df['Close'].ewm(span=12).mean()
    exp2 = df['Close'].ewm(span=26).mean()
    macd_line = exp1 - exp2
    signal_line = macd_line.ewm(span=9).mean()
    macd_histogram = macd_line - signal_line
    df['MACD_diff'] = macd_histogram

    # 8-10. Externally sourced features; this function only writes the
    # 0.0 placeholder for each of them.
    df['dji_return_t-1'] = 0.0
    df['sox_return_t-1'] = 0.0
    df['NEWS'] = 0.0

    # 11. MACDvol - MACD histogram. As written this carries the same values
    # as MACD_diff. NOTE(review): confirm this matches the training data.
    df['MACDvol'] = macd_histogram

    # 12. RSI_14 - 14-row RSI built from simple rolling means
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # 13. ADX - average directional index. Intermediate series are kept in
    # locals so a failure cannot leave half-built helper columns behind.
    try:
        up_move = df['High'] - df['High'].shift(1)
        down_move = df['Low'].shift(1) - df['Low']
        plus_dm = up_move.where((up_move > down_move) & (up_move > 0), 0)
        minus_dm = down_move.where((down_move > up_move) & (down_move > 0), 0)

        prev_close = df['Close'].shift(1)
        high_low = df['High'] - df['Low']
        high_close_prev = np.abs(df['High'] - prev_close)
        low_close_prev = np.abs(df['Low'] - prev_close)
        true_range = np.maximum(
            high_low, np.maximum(high_close_prev, low_close_prev)
        )

        smoothed_tr = true_range.ewm(com=13, adjust=False).mean()
        plus_di = (plus_dm.ewm(com=13, adjust=False).mean() / smoothed_tr) * 100
        minus_di = (minus_dm.ewm(com=13, adjust=False).mean() / smoothed_tr) * 100
        dx = np.abs(plus_di - minus_di) / (plus_di + minus_di) * 100
        df['ADX'] = dx.ewm(com=13, adjust=False).mean()
    except Exception:
        # Best-effort: e.g. High/Low columns are missing. Unlike a bare
        # ``except:`` this no longer traps KeyboardInterrupt / SystemExit.
        print("警告:ADX計算失敗,使用預設值")
        df['ADX'] = 25.0  # neutral default

    # 14. volume_weighted_return - absolute one-day return times volume
    df['volume_weighted_return'] = np.abs(df['return_t-1']) * df['Volume']

    # Drop helper columns if the incoming frame carried any of these names,
    # so the output never contains them.
    auxiliary_columns = ['volume_5d_avg', 'up_move', 'down_move', '+DM', '-DM', 'TR', '+DI', '-DI', 'DX']
    df.drop(columns=[col for col in auxiliary_columns if col in df.columns], inplace=True)

    # Fill NaN values: forward-fill first, then 0 for leading gaps.
    # ``fillna(method='ffill')`` is deprecated in recent pandas, so call
    # ``ffill`` directly.
    df.ffill(inplace=True)
    df.fillna(0, inplace=True)

    return df
|
| 916 |
|
|
|
|
| 934 |
# 計算技術指標(包含舊的指標)
|
| 935 |
taiex_data = calculate_technical_indicators(taiex_data)
|
| 936 |
|
| 937 |
+
# 計算新特徵
|
| 938 |
# 計算新特徵
|
| 939 |
taiex_data = calculate_new_features(taiex_data)
|
| 940 |
+
|
| 941 |
+
# 確保所有必要特徵都存在且沒有NaN值
|
| 942 |
+
required_features = [
|
| 943 |
+
'return_t-1', 'return_t-5', 'MA5_close', 'volatility_5d',
|
| 944 |
+
'volume_ratio_5d', 'MACD_diff'
|
| 945 |
+
]
|
| 946 |
+
|
| 947 |
+
for feature in required_features:
|
| 948 |
+
if feature not in taiex_data.columns:
|
| 949 |
+
print(f"警告: 缺少特徵 {feature},使用默認值")
|
| 950 |
+
if 'return' in feature:
|
| 951 |
+
taiex_data[feature] = 0.0
|
| 952 |
+
elif 'MA' in feature:
|
| 953 |
+
taiex_data[feature] = taiex_data['Close']
|
| 954 |
+
elif 'volatility' in feature:
|
| 955 |
+
taiex_data[feature] = 0.02
|
| 956 |
+
elif 'volume_ratio' in feature:
|
| 957 |
+
taiex_data[feature] = 1.0
|
| 958 |
+
elif 'MACD' in feature:
|
| 959 |
+
taiex_data[feature] = 0.0
|
| 960 |
+
else:
|
| 961 |
+
taiex_data[feature] = 0.0
|
| 962 |
+
|
| 963 |
+
# 填補可能的NaN值
|
| 964 |
+
taiex_data = taiex_data.fillna(method='ffill').fillna(0)
|
| 965 |
|
| 966 |
# 獲取美股指數數據來計算外部指標
|
| 967 |
us_market_data = get_us_market_data()
|
|
|
|
| 984 |
yesterday_close = latest_data['Close']
|
| 985 |
|
| 986 |
# 特徵列表,確保與模型訓練時完全一致
|
| 987 |
+
model_feature_columns = [
|
| 988 |
+
'close', # 前一日收盤價
|
| 989 |
+
'return_t-1', # 前一日報酬率
|
| 990 |
+
'return_t-5', # 過去 5 日累積報酬率
|
| 991 |
+
'MA5_close', # 5 日移動平均價
|
| 992 |
+
'volatility_5d', # 5 日報酬標準差
|
| 993 |
+
'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
|
| 994 |
+
'MACD_diff', # MACD - signal
|
| 995 |
+
'dji_return_t-1', # 前一日道瓊指數報酬率
|
| 996 |
+
'sox_return_t-1', # 前一日費半指數報酬率
|
| 997 |
+
'NEWS', # 新聞情緒分數
|
| 998 |
+
'MACDvol', # MACD柱狀圖
|
| 999 |
+
'RSI_14', # 14日RSI
|
| 1000 |
+
'ADX', # ADX指標
|
| 1001 |
+
'volume_weighted_return' # 成交量加權報酬率
|
| 1002 |
+
]
|
| 1003 |
|
| 1004 |
# 添加美股指標(如果有數據的話)
|
| 1005 |
dji_return = 0
|
|
|
|
| 1026 |
feature_names = []
|
| 1027 |
|
| 1028 |
# 處理本地計算的技術指標特徵
|
| 1029 |
+
for feature in model_feature_columns:
|
| 1030 |
+
if feature in ['dji_return_t-1', 'sox_return_t-1']:
|
| 1031 |
+
# 處理美股指標
|
| 1032 |
+
if feature == 'dji_return_t-1':
|
| 1033 |
+
features_list.append(dji_return)
|
| 1034 |
+
feature_status[feature] = {
|
| 1035 |
+
'value': dji_return,
|
| 1036 |
+
'is_real': dji_return != 0,
|
| 1037 |
+
'source': 'calculated' if dji_return != 0 else 'default'
|
| 1038 |
+
}
|
| 1039 |
+
else: # sox_return_t-1
|
| 1040 |
+
features_list.append(sox_return)
|
| 1041 |
+
feature_status[feature] = {
|
| 1042 |
+
'value': sox_return,
|
| 1043 |
+
'is_real': sox_return != 0,
|
| 1044 |
+
'source': 'calculated' if sox_return != 0 else 'default'
|
| 1045 |
+
}
|
| 1046 |
|
| 1047 |
+
elif feature == 'NEWS':
|
| 1048 |
+
# 新聞分數
|
| 1049 |
+
features_list.append(sentiment_score_raw)
|
| 1050 |
+
feature_status[feature] = {
|
| 1051 |
+
'value': sentiment_score_raw,
|
| 1052 |
+
'is_real': True,
|
| 1053 |
+
'source': 'calculated'
|
| 1054 |
+
}
|
| 1055 |
+
|
| 1056 |
+
else:
|
| 1057 |
+
# 其他技術指標特徵
|
| 1058 |
+
if feature in latest_data.index:
|
| 1059 |
+
value = latest_data[feature]
|
| 1060 |
+
if pd.isna(value):
|
| 1061 |
+
# 使用合理的預設值
|
| 1062 |
+
if 'return' in feature:
|
| 1063 |
+
default_value = 0.0
|
| 1064 |
+
elif 'MA' in feature or feature == 'close':
|
| 1065 |
+
default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
|
| 1066 |
+
elif 'volatility' in feature:
|
| 1067 |
+
default_value = 0.02
|
| 1068 |
+
elif 'volume_ratio' in feature:
|
| 1069 |
+
default_value = 1.0
|
| 1070 |
+
elif 'MACD' in feature:
|
| 1071 |
+
default_value = 0.0
|
| 1072 |
+
elif 'RSI' in feature:
|
| 1073 |
+
default_value = 50.0
|
| 1074 |
+
elif 'ADX' in feature:
|
| 1075 |
+
default_value = 25.0
|
| 1076 |
+
elif 'volume_weighted' in feature:
|
| 1077 |
+
default_value = 0.0
|
| 1078 |
+
else:
|
| 1079 |
+
default_value = 0.0
|
| 1080 |
+
|
| 1081 |
+
features_list.append(default_value)
|
| 1082 |
+
feature_status[feature] = {
|
| 1083 |
+
'value': default_value,
|
| 1084 |
+
'is_real': False,
|
| 1085 |
+
'source': 'default'
|
| 1086 |
+
}
|
| 1087 |
+
else:
|
| 1088 |
+
features_list.append(value)
|
| 1089 |
+
feature_status[feature] = {
|
| 1090 |
+
'value': value,
|
| 1091 |
+
'is_real': True,
|
| 1092 |
+
'source': 'calculated'
|
| 1093 |
+
}
|
| 1094 |
+
else:
|
| 1095 |
+
# 特徵不存在,使用預設值
|
| 1096 |
+
default_value = 0.0
|
| 1097 |
features_list.append(default_value)
|
| 1098 |
+
feature_status[feature] = {
|
| 1099 |
+
'value': default_value,
|
| 1100 |
+
'is_real': False,
|
| 1101 |
+
'source': 'missing'
|
| 1102 |
+
}
|
| 1103 |
+
|
| 1104 |
+
feature_names.append(feature)
|
| 1105 |
+
|
| 1106 |
+
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 1107 |
+
input_df = pd.DataFrame([features_list], columns=feature_names)for feature in model_feature_columns:
|
| 1108 |
+
if feature in ['dji_return_t-1', 'sox_return_t-1']:
|
| 1109 |
+
# 處理美股指標
|
| 1110 |
+
if feature == 'dji_return_t-1':
|
| 1111 |
+
features_list.append(dji_return)
|
| 1112 |
+
feature_status[feature] = {
|
| 1113 |
+
'value': dji_return,
|
| 1114 |
+
'is_real': dji_return != 0,
|
| 1115 |
+
'source': 'calculated' if dji_return != 0 else 'default'
|
| 1116 |
+
}
|
| 1117 |
+
else: # sox_return_t-1
|
| 1118 |
+
features_list.append(sox_return)
|
| 1119 |
+
feature_status[feature] = {
|
| 1120 |
+
'value': sox_return,
|
| 1121 |
+
'is_real': sox_return != 0,
|
| 1122 |
+
'source': 'calculated' if sox_return != 0 else 'default'
|
| 1123 |
+
}
|
| 1124 |
+
|
| 1125 |
+
elif feature == 'NEWS':
|
| 1126 |
+
# 新聞分數
|
| 1127 |
+
features_list.append(sentiment_score_raw)
|
| 1128 |
+
feature_status[feature] = {
|
| 1129 |
+
'value': sentiment_score_raw,
|
| 1130 |
+
'is_real': True,
|
| 1131 |
+
'source': 'calculated'
|
| 1132 |
+
}
|
| 1133 |
+
|
| 1134 |
+
else:
|
| 1135 |
+
# 其他技術指標特徵
|
| 1136 |
+
if feature in latest_data.index:
|
| 1137 |
+
value = latest_data[feature]
|
| 1138 |
+
if pd.isna(value):
|
| 1139 |
+
# 使用合理的預設值
|
| 1140 |
+
if 'return' in feature:
|
| 1141 |
+
default_value = 0.0
|
| 1142 |
+
elif 'MA' in feature or feature == 'close':
|
| 1143 |
+
default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
|
| 1144 |
+
elif 'volatility' in feature:
|
| 1145 |
+
default_value = 0.02
|
| 1146 |
+
elif 'volume_ratio' in feature:
|
| 1147 |
+
default_value = 1.0
|
| 1148 |
+
elif 'MACD' in feature:
|
| 1149 |
+
default_value = 0.0
|
| 1150 |
+
elif 'RSI' in feature:
|
| 1151 |
+
default_value = 50.0
|
| 1152 |
+
elif 'ADX' in feature:
|
| 1153 |
+
default_value = 25.0
|
| 1154 |
+
elif 'volume_weighted' in feature:
|
| 1155 |
+
default_value = 0.0
|
| 1156 |
+
else:
|
| 1157 |
+
default_value = 0.0
|
| 1158 |
+
|
| 1159 |
+
features_list.append(default_value)
|
| 1160 |
+
feature_status[feature] = {
|
| 1161 |
+
'value': default_value,
|
| 1162 |
+
'is_real': False,
|
| 1163 |
+
'source': 'default'
|
| 1164 |
+
}
|
| 1165 |
+
else:
|
| 1166 |
+
features_list.append(value)
|
| 1167 |
+
feature_status[feature] = {
|
| 1168 |
+
'value': value,
|
| 1169 |
+
'is_real': True,
|
| 1170 |
+
'source': 'calculated'
|
| 1171 |
+
}
|
| 1172 |
else:
|
| 1173 |
+
# 特徵不存在,使用預設值
|
| 1174 |
+
default_value = 0.0
|
| 1175 |
+
features_list.append(default_value)
|
| 1176 |
+
feature_status[feature] = {
|
| 1177 |
+
'value': default_value,
|
| 1178 |
+
'is_real': False,
|
| 1179 |
+
'source': 'missing'
|
| 1180 |
+
}
|
| 1181 |
+
|
| 1182 |
+
feature_names.append(feature)
|
| 1183 |
+
|
| 1184 |
+
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 1185 |
+
input_df = pd.DataFrame([features_list], columns=feature_names)for feature in model_feature_columns:
|
| 1186 |
+
if feature in ['dji_return_t-1', 'sox_return_t-1']:
|
| 1187 |
+
# 處理美股指標
|
| 1188 |
+
if feature == 'dji_return_t-1':
|
| 1189 |
+
features_list.append(dji_return)
|
| 1190 |
+
feature_status[feature] = {
|
| 1191 |
+
'value': dji_return,
|
| 1192 |
+
'is_real': dji_return != 0,
|
| 1193 |
+
'source': 'calculated' if dji_return != 0 else 'default'
|
| 1194 |
+
}
|
| 1195 |
+
else: # sox_return_t-1
|
| 1196 |
+
features_list.append(sox_return)
|
| 1197 |
+
feature_status[feature] = {
|
| 1198 |
+
'value': sox_return,
|
| 1199 |
+
'is_real': sox_return != 0,
|
| 1200 |
+
'source': 'calculated' if sox_return != 0 else 'default'
|
| 1201 |
+
}
|
| 1202 |
+
|
| 1203 |
+
elif feature == 'NEWS':
|
| 1204 |
+
# 新聞分數
|
| 1205 |
+
features_list.append(sentiment_score_raw)
|
| 1206 |
+
feature_status[feature] = {
|
| 1207 |
+
'value': sentiment_score_raw,
|
| 1208 |
+
'is_real': True,
|
| 1209 |
+
'source': 'calculated'
|
| 1210 |
+
}
|
| 1211 |
|
| 1212 |
+
else:
|
| 1213 |
+
# 其他技術指標特徵
|
| 1214 |
+
if feature in latest_data.index:
|
| 1215 |
+
value = latest_data[feature]
|
| 1216 |
+
if pd.isna(value):
|
| 1217 |
+
# 使用合理的預設值
|
| 1218 |
+
if 'return' in feature:
|
| 1219 |
+
default_value = 0.0
|
| 1220 |
+
elif 'MA' in feature or feature == 'close':
|
| 1221 |
+
default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
|
| 1222 |
+
elif 'volatility' in feature:
|
| 1223 |
+
default_value = 0.02
|
| 1224 |
+
elif 'volume_ratio' in feature:
|
| 1225 |
+
default_value = 1.0
|
| 1226 |
+
elif 'MACD' in feature:
|
| 1227 |
+
default_value = 0.0
|
| 1228 |
+
elif 'RSI' in feature:
|
| 1229 |
+
default_value = 50.0
|
| 1230 |
+
elif 'ADX' in feature:
|
| 1231 |
+
default_value = 25.0
|
| 1232 |
+
elif 'volume_weighted' in feature:
|
| 1233 |
+
default_value = 0.0
|
| 1234 |
+
else:
|
| 1235 |
+
default_value = 0.0
|
| 1236 |
+
|
| 1237 |
+
features_list.append(default_value)
|
| 1238 |
+
feature_status[feature] = {
|
| 1239 |
+
'value': default_value,
|
| 1240 |
+
'is_real': False,
|
| 1241 |
+
'source': 'default'
|
| 1242 |
+
}
|
| 1243 |
+
else:
|
| 1244 |
+
features_list.append(value)
|
| 1245 |
+
feature_status[feature] = {
|
| 1246 |
+
'value': value,
|
| 1247 |
+
'is_real': True,
|
| 1248 |
+
'source': 'calculated'
|
| 1249 |
+
}
|
| 1250 |
+
else:
|
| 1251 |
+
# 特徵不存在,使用預設值
|
| 1252 |
+
default_value = 0.0
|
| 1253 |
+
features_list.append(default_value)
|
| 1254 |
+
feature_status[feature] = {
|
| 1255 |
+
'value': default_value,
|
| 1256 |
+
'is_real': False,
|
| 1257 |
+
'source': 'missing'
|
| 1258 |
+
}
|
| 1259 |
+
|
| 1260 |
+
feature_names.append(feature)
|
| 1261 |
+
|
| 1262 |
+
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 1263 |
+
input_df = pd.DataFrame([features_list], columns=feature_names)
|
| 1264 |
|
| 1265 |
# 按照模型訓練的順序添加剩餘特徵
|
| 1266 |
# 7. dji_return_t-1
|