Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -280,6 +280,75 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 280 |
latest_data = taiex_data.iloc[-1]
|
| 281 |
|
| 282 |
# 建立特徵向量 (按照訓練數據記錄的順序)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
features_list = [
|
| 284 |
latest_data['Close'], # close
|
| 285 |
latest_data['Volume'], # volume
|
|
@@ -290,15 +359,15 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 290 |
us_market['S&P_500'], # S&P_500
|
| 291 |
us_market['TSM_ADR'], # TSM_ADR
|
| 292 |
sentiment_score_raw, # NEWS (使用原始 sentiment_score_raw)
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
15, # business_climate (手動填入值)
|
| 303 |
46.7 # PMI (手動填入值)
|
| 304 |
]
|
|
@@ -312,13 +381,60 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 312 |
|
| 313 |
input_df = pd.DataFrame([features_list], columns=column_names)
|
| 314 |
|
| 315 |
-
|
| 316 |
-
print(
|
| 317 |
-
print(
|
| 318 |
-
print(
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
print(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
# 進行預測
|
| 324 |
predictions = xgb_model.predict('xgboost_model', input_df)
|
|
|
|
| 280 |
latest_data = taiex_data.iloc[-1]
|
| 281 |
|
| 282 |
# 建立特徵向量 (按照訓練數據記錄的順序)
|
| 283 |
+
# 先檢查每個技術指標是否存在並記錄狀態
|
| 284 |
+
tech_indicators_status = {}
|
| 285 |
+
|
| 286 |
+
# RSI 檢查
|
| 287 |
+
rsi_value = latest_data['RSI'] if not pd.isna(latest_data['RSI']) else 50
|
| 288 |
+
tech_indicators_status['RSI'] = {
|
| 289 |
+
'value': rsi_value,
|
| 290 |
+
'is_real': not pd.isna(latest_data['RSI']),
|
| 291 |
+
'source': 'calculated' if not pd.isna(latest_data['RSI']) else 'default'
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
# MACD 相關檢查
|
| 295 |
+
macd_value = latest_data['MACD'] if not pd.isna(latest_data['MACD']) else 0
|
| 296 |
+
macd_signal_value = latest_data['MACD_Signal'] if not pd.isna(latest_data['MACD_Signal']) else 0
|
| 297 |
+
macd_hist_value = latest_data['MACD_Histogram'] if not pd.isna(latest_data['MACD_Histogram']) else 0
|
| 298 |
+
|
| 299 |
+
tech_indicators_status['MACD'] = {
|
| 300 |
+
'value': macd_value,
|
| 301 |
+
'is_real': not pd.isna(latest_data['MACD']),
|
| 302 |
+
'source': 'calculated' if not pd.isna(latest_data['MACD']) else 'default'
|
| 303 |
+
}
|
| 304 |
+
tech_indicators_status['MACD_Signal'] = {
|
| 305 |
+
'value': macd_signal_value,
|
| 306 |
+
'is_real': not pd.isna(latest_data['MACD_Signal']),
|
| 307 |
+
'source': 'calculated' if not pd.isna(latest_data['MACD_Signal']) else 'default'
|
| 308 |
+
}
|
| 309 |
+
tech_indicators_status['MACD_Histogram'] = {
|
| 310 |
+
'value': macd_hist_value,
|
| 311 |
+
'is_real': not pd.isna(latest_data['MACD_Histogram']),
|
| 312 |
+
'source': 'calculated' if not pd.isna(latest_data['MACD_Histogram']) else 'default'
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
# KD 指標檢查
|
| 316 |
+
k_value = latest_data['K'] if not pd.isna(latest_data['K']) else 50
|
| 317 |
+
d_value = latest_data['D'] if not pd.isna(latest_data['D']) else 50
|
| 318 |
+
|
| 319 |
+
tech_indicators_status['K'] = {
|
| 320 |
+
'value': k_value,
|
| 321 |
+
'is_real': not pd.isna(latest_data['K']),
|
| 322 |
+
'source': 'calculated' if not pd.isna(latest_data['K']) else 'default'
|
| 323 |
+
}
|
| 324 |
+
tech_indicators_status['D'] = {
|
| 325 |
+
'value': d_value,
|
| 326 |
+
'is_real': not pd.isna(latest_data['D']),
|
| 327 |
+
'source': 'calculated' if not pd.isna(latest_data['D']) else 'default'
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
# DMI 指標檢查
|
| 331 |
+
plus_di_value = latest_data['+DI'] if not pd.isna(latest_data['+DI']) else 25
|
| 332 |
+
minus_di_value = latest_data['-DI'] if not pd.isna(latest_data['-DI']) else 25
|
| 333 |
+
adx_value = latest_data['ADX'] if not pd.isna(latest_data['ADX']) else 25
|
| 334 |
+
|
| 335 |
+
tech_indicators_status['+DI'] = {
|
| 336 |
+
'value': plus_di_value,
|
| 337 |
+
'is_real': not pd.isna(latest_data['+DI']),
|
| 338 |
+
'source': 'calculated' if not pd.isna(latest_data['+DI']) else 'default'
|
| 339 |
+
}
|
| 340 |
+
tech_indicators_status['-DI'] = {
|
| 341 |
+
'value': minus_di_value,
|
| 342 |
+
'is_real': not pd.isna(latest_data['-DI']),
|
| 343 |
+
'source': 'calculated' if not pd.isna(latest_data['-DI']) else 'default'
|
| 344 |
+
}
|
| 345 |
+
tech_indicators_status['ADX'] = {
|
| 346 |
+
'value': adx_value,
|
| 347 |
+
'is_real': not pd.isna(latest_data['ADX']),
|
| 348 |
+
'source': 'calculated' if not pd.isna(latest_data['ADX']) else 'default'
|
| 349 |
+
}
|
| 350 |
+
|
| 351 |
+
# 建立特徵向量
|
| 352 |
features_list = [
|
| 353 |
latest_data['Close'], # close
|
| 354 |
latest_data['Volume'], # volume
|
|
|
|
| 359 |
us_market['S&P_500'], # S&P_500
|
| 360 |
us_market['TSM_ADR'], # TSM_ADR
|
| 361 |
sentiment_score_raw, # NEWS (使用原始 sentiment_score_raw)
|
| 362 |
+
rsi_value, # RSI
|
| 363 |
+
macd_value, # MACD
|
| 364 |
+
macd_signal_value, # MACDsign
|
| 365 |
+
macd_hist_value, # MACDvol
|
| 366 |
+
k_value, # K
|
| 367 |
+
d_value, # D
|
| 368 |
+
plus_di_value, # +DI
|
| 369 |
+
minus_di_value, # -DI
|
| 370 |
+
adx_value, # ADX
|
| 371 |
15, # business_climate (手動填入值)
|
| 372 |
46.7 # PMI (手動填入值)
|
| 373 |
]
|
|
|
|
| 381 |
|
| 382 |
input_df = pd.DataFrame([features_list], columns=column_names)
|
| 383 |
|
| 384 |
+
# 詳細的資料驗證日誌
|
| 385 |
+
print("=" * 50)
|
| 386 |
+
print("XGBoost 模型輸入特徵詳細檢查報告")
|
| 387 |
+
print("=" * 50)
|
| 388 |
+
|
| 389 |
+
# 基本市場數據
|
| 390 |
+
print("📊 基本市場數據:")
|
| 391 |
+
print(f" 收盤價 (close): {latest_data['Close']:.2f}")
|
| 392 |
+
print(f" 成交量 (volume): {latest_data['Volume']:,.0f}")
|
| 393 |
+
print(f" 匯率 (rate): {exchange_rate:.4f}")
|
| 394 |
+
|
| 395 |
+
# 美股指數
|
| 396 |
+
print("\n🇺🇸 美股指數數據:")
|
| 397 |
+
for key, value in us_market.items():
|
| 398 |
+
status = "✅ 正常" if value > 0 else "⚠️ 可能異常(=0)"
|
| 399 |
+
print(f" {key}: {value:.2f} {status}")
|
| 400 |
+
|
| 401 |
+
# 新聞情緒
|
| 402 |
+
print(f"\n📰 新聞情緒 (NEWS): {sentiment_score_raw:.6f}")
|
| 403 |
+
if sentiment_score_raw == 0:
|
| 404 |
+
print(" ⚠️ 新聞情緒分數為0,可能無新聞數據")
|
| 405 |
+
else:
|
| 406 |
+
print(" ✅ 新聞情緒分數正常")
|
| 407 |
+
|
| 408 |
+
# 技術指標詳細狀態
|
| 409 |
+
print("\n📈 技術指標狀態:")
|
| 410 |
+
for indicator, status in tech_indicators_status.items():
|
| 411 |
+
status_symbol = "✅" if status['is_real'] else "⚠️"
|
| 412 |
+
source_info = "實際計算值" if status['is_real'] else "預設替代值"
|
| 413 |
+
print(f" {indicator}: {status['value']:.4f} {status_symbol} ({source_info})")
|
| 414 |
+
|
| 415 |
+
# 手動填入數據
|
| 416 |
+
print("\n🔧 手動填入數據:")
|
| 417 |
+
print(f" business_climate: 15 ✅")
|
| 418 |
+
print(f" PMI: 46.7 ✅")
|
| 419 |
+
|
| 420 |
+
# 統計資料完整性
|
| 421 |
+
real_indicators = sum(1 for status in tech_indicators_status.values() if status['is_real'])
|
| 422 |
+
total_indicators = len(tech_indicators_status)
|
| 423 |
+
completeness = (real_indicators / total_indicators) * 100
|
| 424 |
+
|
| 425 |
+
print(f"\n📋 技術指標完整性統計:")
|
| 426 |
+
print(f" 實際計算指標: {real_indicators}/{total_indicators} ({completeness:.1f}%)")
|
| 427 |
+
if completeness < 80:
|
| 428 |
+
print(" ⚠️ 警告:超過20%的技術指標使用預設值,可能影響預測準確性")
|
| 429 |
+
else:
|
| 430 |
+
print(" ✅ 技術指標完整性良好")
|
| 431 |
+
|
| 432 |
+
# 顯示完整輸入向量
|
| 433 |
+
print(f"\n🔢 完整特徵向量 (共{len(features_list)}個特徵):")
|
| 434 |
+
for i, (name, value) in enumerate(zip(column_names, features_list)):
|
| 435 |
+
print(f" [{i:2d}] {name:15s}: {value:10.4f}")
|
| 436 |
+
|
| 437 |
+
print("=" * 50)
|
| 438 |
|
| 439 |
# 進行預測
|
| 440 |
predictions = xgb_model.predict('xgboost_model', input_df)
|