AlanRex committed on
Commit
5c4d9c7
·
verified ·
1 Parent(s): a50afec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +132 -16
app.py CHANGED
@@ -280,6 +280,75 @@ def advanced_xgboost_predict(predict_days=5):
280
  latest_data = taiex_data.iloc[-1]
281
 
282
  # 建立特徵向量 (按照訓練數據記錄的順序)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  features_list = [
284
  latest_data['Close'], # close
285
  latest_data['Volume'], # volume
@@ -290,15 +359,15 @@ def advanced_xgboost_predict(predict_days=5):
290
  us_market['S&P_500'], # S&P_500
291
  us_market['TSM_ADR'], # TSM_ADR
292
  sentiment_score_raw, # NEWS (使用原始 sentiment_score_raw)
293
- latest_data['RSI'] if not pd.isna(latest_data['RSI']) else 50, # RSI
294
- latest_data['MACD'] if not pd.isna(latest_data['MACD']) else 0, # MACD
295
- latest_data['MACD_Signal'] if not pd.isna(latest_data['MACD_Signal']) else 0, # MACDsign
296
- latest_data['MACD_Histogram'] if not pd.isna(latest_data['MACD_Histogram']) else 0, # MACDvol
297
- latest_data['K'] if not pd.isna(latest_data['K']) else 50, # K
298
- latest_data['D'] if not pd.isna(latest_data['D']) else 50, # D
299
- latest_data['+DI'] if not pd.isna(latest_data['+DI']) else 25, # +DI
300
- latest_data['-DI'] if not pd.isna(latest_data['-DI']) else 25, # -DI
301
- latest_data['ADX'] if not pd.isna(latest_data['ADX']) else 25, # ADX
302
  15, # business_climate (手動填入值)
303
  46.7 # PMI (手動填入值)
304
  ]
@@ -312,13 +381,60 @@ def advanced_xgboost_predict(predict_days=5):
312
 
313
  input_df = pd.DataFrame([features_list], columns=column_names)
314
 
315
- print("特徵數據準備完成:")
316
- print(f"- 收盤價: {features_list[0]:.2f}")
317
- print(f"- 成交量: {features_list[1]:,.0f}")
318
- print(f"- 匯率: {features_list[2]:.2f}")
319
- print(f"- 道瓊指數: {features_list[3]:.2f}")
320
- print(f"- 新聞情緒: {features_list[8]:.4f}")
321
- print(f"- RSI: {features_list[9]:.2f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
 
323
  # 進行預測
324
  predictions = xgb_model.predict('xgboost_model', input_df)
 
280
  latest_data = taiex_data.iloc[-1]
281
 
282
  # 建立特徵向量 (按照訓練數據記錄的順序)
283
+ # 先檢查每個技術指標是否存在並記錄狀態
284
+ tech_indicators_status = {}
285
+
286
+ # RSI 檢查
287
+ rsi_value = latest_data['RSI'] if not pd.isna(latest_data['RSI']) else 50
288
+ tech_indicators_status['RSI'] = {
289
+ 'value': rsi_value,
290
+ 'is_real': not pd.isna(latest_data['RSI']),
291
+ 'source': 'calculated' if not pd.isna(latest_data['RSI']) else 'default'
292
+ }
293
+
294
+ # MACD 相關檢查
295
+ macd_value = latest_data['MACD'] if not pd.isna(latest_data['MACD']) else 0
296
+ macd_signal_value = latest_data['MACD_Signal'] if not pd.isna(latest_data['MACD_Signal']) else 0
297
+ macd_hist_value = latest_data['MACD_Histogram'] if not pd.isna(latest_data['MACD_Histogram']) else 0
298
+
299
+ tech_indicators_status['MACD'] = {
300
+ 'value': macd_value,
301
+ 'is_real': not pd.isna(latest_data['MACD']),
302
+ 'source': 'calculated' if not pd.isna(latest_data['MACD']) else 'default'
303
+ }
304
+ tech_indicators_status['MACD_Signal'] = {
305
+ 'value': macd_signal_value,
306
+ 'is_real': not pd.isna(latest_data['MACD_Signal']),
307
+ 'source': 'calculated' if not pd.isna(latest_data['MACD_Signal']) else 'default'
308
+ }
309
+ tech_indicators_status['MACD_Histogram'] = {
310
+ 'value': macd_hist_value,
311
+ 'is_real': not pd.isna(latest_data['MACD_Histogram']),
312
+ 'source': 'calculated' if not pd.isna(latest_data['MACD_Histogram']) else 'default'
313
+ }
314
+
315
+ # KD 指標檢查
316
+ k_value = latest_data['K'] if not pd.isna(latest_data['K']) else 50
317
+ d_value = latest_data['D'] if not pd.isna(latest_data['D']) else 50
318
+
319
+ tech_indicators_status['K'] = {
320
+ 'value': k_value,
321
+ 'is_real': not pd.isna(latest_data['K']),
322
+ 'source': 'calculated' if not pd.isna(latest_data['K']) else 'default'
323
+ }
324
+ tech_indicators_status['D'] = {
325
+ 'value': d_value,
326
+ 'is_real': not pd.isna(latest_data['D']),
327
+ 'source': 'calculated' if not pd.isna(latest_data['D']) else 'default'
328
+ }
329
+
330
+ # DMI 指標檢查
331
+ plus_di_value = latest_data['+DI'] if not pd.isna(latest_data['+DI']) else 25
332
+ minus_di_value = latest_data['-DI'] if not pd.isna(latest_data['-DI']) else 25
333
+ adx_value = latest_data['ADX'] if not pd.isna(latest_data['ADX']) else 25
334
+
335
+ tech_indicators_status['+DI'] = {
336
+ 'value': plus_di_value,
337
+ 'is_real': not pd.isna(latest_data['+DI']),
338
+ 'source': 'calculated' if not pd.isna(latest_data['+DI']) else 'default'
339
+ }
340
+ tech_indicators_status['-DI'] = {
341
+ 'value': minus_di_value,
342
+ 'is_real': not pd.isna(latest_data['-DI']),
343
+ 'source': 'calculated' if not pd.isna(latest_data['-DI']) else 'default'
344
+ }
345
+ tech_indicators_status['ADX'] = {
346
+ 'value': adx_value,
347
+ 'is_real': not pd.isna(latest_data['ADX']),
348
+ 'source': 'calculated' if not pd.isna(latest_data['ADX']) else 'default'
349
+ }
350
+
351
+ # 建立特徵向量
352
  features_list = [
353
  latest_data['Close'], # close
354
  latest_data['Volume'], # volume
 
359
  us_market['S&P_500'], # S&P_500
360
  us_market['TSM_ADR'], # TSM_ADR
361
  sentiment_score_raw, # NEWS (使用原始 sentiment_score_raw)
362
+ rsi_value, # RSI
363
+ macd_value, # MACD
364
+ macd_signal_value, # MACDsign
365
+ macd_hist_value, # MACDvol
366
+ k_value, # K
367
+ d_value, # D
368
+ plus_di_value, # +DI
369
+ minus_di_value, # -DI
370
+ adx_value, # ADX
371
  15, # business_climate (手動填入值)
372
  46.7 # PMI (手動填入值)
373
  ]
 
381
 
382
  input_df = pd.DataFrame([features_list], columns=column_names)
383
 
384
+ # 詳細的資料驗證日誌
385
+ print("=" * 50)
386
+ print("XGBoost 模型輸入特徵詳細檢查報告")
387
+ print("=" * 50)
388
+
389
+ # 基本市場數據
390
+ print("📊 基本市場數據:")
391
+ print(f" 收盤價 (close): {latest_data['Close']:.2f}")
392
+ print(f" 成交量 (volume): {latest_data['Volume']:,.0f}")
393
+ print(f" 匯率 (rate): {exchange_rate:.4f}")
394
+
395
+ # 美股指數
396
+ print("\n🇺🇸 美股指數數據:")
397
+ for key, value in us_market.items():
398
+ status = "✅ 正常" if value > 0 else "⚠️ 可能異常(=0)"
399
+ print(f" {key}: {value:.2f} {status}")
400
+
401
+ # 新聞情緒
402
+ print(f"\n📰 新聞情緒 (NEWS): {sentiment_score_raw:.6f}")
403
+ if sentiment_score_raw == 0:
404
+ print(" ⚠️ 新聞情緒分數為0,可能無新聞數據")
405
+ else:
406
+ print(" ✅ 新聞情緒分數正常")
407
+
408
+ # 技術指標詳細狀態
409
+ print("\n📈 技術指標狀態:")
410
+ for indicator, status in tech_indicators_status.items():
411
+ status_symbol = "✅" if status['is_real'] else "⚠️"
412
+ source_info = "實際計算值" if status['is_real'] else "預設替代值"
413
+ print(f" {indicator}: {status['value']:.4f} {status_symbol} ({source_info})")
414
+
415
+ # 手動填入數據
416
+ print("\n🔧 手動填入數據:")
417
+ print(f" business_climate: 15 ✅")
418
+ print(f" PMI: 46.7 ✅")
419
+
420
+ # 統計資料完整性
421
+ real_indicators = sum(1 for status in tech_indicators_status.values() if status['is_real'])
422
+ total_indicators = len(tech_indicators_status)
423
+ completeness = (real_indicators / total_indicators) * 100
424
+
425
+ print(f"\n📋 技術指標完整性統計:")
426
+ print(f" 實際計算指標: {real_indicators}/{total_indicators} ({completeness:.1f}%)")
427
+ if completeness < 80:
428
+ print(" ⚠️ 警告:超過20%的技術指標使用預設值,可能影響預測準確性")
429
+ else:
430
+ print(" ✅ 技術指標完整性良好")
431
+
432
+ # 顯示完整輸入向量
433
+ print(f"\n🔢 完整特徵向量 (共{len(features_list)}個特徵):")
434
+ for i, (name, value) in enumerate(zip(column_names, features_list)):
435
+ print(f" [{i:2d}] {name:15s}: {value:10.4f}")
436
+
437
+ print("=" * 50)
438
 
439
  # 進行預測
440
  predictions = xgb_model.predict('xgboost_model', input_df)