AlanRex committed on
Commit
cf9467d
·
verified ·
1 Parent(s): 3e5c6f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -47
app.py CHANGED
@@ -257,7 +257,7 @@ def calculate_new_features(df):
257
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
258
 
259
  # 4. MA20_close – 20 日移動平均價
260
- #df['MA20_close'] = df['Close'].rolling(window=20).mean()
261
 
262
  # 5. volatility_5d – 5 日報酬標準差(短期波動)
263
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
@@ -325,19 +325,17 @@ def advanced_xgboost_predict(predict_days=5):
325
  # 準備特徵數據 (使用最新的數據點)
326
  latest_data = taiex_data.iloc[-1]
327
 
328
- # 【【【新增功能】】】 取得昨日收盤價
329
  yesterday_close = latest_data['Close']
330
 
331
- # 新特徵列表 - 按照您指定的8個技術指標特徵
332
  new_feature_columns = [
333
- 'return_t-1', # 前一日報酬率
334
- 'return_t-5', # 過去 5 日累積報酬率
335
- 'MA5_close', # 5 日移動平均價
336
- # 20 日移動平均價
337
- 'volatility_5d', # 5 日報酬標準差
338
- 'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
339
- 'RSI_14', # 14 日 RSI 指標
340
- 'MACD_diff', # MACD - signal
341
  ]
342
 
343
  # 添加美股指標(如果有數據的話)
@@ -364,26 +362,18 @@ def advanced_xgboost_predict(predict_days=5):
364
  features_list = []
365
  feature_names = []
366
 
367
- # 處理本地計算的特徵
368
  for feature in new_feature_columns:
369
  if feature in latest_data.index:
370
  value = latest_data[feature]
371
  if pd.isna(value):
372
  # 使用合理的預設值
373
- if 'return' in feature:
374
- default_value = 0.0
375
- elif 'MA' in feature:
376
- default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
377
- elif 'volatility' in feature:
378
- default_value = 0.02
379
- elif 'volume_ratio' in feature:
380
- default_value = 1.0
381
- elif 'RSI' in feature:
382
- default_value = 50.0
383
- elif 'MACD' in feature:
384
- default_value = 0.0
385
- else:
386
- default_value = 0.0
387
 
388
  features_list.append(default_value)
389
  feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
@@ -392,36 +382,39 @@ def advanced_xgboost_predict(predict_days=5):
392
  feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
393
 
394
  feature_names.append(feature)
395
-
396
- # 【【【新增功能】】】 添加 'close' 和 'NEWS' 特徵
397
- # 1. 添加昨日收盤價 ('close')
398
- if not pd.isna(yesterday_close):
399
- features_list.append(yesterday_close)
400
- feature_status['close'] = {'value': yesterday_close, 'is_real': True, 'source': 'calculated'}
401
- else:
402
- features_list.append(10000) # Fallback value for price
403
- feature_status['close'] = {'value': 10000, 'is_real': False, 'source': 'default'}
404
- feature_names.append('close')
405
-
406
- # 2. 添加新聞情緒分數 ('NEWS')
407
- features_list.append(sentiment_score_raw)
408
- feature_status['NEWS'] = {'value': sentiment_score_raw, 'is_real': True, 'source': 'calculated'}
409
- feature_names.append('NEWS')
410
 
411
- # 添加美股指標
412
- features_list.extend([dji_return, sox_return])
413
- feature_names.extend(['dji_return_t-1', 'sox_return_t-1'])
414
-
415
  feature_status['dji_return_t-1'] = {
416
  'value': dji_return,
417
  'is_real': dji_return != 0,
418
  'source': 'calculated' if dji_return != 0 else 'default'
419
  }
 
 
 
 
420
  feature_status['sox_return_t-1'] = {
421
  'value': sox_return,
422
  'is_real': sox_return != 0,
423
  'source': 'calculated' if sox_return != 0 else 'default'
424
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
  # 轉換為 DataFrame (XGBoost 模型期望的格式)
427
  input_df = pd.DataFrame([features_list], columns=feature_names)
@@ -439,7 +432,7 @@ def advanced_xgboost_predict(predict_days=5):
439
  for i, (name, value) in enumerate(zip(feature_names, features_list)):
440
  status = feature_status.get(name, {})
441
  status_symbol = "✓正常" if status.get('is_real', False) else "⚠預設值"
442
- print(f" [{i:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
443
 
444
  # 統計完整性
445
  real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
@@ -456,7 +449,7 @@ def advanced_xgboost_predict(predict_days=5):
456
  # 顯示完整特徵向量
457
  print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
458
  for i, (name, value) in enumerate(zip(feature_names, features_list)):
459
- print(f" [{i:2d}] {name:18s}: {value:12.6f}")
460
 
461
  print("=" * 60)
462
 
 
257
  df['MA5_close'] = df['Close'].rolling(window=5).mean()
258
 
259
  # 4. MA20_close – 20 日移動平均價
260
+ df['MA20_close'] = df['Close'].rolling(window=20).mean()
261
 
262
  # 5. volatility_5d – 5 日報酬標準差(短期波動)
263
  df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
 
325
  # 準備特徵數據 (使用最新的數據點)
326
  latest_data = taiex_data.iloc[-1]
327
 
328
+ # 取得昨日收盤價
329
  yesterday_close = latest_data['Close']
330
 
331
+ # 【【【修正】】】特徵列表,確保與模型訓練時完全一致
332
  new_feature_columns = [
333
+ 'return_t-1',
334
+ 'return_t-5',
335
+ 'MA5_close',
336
+ 'volatility_5d',
337
+ 'volume_ratio_5d',
338
+ 'MACD_diff',
 
 
339
  ]
340
 
341
  # 添加美股指標(如果有數據的話)
 
362
  features_list = []
363
  feature_names = []
364
 
365
+ # 處理本地計算的技術指標特徵
366
  for feature in new_feature_columns:
367
  if feature in latest_data.index:
368
  value = latest_data[feature]
369
  if pd.isna(value):
370
  # 使用合理的預設值
371
+ if 'return' in feature: default_value = 0.0
372
+ elif 'MA' in feature: default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
373
+ elif 'volatility' in feature: default_value = 0.02
374
+ elif 'volume_ratio' in feature: default_value = 1.0
375
+ elif 'MACD' in feature: default_value = 0.0
376
+ else: default_value = 0.0
 
 
 
 
 
 
 
 
377
 
378
  features_list.append(default_value)
379
  feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
 
382
  feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
383
 
384
  feature_names.append(feature)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
 
386
+ # 【【【修正】】】 按照模型訓練的順序添加剩餘特徵
387
+ # 7. dji_return_t-1
388
+ features_list.append(dji_return)
389
+ feature_names.append('dji_return_t-1')
390
  feature_status['dji_return_t-1'] = {
391
  'value': dji_return,
392
  'is_real': dji_return != 0,
393
  'source': 'calculated' if dji_return != 0 else 'default'
394
  }
395
+
396
+ # 8. sox_return_t-1
397
+ features_list.append(sox_return)
398
+ feature_names.append('sox_return_t-1')
399
  feature_status['sox_return_t-1'] = {
400
  'value': sox_return,
401
  'is_real': sox_return != 0,
402
  'source': 'calculated' if sox_return != 0 else 'default'
403
  }
404
+
405
+ # 9. close
406
+ if not pd.isna(yesterday_close):
407
+ features_list.append(yesterday_close)
408
+ feature_status['close'] = {'value': yesterday_close, 'is_real': True, 'source': 'calculated'}
409
+ else:
410
+ features_list.append(10000) # Fallback value for price
411
+ feature_status['close'] = {'value': 10000, 'is_real': False, 'source': 'default'}
412
+ feature_names.append('close')
413
+
414
+ # 10. NEWS
415
+ features_list.append(sentiment_score_raw)
416
+ feature_status['NEWS'] = {'value': sentiment_score_raw, 'is_real': True, 'source': 'calculated'}
417
+ feature_names.append('NEWS')
418
 
419
  # 轉換為 DataFrame (XGBoost 模型期望的格式)
420
  input_df = pd.DataFrame([features_list], columns=feature_names)
 
432
  for i, (name, value) in enumerate(zip(feature_names, features_list)):
433
  status = feature_status.get(name, {})
434
  status_symbol = "✓正常" if status.get('is_real', False) else "⚠預設值"
435
+ print(f" [{i+1:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
436
 
437
  # 統計完整性
438
  real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
 
449
  # 顯示完整特徵向量
450
  print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
451
  for i, (name, value) in enumerate(zip(feature_names, features_list)):
452
+ print(f" [{i+1:2d}] {name:18s}: {value:12.6f}")
453
 
454
  print("=" * 60)
455