Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -257,7 +257,7 @@ def calculate_new_features(df):
|
|
| 257 |
df['MA5_close'] = df['Close'].rolling(window=5).mean()
|
| 258 |
|
| 259 |
# 4. MA20_close – 20 日移動平均價
|
| 260 |
-
|
| 261 |
|
| 262 |
# 5. volatility_5d – 5 日報酬標準差(短期波動)
|
| 263 |
df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
|
|
@@ -325,19 +325,17 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 325 |
# 準備特徵數據 (使用最新的數據點)
|
| 326 |
latest_data = taiex_data.iloc[-1]
|
| 327 |
|
| 328 |
-
#
|
| 329 |
yesterday_close = latest_data['Close']
|
| 330 |
|
| 331 |
-
#
|
| 332 |
new_feature_columns = [
|
| 333 |
-
'return_t-1',
|
| 334 |
-
'return_t-5',
|
| 335 |
-
'MA5_close',
|
| 336 |
-
|
| 337 |
-
'volatility_5d',
|
| 338 |
-
'volume_ratio_5d',
|
| 339 |
-
'RSI_14', # 14 日 RSI 指標
|
| 340 |
-
'MACD_diff', # MACD - signal
|
| 341 |
]
|
| 342 |
|
| 343 |
# 添加美股指標(如果有數據的話)
|
|
@@ -364,26 +362,18 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 364 |
features_list = []
|
| 365 |
feature_names = []
|
| 366 |
|
| 367 |
-
#
|
| 368 |
for feature in new_feature_columns:
|
| 369 |
if feature in latest_data.index:
|
| 370 |
value = latest_data[feature]
|
| 371 |
if pd.isna(value):
|
| 372 |
# 使用合理的預設值
|
| 373 |
-
if 'return' in feature:
|
| 374 |
-
|
| 375 |
-
elif 'MA' in feature:
|
| 376 |
-
|
| 377 |
-
elif 'volatility' in feature:
|
| 378 |
-
|
| 379 |
-
elif 'volume_ratio' in feature:
|
| 380 |
-
default_value = 1.0
|
| 381 |
-
elif 'RSI' in feature:
|
| 382 |
-
default_value = 50.0
|
| 383 |
-
elif 'MACD' in feature:
|
| 384 |
-
default_value = 0.0
|
| 385 |
-
else:
|
| 386 |
-
default_value = 0.0
|
| 387 |
|
| 388 |
features_list.append(default_value)
|
| 389 |
feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
|
|
@@ -392,36 +382,39 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 392 |
feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
|
| 393 |
|
| 394 |
feature_names.append(feature)
|
| 395 |
-
|
| 396 |
-
# 【【【新增功能】】】 添加 'close' 和 'NEWS' 特徵
|
| 397 |
-
# 1. 添加昨日收盤價 ('close')
|
| 398 |
-
if not pd.isna(yesterday_close):
|
| 399 |
-
features_list.append(yesterday_close)
|
| 400 |
-
feature_status['close'] = {'value': yesterday_close, 'is_real': True, 'source': 'calculated'}
|
| 401 |
-
else:
|
| 402 |
-
features_list.append(10000) # Fallback value for price
|
| 403 |
-
feature_status['close'] = {'value': 10000, 'is_real': False, 'source': 'default'}
|
| 404 |
-
feature_names.append('close')
|
| 405 |
-
|
| 406 |
-
# 2. 添加新聞情緒分數 ('NEWS')
|
| 407 |
-
features_list.append(sentiment_score_raw)
|
| 408 |
-
feature_status['NEWS'] = {'value': sentiment_score_raw, 'is_real': True, 'source': 'calculated'}
|
| 409 |
-
feature_names.append('NEWS')
|
| 410 |
|
| 411 |
-
#
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
feature_status['dji_return_t-1'] = {
|
| 416 |
'value': dji_return,
|
| 417 |
'is_real': dji_return != 0,
|
| 418 |
'source': 'calculated' if dji_return != 0 else 'default'
|
| 419 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
feature_status['sox_return_t-1'] = {
|
| 421 |
'value': sox_return,
|
| 422 |
'is_real': sox_return != 0,
|
| 423 |
'source': 'calculated' if sox_return != 0 else 'default'
|
| 424 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
|
| 426 |
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 427 |
input_df = pd.DataFrame([features_list], columns=feature_names)
|
|
@@ -439,7 +432,7 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 439 |
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 440 |
status = feature_status.get(name, {})
|
| 441 |
status_symbol = "✓正常" if status.get('is_real', False) else "⚠預設值"
|
| 442 |
-
print(f" [{i:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
|
| 443 |
|
| 444 |
# 統計完整性
|
| 445 |
real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
|
|
@@ -456,7 +449,7 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 456 |
# 顯示完整特徵向量
|
| 457 |
print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
|
| 458 |
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 459 |
-
print(f" [{i:2d}] {name:18s}: {value:12.6f}")
|
| 460 |
|
| 461 |
print("=" * 60)
|
| 462 |
|
|
|
|
| 257 |
df['MA5_close'] = df['Close'].rolling(window=5).mean()
|
| 258 |
|
| 259 |
# 4. MA20_close – 20 日移動平均價
|
| 260 |
+
df['MA20_close'] = df['Close'].rolling(window=20).mean()
|
| 261 |
|
| 262 |
# 5. volatility_5d – 5 日報酬標準差(短期波動)
|
| 263 |
df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()
|
|
|
|
| 325 |
# 準備特徵數據 (使用最新的數據點)
|
| 326 |
latest_data = taiex_data.iloc[-1]
|
| 327 |
|
| 328 |
+
# 取得昨日收盤價
|
| 329 |
yesterday_close = latest_data['Close']
|
| 330 |
|
| 331 |
+
# 【【【修正】】】特徵列表,確保與模型訓練時完全一致
|
| 332 |
new_feature_columns = [
|
| 333 |
+
'return_t-1',
|
| 334 |
+
'return_t-5',
|
| 335 |
+
'MA5_close',
|
| 336 |
+
'volatility_5d',
|
| 337 |
+
'volume_ratio_5d',
|
| 338 |
+
'MACD_diff',
|
|
|
|
|
|
|
| 339 |
]
|
| 340 |
|
| 341 |
# 添加美股指標(如果有數據的話)
|
|
|
|
| 362 |
features_list = []
|
| 363 |
feature_names = []
|
| 364 |
|
| 365 |
+
# 處理本地計算的技術指標特徵
|
| 366 |
for feature in new_feature_columns:
|
| 367 |
if feature in latest_data.index:
|
| 368 |
value = latest_data[feature]
|
| 369 |
if pd.isna(value):
|
| 370 |
# 使用合理的預設值
|
| 371 |
+
if 'return' in feature: default_value = 0.0
|
| 372 |
+
elif 'MA' in feature: default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
|
| 373 |
+
elif 'volatility' in feature: default_value = 0.02
|
| 374 |
+
elif 'volume_ratio' in feature: default_value = 1.0
|
| 375 |
+
elif 'MACD' in feature: default_value = 0.0
|
| 376 |
+
else: default_value = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 377 |
|
| 378 |
features_list.append(default_value)
|
| 379 |
feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
|
|
|
|
| 382 |
feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
|
| 383 |
|
| 384 |
feature_names.append(feature)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
+
# 【【【修正】】】 按照模型訓練的順序添加剩餘特徵
|
| 387 |
+
# 7. dji_return_t-1
|
| 388 |
+
features_list.append(dji_return)
|
| 389 |
+
feature_names.append('dji_return_t-1')
|
| 390 |
feature_status['dji_return_t-1'] = {
|
| 391 |
'value': dji_return,
|
| 392 |
'is_real': dji_return != 0,
|
| 393 |
'source': 'calculated' if dji_return != 0 else 'default'
|
| 394 |
}
|
| 395 |
+
|
| 396 |
+
# 8. sox_return_t-1
|
| 397 |
+
features_list.append(sox_return)
|
| 398 |
+
feature_names.append('sox_return_t-1')
|
| 399 |
feature_status['sox_return_t-1'] = {
|
| 400 |
'value': sox_return,
|
| 401 |
'is_real': sox_return != 0,
|
| 402 |
'source': 'calculated' if sox_return != 0 else 'default'
|
| 403 |
}
|
| 404 |
+
|
| 405 |
+
# 9. close
|
| 406 |
+
if not pd.isna(yesterday_close):
|
| 407 |
+
features_list.append(yesterday_close)
|
| 408 |
+
feature_status['close'] = {'value': yesterday_close, 'is_real': True, 'source': 'calculated'}
|
| 409 |
+
else:
|
| 410 |
+
features_list.append(10000) # Fallback value for price
|
| 411 |
+
feature_status['close'] = {'value': 10000, 'is_real': False, 'source': 'default'}
|
| 412 |
+
feature_names.append('close')
|
| 413 |
+
|
| 414 |
+
# 10. NEWS
|
| 415 |
+
features_list.append(sentiment_score_raw)
|
| 416 |
+
feature_status['NEWS'] = {'value': sentiment_score_raw, 'is_real': True, 'source': 'calculated'}
|
| 417 |
+
feature_names.append('NEWS')
|
| 418 |
|
| 419 |
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 420 |
input_df = pd.DataFrame([features_list], columns=feature_names)
|
|
|
|
| 432 |
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 433 |
status = feature_status.get(name, {})
|
| 434 |
status_symbol = "✓正常" if status.get('is_real', False) else "⚠預設值"
|
| 435 |
+
print(f" [{i+1:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
|
| 436 |
|
| 437 |
# 統計完整性
|
| 438 |
real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
|
|
|
|
| 449 |
# 顯示完整特徵向量
|
| 450 |
print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
|
| 451 |
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 452 |
+
print(f" [{i+1:2d}] {name:18s}: {value:12.6f}")
|
| 453 |
|
| 454 |
print("=" * 60)
|
| 455 |
|