Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
# HUGING_FACE_V3.
|
| 2 |
|
| 3 |
# 系統套件
|
| 4 |
import os
|
|
@@ -60,13 +60,13 @@ TAIWAN_STOCKS = {
|
|
| 60 |
'元大金': '2885.TW',
|
| 61 |
'智邦': '2345.TW',
|
| 62 |
'緯創': '3231.TW',
|
| 63 |
-
'
|
| 64 |
'第一金': '2892.TW',
|
| 65 |
'瑞昱': '2379.TW',
|
| 66 |
'緯穎': '6669.TWO',
|
| 67 |
'永豐金': '2890.TW',
|
| 68 |
'合庫金': '5880.TW',
|
| 69 |
-
'
|
| 70 |
'台光電': '2383.TW',
|
| 71 |
'世芯-KY': '3661.TWO',
|
| 72 |
'奇鋐': '3017.TW',
|
|
@@ -90,7 +90,7 @@ TAIWAN_STOCKS = {
|
|
| 90 |
'藥華藥': '6446.TWO',
|
| 91 |
'南亞': '1303.TW',
|
| 92 |
'陽明': '2609.TW',
|
| 93 |
-
'
|
| 94 |
'台塑化': '6505.TW',
|
| 95 |
'慧洋-KY': '2637.TW',
|
| 96 |
'上銀': '2049.TW',
|
|
@@ -240,12 +240,58 @@ def simple_statistical_predict(data, predict_days=5):
|
|
| 240 |
'confidence': max(0.6, 1 - volatility * 2)
|
| 241 |
}
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
def advanced_xgboost_predict(predict_days=5):
|
| 244 |
"""
|
| 245 |
-
【進階模型】使用 XGBoost 模型進行預測 -
|
| 246 |
"""
|
| 247 |
try:
|
| 248 |
-
print(f"開始使用 XGBoost 模型進行 {predict_days}
|
| 249 |
|
| 250 |
# 初始化 XGBoost 模型
|
| 251 |
xgb_model = XGBoostModel()
|
|
@@ -256,9 +302,15 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 256 |
print("台指期數據不足,無法進行XGBoost預測")
|
| 257 |
return None
|
| 258 |
|
| 259 |
-
#
|
| 260 |
taiex_data = calculate_technical_indicators(taiex_data)
|
| 261 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
# 獲取新聞情緒分數
|
| 263 |
try:
|
| 264 |
if predictor is not None:
|
|
@@ -273,94 +325,122 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 273 |
# 準備特徵數據 (使用最新的數據點)
|
| 274 |
latest_data = taiex_data.iloc[-1]
|
| 275 |
|
| 276 |
-
#
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
-
#
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
'D': (latest_data['D'], 50),
|
| 283 |
-
'+DI': (latest_data['+DI'], 25),
|
| 284 |
-
'-DI': (latest_data['-DI'], 25),
|
| 285 |
-
'ADX': (latest_data['ADX'], 25),
|
| 286 |
-
'RSI': (latest_data['RSI'], 50),
|
| 287 |
-
}
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
'source': 'calculated'
|
| 304 |
-
}
|
| 305 |
|
| 306 |
-
#
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
processed_values['K'], # K
|
| 311 |
-
processed_values['D'], # D
|
| 312 |
-
processed_values['+DI'], # +DI
|
| 313 |
-
processed_values['-DI'], # -DI
|
| 314 |
-
processed_values['ADX'], # ADX
|
| 315 |
-
processed_values['RSI'], # RSI
|
| 316 |
-
]
|
| 317 |
|
| 318 |
-
#
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 324 |
-
input_df = pd.DataFrame([features_list], columns=
|
| 325 |
|
| 326 |
# 詳細的資料驗證日誌
|
| 327 |
-
print("=" *
|
| 328 |
-
print("XGBoost 模型輸入特徵檢查報告 (
|
| 329 |
-
print("=" *
|
| 330 |
|
| 331 |
-
|
| 332 |
-
print(f"
|
| 333 |
-
print(f"新聞情緒 (NEWS): {sentiment_score_raw:.6f}")
|
| 334 |
-
if sentiment_score_raw == 0:
|
| 335 |
-
print(" 警告: 新聞情緒分數為0,可能無新聞數據")
|
| 336 |
-
else:
|
| 337 |
-
print(" 新聞情緒分數正常")
|
| 338 |
|
| 339 |
-
#
|
| 340 |
-
print("\n
|
| 341 |
-
for
|
| 342 |
-
status =
|
| 343 |
-
status_symbol = "
|
| 344 |
-
print(f" {
|
| 345 |
|
| 346 |
# 統計完整性
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
completeness = (
|
| 350 |
|
| 351 |
-
print(f"\n
|
| 352 |
-
print(f"
|
| 353 |
if completeness < 70:
|
| 354 |
-
print(" 警告: 超過30
|
| 355 |
else:
|
| 356 |
-
print("
|
| 357 |
|
| 358 |
# 顯示完整特徵向量
|
| 359 |
print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
|
| 360 |
-
for i, (name, value) in enumerate(zip(
|
| 361 |
-
print(f" [{i:
|
| 362 |
|
| 363 |
-
print("=" *
|
| 364 |
|
| 365 |
# 進行預測
|
| 366 |
predictions = xgb_model.predict('xgboost_model', input_df)
|
|
@@ -388,11 +468,12 @@ def advanced_xgboost_predict(predict_days=5):
|
|
| 388 |
print(f"- 預測價格: {predicted_price:.2f}")
|
| 389 |
print(f"- 預測變化: {change_pct:+.2f}%")
|
| 390 |
print(f"- 使用特徵數: {len(features_list)} 個")
|
|
|
|
| 391 |
|
| 392 |
return {
|
| 393 |
'predicted_price': predicted_price,
|
| 394 |
'change_pct': change_pct,
|
| 395 |
-
'confidence': 0.
|
| 396 |
}
|
| 397 |
|
| 398 |
except Exception as e:
|
|
|
|
| 1 |
+
# HUGING_FACE_V3.2.0.py (整合 Bert_predict 和 XGBoost 版本 - 新特徵版本)
|
| 2 |
|
| 3 |
# 系統套件
|
| 4 |
import os
|
|
|
|
| 60 |
'元大金': '2885.TW',
|
| 61 |
'智邦': '2345.TW',
|
| 62 |
'緯創': '3231.TW',
|
| 63 |
+
'華邦': '3034.TW',
|
| 64 |
'第一金': '2892.TW',
|
| 65 |
'瑞昱': '2379.TW',
|
| 66 |
'緯穎': '6669.TWO',
|
| 67 |
'永豐金': '2890.TW',
|
| 68 |
'合庫金': '5880.TW',
|
| 69 |
+
'臺南金': '2880.TW',
|
| 70 |
'台光電': '2383.TW',
|
| 71 |
'世芯-KY': '3661.TWO',
|
| 72 |
'奇鋐': '3017.TW',
|
|
|
|
| 90 |
'藥華藥': '6446.TWO',
|
| 91 |
'南亞': '1303.TW',
|
| 92 |
'陽明': '2609.TW',
|
| 93 |
+
'謝海': '2615.TW',
|
| 94 |
'台塑化': '6505.TW',
|
| 95 |
'慧洋-KY': '2637.TW',
|
| 96 |
'上銀': '2049.TW',
|
|
|
|
| 240 |
'confidence': max(0.6, 1 - volatility * 2)
|
| 241 |
}
|
| 242 |
|
| 243 |
+
def calculate_new_features(df):
    """Return a copy of ``df`` extended with eight derived indicator columns.

    The input frame must provide ``Close`` and ``Volume`` columns. The
    caller's frame is never modified; all new columns are written to a copy.

    Columns added, in this order:
        return_t-1       one-period percentage change of ``Close``
        return_t-5       ``Close`` divided by ``Close`` five rows earlier, minus 1
        MA5_close        5-row rolling mean of ``Close``
        MA20_close       20-row rolling mean of ``Close``
        volatility_5d    5-row rolling standard deviation of ``return_t-1``
        volume_ratio_5d  ``Volume`` divided by its 5-row rolling mean
        RSI_14           RSI built from 14-row simple rolling means of gains/losses
        MACD_diff        MACD line (EMA12 - EMA26) minus its 9-span EMA signal line

    Rows that do not yet have a full rolling window hold NaN in the
    corresponding columns; handling those values is left to the caller.

    Args:
        df: pandas DataFrame with ``Close`` and ``Volume`` columns.

    Returns:
        ``df`` itself when it is empty, otherwise a new DataFrame containing
        the original columns followed by the columns listed above.
    """
    if df.empty:
        return df

    # Work on a copy: assigning columns directly onto the argument would leak
    # the new feature columns into the caller's frame as a hidden side effect.
    df = df.copy()

    close = df['Close']
    volume = df['Volume']

    # Returns over one row and over five rows.
    df['return_t-1'] = close.pct_change()
    df['return_t-5'] = close / close.shift(5) - 1

    # Simple moving averages of the closing price.
    df['MA5_close'] = close.rolling(window=5).mean()
    df['MA20_close'] = close.rolling(window=20).mean()

    # Short-term volatility: dispersion of the one-row returns.
    df['volatility_5d'] = df['return_t-1'].rolling(window=5).std()

    # Current volume relative to its 5-row average. The average is kept in a
    # local variable so no scratch column has to be created and then removed.
    volume_5d_avg = volume.rolling(window=5).mean()
    df['volume_ratio_5d'] = volume / volume_5d_avg

    # RSI from simple rolling means of the up-moves and down-moves.
    delta = close.diff()
    gain = delta.where(delta > 0, 0).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI_14'] = 100 - (100 / (1 + rs))

    # MACD histogram: MACD line minus its signal line.
    ema_fast = close.ewm(span=12).mean()
    ema_slow = close.ewm(span=26).mean()
    macd_line = ema_fast - ema_slow
    signal_line = macd_line.ewm(span=9).mean()
    df['MACD_diff'] = macd_line - signal_line

    # The result has never exposed a 'volume_5d_avg' column; keep that
    # contract even if the input frame happened to carry one.
    df = df.drop(columns='volume_5d_avg', errors='ignore')

    return df
|
| 288 |
+
|
| 289 |
def advanced_xgboost_predict(predict_days=5):
|
| 290 |
"""
|
| 291 |
+
【進階模型】使用 XGBoost 模型進行預測 - 新特徵版本
|
| 292 |
"""
|
| 293 |
try:
|
| 294 |
+
print(f"開始使用 XGBoost 模型進行 {predict_days} 天預測(新特徵版本)...")
|
| 295 |
|
| 296 |
# 初始化 XGBoost 模型
|
| 297 |
xgb_model = XGBoostModel()
|
|
|
|
| 302 |
print("台指期數據不足,無法進行XGBoost預測")
|
| 303 |
return None
|
| 304 |
|
| 305 |
+
# 計算技術指標(包含舊的指標)
|
| 306 |
taiex_data = calculate_technical_indicators(taiex_data)
|
| 307 |
|
| 308 |
+
# 計算新特徵
|
| 309 |
+
taiex_data = calculate_new_features(taiex_data)
|
| 310 |
+
|
| 311 |
+
# 獲取美股指數數據來計算外部指標
|
| 312 |
+
us_market_data = get_us_market_data()
|
| 313 |
+
|
| 314 |
# 獲取新聞情緒分數
|
| 315 |
try:
|
| 316 |
if predictor is not None:
|
|
|
|
| 325 |
# 準備特徵數據 (使用最新的數據點)
|
| 326 |
latest_data = taiex_data.iloc[-1]
|
| 327 |
|
| 328 |
+
# 新特徵列表 - 按照您指定的10個特徵
|
| 329 |
+
new_feature_columns = [
|
| 330 |
+
'return_t-1', # 前一日報酬率
|
| 331 |
+
'return_t-5', # 過去 5 日累積報酬率
|
| 332 |
+
'MA5_close', # 5 日移動平均價
|
| 333 |
+
'MA20_close', # 20 日移動平均價
|
| 334 |
+
'volatility_5d', # 5 日報酬標準差
|
| 335 |
+
'volume_ratio_5d', # 今日成交量 ÷ 5 日均量
|
| 336 |
+
'RSI_14', # 14 日 RSI 指標
|
| 337 |
+
'MACD_diff', # MACD - signal
|
| 338 |
+
]
|
| 339 |
|
| 340 |
+
# 添加美股指標(如果有數據的話)
|
| 341 |
+
dji_return = 0
|
| 342 |
+
sox_return = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
+
# 嘗試獲取美股前一日報酬率
|
| 345 |
+
try:
|
| 346 |
+
dji_data = get_stock_data('^DJI', '5d')
|
| 347 |
+
if not dji_data.empty and len(dji_data) >= 2:
|
| 348 |
+
dji_return = (dji_data['Close'].iloc[-1] / dji_data['Close'].iloc[-2] - 1)
|
| 349 |
+
except:
|
| 350 |
+
pass
|
| 351 |
+
|
| 352 |
+
try:
|
| 353 |
+
sox_data = get_stock_data('^SOX', '5d')
|
| 354 |
+
if not sox_data.empty and len(sox_data) >= 2:
|
| 355 |
+
sox_return = (sox_data['Close'].iloc[-1] / sox_data['Close'].iloc[-2] - 1)
|
| 356 |
+
except:
|
| 357 |
+
pass
|
|
|
|
|
|
|
| 358 |
|
| 359 |
+
# 檢查並處理 NaN 值,建立特徵狀態記錄
|
| 360 |
+
feature_status = {}
|
| 361 |
+
features_list = []
|
| 362 |
+
feature_names = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
|
| 364 |
+
# 處理本地計算的特徵
|
| 365 |
+
for feature in new_feature_columns:
|
| 366 |
+
if feature in latest_data.index:
|
| 367 |
+
value = latest_data[feature]
|
| 368 |
+
if pd.isna(value):
|
| 369 |
+
# 使用合理的預設值
|
| 370 |
+
if 'return' in feature:
|
| 371 |
+
default_value = 0.0
|
| 372 |
+
elif 'MA' in feature:
|
| 373 |
+
default_value = latest_data['Close'] if not pd.isna(latest_data['Close']) else 100
|
| 374 |
+
elif 'volatility' in feature:
|
| 375 |
+
default_value = 0.02
|
| 376 |
+
elif 'volume_ratio' in feature:
|
| 377 |
+
default_value = 1.0
|
| 378 |
+
elif 'RSI' in feature:
|
| 379 |
+
default_value = 50.0
|
| 380 |
+
elif 'MACD' in feature:
|
| 381 |
+
default_value = 0.0
|
| 382 |
+
else:
|
| 383 |
+
default_value = 0.0
|
| 384 |
+
|
| 385 |
+
features_list.append(default_value)
|
| 386 |
+
feature_status[feature] = {'value': default_value, 'is_real': False, 'source': 'default'}
|
| 387 |
+
else:
|
| 388 |
+
features_list.append(value)
|
| 389 |
+
feature_status[feature] = {'value': value, 'is_real': True, 'source': 'calculated'}
|
| 390 |
+
|
| 391 |
+
feature_names.append(feature)
|
| 392 |
+
|
| 393 |
+
# 添加美股指標
|
| 394 |
+
features_list.extend([dji_return, sox_return])
|
| 395 |
+
feature_names.extend(['dji_return_t-1', 'sox_return_t-1'])
|
| 396 |
+
|
| 397 |
+
feature_status['dji_return_t-1'] = {
|
| 398 |
+
'value': dji_return,
|
| 399 |
+
'is_real': dji_return != 0,
|
| 400 |
+
'source': 'calculated' if dji_return != 0 else 'default'
|
| 401 |
+
}
|
| 402 |
+
feature_status['sox_return_t-1'] = {
|
| 403 |
+
'value': sox_return,
|
| 404 |
+
'is_real': sox_return != 0,
|
| 405 |
+
'source': 'calculated' if sox_return != 0 else 'default'
|
| 406 |
+
}
|
| 407 |
|
| 408 |
# 轉換為 DataFrame (XGBoost 模型期望的格式)
|
| 409 |
+
input_df = pd.DataFrame([features_list], columns=feature_names)
|
| 410 |
|
| 411 |
# 詳細的資料驗證日誌
|
| 412 |
+
print("=" * 60)
|
| 413 |
+
print("XGBoost 模型輸入特徵檢查報告 (新特徵版本)")
|
| 414 |
+
print("=" * 60)
|
| 415 |
|
| 416 |
+
print(f"總特徵數量: {len(features_list)} 個")
|
| 417 |
+
print(f"新聞情緒分數: {sentiment_score_raw:.6f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
|
| 419 |
+
# 特徵詳細狀態
|
| 420 |
+
print("\n特徵狀態詳情:")
|
| 421 |
+
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 422 |
+
status = feature_status.get(name, {})
|
| 423 |
+
status_symbol = "✓正常" if status.get('is_real', False) else "⚠預設值"
|
| 424 |
+
print(f" [{i:2d}] {name:18s}: {value:12.6f} ({status_symbol})")
|
| 425 |
|
| 426 |
# 統計完整性
|
| 427 |
+
real_features = sum(1 for status in feature_status.values() if status.get('is_real', False))
|
| 428 |
+
total_features = len(feature_status)
|
| 429 |
+
completeness = (real_features / total_features) * 100 if total_features > 0 else 0
|
| 430 |
|
| 431 |
+
print(f"\n特徵完整性:")
|
| 432 |
+
print(f" 實際計算特徵: {real_features}/{total_features} ({completeness:.1f}%)")
|
| 433 |
if completeness < 70:
|
| 434 |
+
print(" 警告: 超過30%的特徵使用預設值,可能影響預測準確性")
|
| 435 |
else:
|
| 436 |
+
print(" 特徵完整性良好")
|
| 437 |
|
| 438 |
# 顯示完整特徵向量
|
| 439 |
print(f"\n完整特徵向量 (共{len(features_list)}個特徵):")
|
| 440 |
+
for i, (name, value) in enumerate(zip(feature_names, features_list)):
|
| 441 |
+
print(f" [{i:2d}] {name:18s}: {value:12.6f}")
|
| 442 |
|
| 443 |
+
print("=" * 60)
|
| 444 |
|
| 445 |
# 進行預測
|
| 446 |
predictions = xgb_model.predict('xgboost_model', input_df)
|
|
|
|
| 468 |
print(f"- 預測價格: {predicted_price:.2f}")
|
| 469 |
print(f"- 預測變化: {change_pct:+.2f}%")
|
| 470 |
print(f"- 使用特徵數: {len(features_list)} 個")
|
| 471 |
+
print(f"- 特徵完整性: {completeness:.1f}%")
|
| 472 |
|
| 473 |
return {
|
| 474 |
'predicted_price': predicted_price,
|
| 475 |
'change_pct': change_pct,
|
| 476 |
+
'confidence': max(0.6, min(0.85, completeness / 100)) # 根據特徵完整性調整信心度
|
| 477 |
}
|
| 478 |
|
| 479 |
except Exception as e:
|