Spaces:

AlanRex
/

AITEST

Sleeping

App Files Files Community

AlanRex committed on Sep 19, 2025

Commit

66a7471

verified ·

1 Parent(s): 06e2021

Update model_predictor.py

Browse files

Files changed (1) hide show

model_predictor.py +182 -34

model_predictor.py CHANGED Viewed

@@ -1,45 +1,193 @@
-import xgboost as xgb
 import pandas as pd
 class XGBoostModel:
-    # 使用類別變數儲存所有可用的模型名稱及其對應的檔案名稱
-    MODELS = {
-        'xgboost_model': 'xgboost_model.json'
-    }
-    def __init__(self, default_model='xgboost_model'):
-        # 建立物件時，自動載入預設模型
-        self.current_model_name = default_model
-        self.model = self._load_model(self.current_model_name)
-    def _load_model(self, model_name):
-        if model_name not in self.MODELS:
-            raise ValueError(f"找不到模型 '{model_name}'。可用的模型名稱：{list(self.MODELS.keys())}")
-        filename = self.MODELS[model_name]
         try:
-            # 建立一個新的 XGBoost 模型實例
-            model = xgb.XGBRegressor()
-            # 使用 XGBoost 內建的 load_model 方法載入檔案
-            model.load_model(filename)
-            return model
         except Exception as e:
-            raise FileNotFoundError(f"無法在本地找到或載入模型檔案 '{filename}'：{e}")
     def predict(self, model_name, input_df):
-        # 如果請求的模型名稱與目前載入的不同，則動態載入
-        if model_name != self.current_model_name:
-            self.model = self._load_model(model_name)
-            self.current_model_name = model_name
-        # 進行預測
-        predictions = self.model.predict(input_df)
-        # 將預測結果轉換為字典
-        result = {
-            'Close_t0_pred': predictions[0][0],
-            'Close_t5_pred': predictions[0][1],
-            'Close_t10_pred': predictions[0][2],
-            'Close_t20_pred': predictions[0][3]
-        }
-        return result

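+# model_predictor.py - XGBoost model predictor that outputs percentage price changes
+# Revised version: the output is now a percentage change instead of an absolute price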
+import os
 import pandas as pd
+import numpy as np
+import xgboost as xgb
+from sklearn.preprocessing import StandardScaler
+import pickle
+import joblib
 class XGBoostModel:
+    def __init__(self):
+        """
+        Initialize the XGBoost model predictor.
+        [Important update]
+        - The model now outputs percentage change rather than absolute price
+        - Supports 1-day, 5-day, 10-day and 20-day change predictions
+        """
+        self.model = None
+        self.scaler = None
+        self.feature_columns = [
+            'close',             # previous day's closing price
+            'return_t-1',        # previous day's return
+            'return_t-5',        # cumulative return over the past 5 days
+            'MA5_close',         # 5-day moving average price
+            'volatility_5d',     # standard deviation of returns over 5 days
+            'volume_ratio_5d',   # today's volume / 5-day average volume
+            'MACD_diff',         # MACD - signal
+            'dji_return_t-1',    # previous day's Dow Jones index return
+            'sox_return_t-1',    # previous day's PHLX Semiconductor index return
+            'NEWS'               # news sentiment score
+        ]
+        # [New] Mapping from forecast horizon (in days) to the output key name
+        self.output_targets = {
+            1: 'Change_pct_t1_pred',    # % change after 1 day
+            5: 'Change_pct_t5_pred',    # % change after 5 days
+            10: 'Change_pct_t10_pred',  # % change after 10 days
+            20: 'Change_pct_t20_pred'   # % change after 20 days
+        }
+        print("XGBoost 模型預測器初始化完成")
+        print("輸出格式：漲幅百分比 (1日, 5日, 10日, 20日)")
+    def load_model(self, model_path):
+        """
+        Load a pre-trained XGBoost model into self.model.
+        Args:
+            model_path (str): path to the model file (.json format)
+        Returns:
+            bool: True if the model was loaded, False if the file is missing or loading raised
+        """
+        try:
+            # Check that the model file exists before trying to load it
+            if not os.path.exists(model_path):
+                print(f"錯誤：找不到模型檔案 {model_path}")
+                return False
+            # Load the XGBoost model
+            self.model = xgb.XGBRegressor()
+            self.model.load_model(model_path)
+            print(f"成功載入模型：{model_path}")
+            print(f"預期特徵數量：{len(self.feature_columns)}")
+            return True
+        except Exception as e:
+            print(f"載入模型時發生錯誤：{e}")
+            return False
+    def load_scaler(self, scaler_path):
+        """
+        Load the feature scaler into self.scaler.
+        Args:
+            scaler_path (str): path to the scaler file (.pkl format)
+        Returns:
+            bool: True if loaded from file; False if a new StandardScaler() was substituted instead
+        """
+        try:
+            if os.path.exists(scaler_path):
+                self.scaler = joblib.load(scaler_path)
+                print(f"成功載入標準化器：{scaler_path}")
+                return True
+            else:
+                print(f"警告：找不到標準化器檔案 {scaler_path}")
+                print("將使用預設標準化器")
+                self.scaler = StandardScaler()
+                return False
+        except Exception as e:
+            print(f"載入標準化器時發生錯誤：{e}")
+            self.scaler = StandardScaler()
+            return False
+    def preprocess_features(self, input_df):
+        """
+        Preprocess the input features.
+        Args:
+            input_df (pd.DataFrame): input feature DataFrame
+        Returns:
+            pd.DataFrame: the preprocessed features (input_df as it stands if an error is caught)
+        """
         try:
+            # Make sure the input contains every required feature
+            missing_features = [f for f in self.feature_columns if f not in input_df.columns]
+            if missing_features:
+                print(f"警告：缺少以下特徵：{missing_features}")
+                # Fill each missing feature with 0 (assigns columns on the passed-in DataFrame)
+                for feature in missing_features:
+                    input_df[feature] = 0
+            # Reorder the features into the expected column order
+            input_df = input_df[self.feature_columns]
+            # Replace NaN values with 0
+            input_df = input_df.fillna(0)
+            # If a scaler is set, standardize the features
+            if self.scaler is not None:
+                try:
+                    # Try the scaler; if transform raises, the unscaled frame is kept
+                    scaled_features = self.scaler.transform(input_df)
+                    input_df = pd.DataFrame(scaled_features,
+                                          columns=input_df.columns,
+                                          index=input_df.index)
+                except Exception as scaler_error:
+                    print(f"標準化過程發生錯誤：{scaler_error}")
+                    print("跳過標準化步驟")
+            return input_df
         except Exception as e:
+            print(f"特徵預處理時發生錯誤：{e}")
+            return input_df
     def predict(self, model_name, input_df):
+        """
+        進行股價漲幅預測
+        Args:
+            model_name (str): 模型名稱（用於載入對應模型）
+            input_df (pd.DataFrame): 輸入特徵
+        Returns:
+            dict: 預測結果，包含各時間點的漲幅百分比
+        """
+        try:
+            # 載入模型（如果尚未載入）
+            if self.model is None:
+                model_path = f"{model_name}.json"
+                if not self.load_model(model_path):
+                    return None
+            # 載入標準化器（如果存在）
+            if self.scaler is None:
+                scaler_path = f"{model_name}_scaler.pkl"
+                self.load_scaler(scaler_path)
+            # 預處理特徵
+            processed_df = self.preprocess_features(input_df.copy())
+            # 進行預測
+            predictions = self.model.predict(processed_df)
+            # 【重要修改】將預測結果格式化為漲幅百分比
+            if predictions.ndim == 1:
+                # 如果只有一個輸出，假設是 1 日預測
+                result = {
+                    'Change_pct_t1_pred': float(predictions[0])
+                }
+            else:
+                # 多輸出情況：1日, 5日, 10日, 20日
+                result = {
+                    'Change_pct_t1_pred': float(predictions[0][0]) if len(predictions[0]) > 0 else 0.0,
+                    'Change_pct_t5_pred': float(predictions[0][1]) if len(predictions[0]) > 1 else 0.0,
+                    'Change_pct_t10_pred': float(predictions[0][2]) if len(predictions[0]) > 2 else 0.0,
+                    'Change_pct_t20_pred': float(predictions[0][3]) if len(predictions[0]) > 3 else 0.0
+                }
+            # 輸出預測結果摘要
+            print("=== 漲幅預測結果 ===")
+            for key, value in result.items():
+                days = key.split('_')[2][1:]  # 提取天數
+                direction = "↗️ 上漲" if value > 0