Spaces:

AlanRex
/

AITEST

Sleeping

App Files Community

AlanRex committed on Sep 25, 2025

Commit

804d0b4

verified ·

1 Parent(s): 9cb94a7

Update model_predictor.py

Browse files

Files changed (1) hide show

model_predictor.py +31 -47

model_predictor.py CHANGED Viewed

@@ -160,7 +160,10 @@ class XGBoostModel:
     def predict(self, model_name, input_df):
         """
-        進行股價漲幅預測（已加入自動對齊模型 feature_names 的邏輯）
         """
         try:
             # 載入模型（如果尚未載入）
@@ -168,72 +171,53 @@ class XGBoostModel:
                 model_path = f"{model_name}.json"
                 if not self.load_model(model_path):
                     return None
-            # 先做基本的特徵預處理（會補缺失欄位為 0，但不重新排序）
-            processed_df = input_df.copy()
-            processed_df = self.preprocess_features(processed_df.copy())
-            # 嘗試從已載入的 xgboost 模型中取得訓練時的 feature names
-            expected_features = None
-            try:
-                booster = self.model.get_booster()
-                expected_features = getattr(booster, "feature_names", None)
-            except Exception:
-                expected_features = None
-            if expected_features:
-                # 檢查缺失或多餘欄位
-                missing = [f for f in expected_features if f not in processed_df.columns]
-                extra = [f for f in processed_df.columns if f not in expected_features]
-                if missing:
-                    print(f"警告：模型期待以下特徵但輸入缺失，將以 0 補齊: {missing}")
-                    for f in missing:
-                        processed_df[f] = 0.0
-                if extra:
-                    print(f"注意：輸入含有模型未使用的額外特徵，將忽略: {extra}")
-                # 依模型期待欄位順序重排（且只保留 expected_features）
-                processed_df = processed_df[expected_features]
-            else:
-                # 如果模型沒有記錄 feature_names，退回到 class 裡預設的 feature_columns（如有）
-                processed_df = processed_df[self.feature_columns]
-            # DEBUG 訊息
-            print("=== 模型輸入特徵檢查（對齊後） ===")
             print(f"輸入形狀: {processed_df.shape}")
             print("前5個特徵值:")
             for i, col in enumerate(processed_df.columns[:5]):
                 print(f"  {col}: {processed_df[col].iloc[0]:.6f}")
             # 進行預測
             predictions = self.model.predict(processed_df)
-            print(f"原始預測輸出形狀: {getattr(predictions, 'shape', str(type(predictions)))}")
             print(f"原始預測值: {predictions}")
-            # 處理多輸出或單輸出
-            if getattr(predictions, 'ndim', 1) == 1:
-                # 單輸出
-                result = {'Change_pct_t1_pred': float(predictions[0])}
             else:
                 result = {}
-                target_keys = ['Change_pct_t1_pred', 'Change_pct_t5_pred',
-                               'Change_pct_t10_pred', 'Change_pct_t20_pred']
                 for i, key in enumerate(target_keys):
                     if i < predictions.shape[1]:
                         result[key] = float(predictions[0][i])
                     else:
                         result[key] = 0.0
-            # 列印摘要
             print("=== 漲幅預測結果 ===")
             for key, value in result.items():
                 days = key.split('_')[2][1:]
                 direction = "上漲" if value > 0 else "下跌"
                 print(f"  {days}日後預測: {value:+.2f}% ({direction})")
             return result
     except Exception as e:
         print(f"預測過程中發生錯誤：{e}")

     def predict(self, model_name, input_df):
         """
+        進行股價漲幅預測
+        Returns:
+            dict: 預測結果，包含各時間點的漲幅百分比
         """
         try:
             # 載入模型（如果尚未載入）
                 model_path = f"{model_name}.json"
                 if not self.load_model(model_path):
                     return None
+            # 預處理特徵
+            processed_df = self.preprocess_features(input_df.copy())
+            print("=== 模型輸入特徵檢查 ===")
             print(f"輸入形狀: {processed_df.shape}")
             print("前5個特徵值:")
             for i, col in enumerate(processed_df.columns[:5]):
                 print(f"  {col}: {processed_df[col].iloc[0]:.6f}")
             # 進行預測
             predictions = self.model.predict(processed_df)
+            print(f"原始預測輸出形狀: {predictions.shape}")
             print(f"原始預測值: {predictions}")
+            # 【修正】處理多輸出預測結果
+            if predictions.ndim == 1:
+                # 單輸出情況 - 只有一個時間點的預測
+                result = {
+                    'Change_pct_t1_pred': float(predictions[0])
+                }
             else:
+                # 多輸出情況：[t1, t5, t10, t20] - 對應訓練模型的四個輸出
                 result = {}
+                target_keys = ['Change_pct_t1_pred', 'Change_pct_t5_pred',
+                              'Change_pct_t10_pred', 'Change_pct_t20_pred']
                 for i, key in enumerate(target_keys):
                     if i < predictions.shape[1]:
                         result[key] = float(predictions[0][i])
                     else:
                         result[key] = 0.0
+            # 輸出預測結果摘要
             print("=== 漲幅預測結果 ===")
             for key, value in result.items():
                 days = key.split('_')[2][1:]
                 direction = "上漲" if value > 0 else "下跌"
                 print(f"  {days}日後預測: {value:+.2f}% ({direction})")
             return result
+        except Exception as e:
+            print(f"預測過程中發生錯誤：{e}")
+            import traceback
+            traceback.print_exc()
+            return None
     except Exception as e:
         print(f"預測過程中發生錯誤：{e}")