Spaces:

AlanRex
/

AITEST

Running

App Files Files Community

AlanRex committed on Sep 17, 2025

Commit

5777f47

verified ·

1 Parent(s): 3311f4d

Update model_predictor.py

Browse files

Files changed (1) hide show

model_predictor.py +833 -0

model_predictor.py CHANGED Viewed

	@@ -0,0 +1,833 @@

+# -*- coding: utf-8 -*-
+"""model_predictor.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/1pIuCvafVPCRzTLojc-rZH_MFKsxMam2L
+"""
+# model_predictor.py
+# 深度學習股價預測模型 - 適用於 HUGGING_FACE_V4.2
+import os
+import numpy as np
+import pandas as pd
+import yfinance as yf
+from datetime import datetime, timedelta
+import warnings
+warnings.filterwarnings('ignore')
+# TensorFlow/Keras 相關
+try:
+    import tensorflow as tf
+    from tensorflow.keras.models import Sequential, load_model
+    from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, LeakyReLU
+    from tensorflow.keras.optimizers import Adam
+    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+    from tensorflow.keras.regularizers import l2
+    from sklearn.preprocessing import MinMaxScaler, RobustScaler
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+    TENSORFLOW_AVAILABLE = True
+except ImportError:
+    TENSORFLOW_AVAILABLE = False
+# Fix the random seeds so that results are reproducible.
+# The TensorFlow seed is only set when the guarded import above succeeded.
+if TENSORFLOW_AVAILABLE:
+    tf.random.set_seed(42)
+np.random.seed(42)
+class StockPredictor:
+    """股價預測模型類別"""
+    def __init__(self):
+        """Create an untrained predictor with its column layout and file paths."""
+        # Artifact locations (relative paths, resolved against the working directory).
+        self.model_path = 'lstm_stock_model.h5'
+        self.scalers_path = 'scalers.npz'
+        # Number of consecutive rows that form one model input window.
+        self.sequence_length = 60
+        # Filled in by train_model() or load_model().
+        self.model = None
+        self.feature_scaler = None
+        self.target_scalers = dict()  # one independent scaler per target column
+        # Input features, grouped by origin; the concatenation order is the
+        # column order fed to the network.
+        market_inputs = ['volume', 'rate', 'DJI', 'NAS', 'SOX', 'S&P_500', 'TSM_ADR']
+        technical_inputs = ['RSI', 'MACD', 'MACDsign', 'MACDvol', 'K', 'D',
+                            '+DI', '-DI', 'ADX']
+        macro_inputs = ['business_climate', 'PMI']
+        self.feature_columns = market_inputs + technical_inputs + macro_inputs
+        # One target column per forecast horizon.
+        self.target_columns = [f'close_{horizon}d' for horizon in (1, 5, 10, 20, 60)]
+    def fetch_yfinance_data(self, start_date='2022-09-12', end_date='2025-09-08',
+                            symbol='2314.TW'):
+        """Download daily prices for the target stock plus US market references.
+
+        Args:
+            start_date: Start of the requested range, as 'YYYY-MM-DD'.
+            end_date: End of the requested range, as 'YYYY-MM-DD'.
+            symbol: Yahoo Finance ticker of the stock to model. The default is
+                the ticker the code has always requested.
+
+        Returns:
+            A DataFrame indexed by the target stock's trading days with the
+            columns 'close', 'volume', 'rate' and one column per US series that
+            could be downloaded, or None when the target stock cannot be
+            fetched or any unexpected error occurs.
+        """
+        # NOTE(review): the original comment described the target as
+        # "TSMC (2330.TW)" while the code requested '2314.TW'. The ticker is
+        # kept as it was; pass symbol='2330.TW' if TSMC was intended.
+        us_symbols = {
+            'DJI': '^DJI',
+            'NAS': '^IXIC',
+            'SOX': '^SOX',
+            'S&P_500': '^GSPC',
+            'TSM_ADR': 'TSM'
+        }
+        try:
+            taiwan_data = yf.Ticker(symbol).history(start=start_date, end=end_date)
+            if taiwan_data.empty:
+                print("警告: 無法獲取台灣股市數據")
+                return None
+            main_df = pd.DataFrame(index=taiwan_data.index)
+            main_df['close'] = taiwan_data['Close']
+            main_df['volume'] = taiwan_data['Volume']
+            # Daily return of the target stock
+            main_df['rate'] = main_df['close'].pct_change()
+            for name, us_symbol in us_symbols.items():
+                # A failure for one reference series must not abort the others.
+                try:
+                    data = yf.Ticker(us_symbol).history(start=start_date, end=end_date)
+                except Exception as e:
+                    print(f"獲取 {name} 數據時發生錯誤: {e}")
+                    continue
+                if data.empty:
+                    print(f"警告: 無法獲取 {name} 數據")
+                    continue
+                # Align to the target stock's trading days, carrying the last
+                # known US close forward over non-matching days.
+                main_df[name] = data['Close'].reindex(main_df.index, method='ffill')
+            # The downloaded index may be timezone-aware, while the CSV-based
+            # indicators that callers merge in are timezone-naive. Dropping the
+            # zone only after the alignment above keeps that alignment
+            # unchanged and lets later reindex() calls succeed.
+            if getattr(main_df.index, 'tz', None) is not None:
+                main_df.index = main_df.index.tz_localize(None)
+            return main_df
+        except Exception as e:
+            print(f"獲取yfinance數據時發生錯誤: {e}")
+            return None
+    def load_external_data(self):
+        """Load the optional macro-economic CSV files from the working directory.
+
+        Reads 'business_climate.csv' and 'taiwan_pmi.csv'. Each file must have
+        a 'Date' column, which becomes the (sorted) index.
+
+        Returns:
+            Tuple (business_climate, pmi_data). A frame is empty when its file
+            is missing or cannot be parsed, so callers can rely on ``.empty``
+            to decide whether to fall back to default values.
+        """
+        def _read_indicator(path, label):
+            """Return the date-indexed frame stored at ``path``, or an empty frame."""
+            if not os.path.exists(path):
+                return pd.DataFrame()
+            try:
+                frame = pd.read_csv(path)
+                frame['Date'] = pd.to_datetime(frame['Date'])
+                # reindex(method='ffill') in the callers needs a monotonic index.
+                frame = frame.set_index('Date').sort_index()
+            except Exception as e:
+                print(f"載入{label}數據失敗: {e}")
+                # Never hand back a half-processed frame: it would look valid
+                # (non-empty) to the callers but lack the date index.
+                return pd.DataFrame()
+            print(f"成功載入{label}數據")
+            return frame
+        business_climate = _read_indicator('business_climate.csv', '景氣燈號')
+        pmi_data = _read_indicator('taiwan_pmi.csv', 'PMI')
+        return business_climate, pmi_data
+    def calculate_technical_indicators(self, df):
+        """Add technical-indicator columns, all derived from df['close'], to ``df``.
+
+        Columns written: RSI, MACD, MACDsign, MACDvol, K, D, +DI, -DI, ADX.
+        The frame is modified in place and also returned. If anything fails,
+        the error is printed and the frame is returned as far as it got.
+        """
+        try:
+            close = df['close']
+            # RSI from 14-row rolling means of upward and downward moves
+            change = close.diff()
+            avg_gain = change.where(change > 0, 0).rolling(window=14).mean()
+            avg_loss = (-change.where(change < 0, 0)).rolling(window=14).mean()
+            strength = avg_gain / avg_loss
+            df['RSI'] = 100 - (100 / (1 + strength))
+            # MACD: 12-span EMA minus 26-span EMA, with a 9-span signal line
+            fast_ema = close.ewm(span=12).mean()
+            slow_ema = close.ewm(span=26).mean()
+            df['MACD'] = fast_ema - slow_ema
+            df['MACDsign'] = df['MACD'].ewm(span=9).mean()
+            df['MACDvol'] = df['MACD'] - df['MACDsign']
+            # KD: position of the close inside its 9-row range, smoothed twice
+            floor_9 = close.rolling(window=9).min()
+            ceiling_9 = close.rolling(window=9).max()
+            raw_stochastic = (close - floor_9) / (ceiling_9 - floor_9) * 100
+            df['K'] = raw_stochastic.ewm(com=2).mean()
+            df['D'] = df['K'].ewm(com=2).mean()
+            # DMI (simplified: built from close prices only)
+            df['high_low_diff'] = close.rolling(2).max() - close.rolling(2).min()
+            df['+DI'] = df['high_low_diff'].rolling(14).mean()
+            df['-DI'] = df['high_low_diff'].rolling(14).std()
+            df['ADX'] = (df['+DI'] + df['-DI']).rolling(14).mean()
+            # The helper column is not a feature; remove it again
+            del df['high_low_diff']
+            return df
+        except Exception as e:
+            print(f"計算技術指標時發生錯誤: {e}")
+            return df
+    def create_sample_data(self, days=500):
+        """Build a fallback training set from downloaded prices.
+
+        Used when loading the training CSV fails. Macro indicators are filled
+        with fixed values, and any feature column that is still missing is
+        filled with 0.0.
+
+        Args:
+            days: Number of calendar days of history to request, counted back
+                from now.
+
+        Returns:
+            A DataFrame holding every feature and target column with no missing
+            values, or None when the data cannot be fetched, lacks
+            'close'/'volume', or has fewer than 100 complete rows.
+        """
+        try:
+            print("創建示例數據進行訓練...")
+            now = datetime.now()
+            taiwan_data = self.fetch_yfinance_data(
+                start_date=(now - timedelta(days=days)).strftime('%Y-%m-%d'),
+                end_date=now.strftime('%Y-%m-%d')
+            )
+            if taiwan_data is None or taiwan_data.empty:
+                print("無法獲取示例數據")
+                return None
+            # The indicators and the targets both need these two columns
+            if 'close' not in taiwan_data.columns or 'volume' not in taiwan_data.columns:
+                print("示例數據缺少必要欄位")
+                return None
+            taiwan_data = self.calculate_technical_indicators(taiwan_data)
+            # Macro indicators: fixed placeholder values
+            taiwan_data['business_climate'] = 25.0
+            taiwan_data['PMI'] = 50.0
+            # Make sure every feature column exists
+            for feature in self.feature_columns:
+                if feature not in taiwan_data.columns:
+                    taiwan_data[feature] = 0.0
+            # Future closing prices are the prediction targets. The loop
+            # variable is deliberately not called `days`, so it does not
+            # shadow the parameter.
+            for horizon in [1, 5, 10, 20, 60]:
+                taiwan_data[f'close_{horizon}d'] = taiwan_data['close'].shift(-horizon)
+            # Drop rows with missing values (indicator warm-up, unknown future)
+            taiwan_data = taiwan_data.dropna()
+            if len(taiwan_data) < 100:
+                print("示例數據不足")
+                return None
+            print(f"成功創建示例數據: {taiwan_data.shape}")
+            return taiwan_data
+        except Exception as e:
+            print(f"創建示例數據時發生錯誤: {e}")
+            return None
+    def debug_csv_file(self, csv_path):
+        """Print a preview of a CSV file and probe how it has to be parsed.
+
+        The method header had been lost, which left this body as unreachable
+        code at the end of create_sample_data() and made every call to
+        ``self.debug_csv_file`` raise AttributeError.
+
+        Args:
+            csv_path: Path of the CSV file to inspect.
+
+        Returns:
+            Tuple (encoding, separator) of the first combination for which
+            pandas reads more than five columns, or (None, None) when no
+            combination qualifies or the file cannot be inspected.
+        """
+        try:
+            print(f"\n=== 調試CSV檔案: {csv_path} ===")
+            # Preview only; errors='replace' keeps a non-UTF-8 file from
+            # aborting the probe before other encodings are tried.
+            with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
+                first_lines = [f.readline().strip() for _ in range(5)]
+            print("前5行原始內容:")
+            for number, line in enumerate(first_lines, start=1):
+                print(f"第{number}行: {line[:100]}...")  # first 100 characters only
+            # Try every encoding/separator combination in a fixed order
+            encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
+            separators = [',', ';', '\t', '|']
+            for encoding in encodings:
+                for sep in separators:
+                    try:
+                        df_test = pd.read_csv(csv_path, encoding=encoding, sep=sep, nrows=5)
+                    except Exception:
+                        # This combination cannot parse the file; try the next
+                        continue
+                    if len(df_test.columns) > 5:  # a plausible number of columns
+                        print(f"\n成功讀取 (編碼: {encoding}, 分隔符: '{sep}'):")
+                        print(f"欄位: {list(df_test.columns)}")
+                        print(f"數據形狀: {df_test.shape}")
+                        return encoding, sep
+            print("無法找到合適的讀取參數")
+            return None, None
+        except Exception as e:
+            print(f"調試CSV檔案時發生錯誤: {e}")
+            return None, None
+    def prepare_training_data(self, csv_path=None):
+        """Assemble the feature matrix and target matrix used for training.
+
+        When ``csv_path`` names an existing file the data is read from it;
+        otherwise prices are downloaded and merged with the optional
+        macro-economic CSV files. Column names are then normalised, missing
+        indicators are computed, the future-close targets are derived and rows
+        with missing values are dropped.
+
+        Args:
+            csv_path: Optional path of a CSV file holding the training data.
+
+        Returns:
+            Tuple (X, y, index, df): the feature values, the target values, the
+            index of the cleaned frame and the cleaned frame itself. Every
+            failure path returns (None, None, None, None) after printing a
+            message.
+        """
+        try:
+            if csv_path and os.path.exists(csv_path):
+                # Inspect the CSV file first.
+                # NOTE(review): no `def debug_csv_file` is visible in this
+                # class; if it is indeed missing, this call raises
+                # AttributeError, which the outer `except` turns into a
+                # (None, None, None, None) return - confirm.
+                encoding, separator = self.debug_csv_file(csv_path)
+                # A CSV file was supplied, so load it directly
+                print(f"\n從 {csv_path} 載入數據...")
+                # Read with the parameters found by the probe
+                read_params = {}
+                if encoding:
+                    read_params['encoding'] = encoding
+                if separator and separator != ',':
+                    read_params['sep'] = separator
+                df = pd.read_csv(csv_path, **read_params)
+                # Show the structure of the CSV file
+                print(f"CSV檔案欄位: {list(df.columns)}")
+                print(f"數據形狀: {df.shape}")
+                print(f"前5行數據:")
+                print(df.head())
+                # Find the date column; the first matching candidate wins
+                date_columns = ['Date', 'date', 'DATE', 'Unnamed: 0']
+                date_col = None
+                for col in date_columns:
+                    if col in df.columns:
+                        date_col = col
+                        break
+                if date_col:
+                    print(f"使用日期欄位: {date_col}")
+                    df[date_col] = pd.to_datetime(df[date_col])
+                    df.set_index(date_col, inplace=True)
+                elif df.index.dtype == 'object':
+                    df.index = pd.to_datetime(df.index)
+                print(f"處理日期後的數據形狀: {df.shape}")
+                print(f"日期範圍: {df.index.min()} 到 {df.index.max()}")
+            else:
+                # Fetch the data from yfinance and the external files
+                print("從yfinance獲取數據...")
+                df = self.fetch_yfinance_data()
+                if df is None:
+                    return None, None, None, None
+                # Compute the technical indicators
+                df = self.calculate_technical_indicators(df)
+                # Load the external macro-economic data
+                business_climate, pmi_data = self.load_external_data()
+                # Merge the external data, forward-filling onto the trading days.
+                # NOTE(review): unlike advanced_lstm_predict(), no fillna() with
+                # the default follows the reindex here, so rows dated before the
+                # first indicator record stay NaN and are removed by the
+                # dropna() below - confirm this is intended.
+                if not business_climate.empty:
+                    df['business_climate'] = business_climate['Index'].reindex(
+                        df.index, method='ffill'
+                    )
+                else:
+                    df['business_climate'] = 25.0  # default value
+                if not pmi_data.empty:
+                    df['PMI'] = pmi_data['Index'].reindex(df.index, method='ffill')
+                else:
+                    df['PMI'] = 50.0  # default value
+            # Map alternative column names onto the names used by the model
+            column_mapping = {
+                # possible names of the volume column
+                'Volume': 'volume', 'vol': 'volume', 'VOLUME': 'volume',
+                # possible names of the close column
+                'Close': 'close', 'close_price': 'close', 'CLOSE': 'close', 'price': 'close',
+                # possible names of the rate column
+                'Rate': 'rate', 'return': 'rate', 'pct_change': 'rate', 'RATE': 'rate',
+                # US market indices
+                'DJI': 'DJI', 'DOW': 'DJI', 'dow': 'DJI',
+                'NAS': 'NAS', 'NASDAQ': 'NAS', 'nasdaq': 'NAS',
+                'SOX': 'SOX', 'sox': 'SOX',
+                'S&P_500': 'S&P_500', 'SP500': 'S&P_500', 'sp500': 'S&P_500',
+                'TSM_ADR': 'TSM_ADR', 'TSM': 'TSM_ADR', 'tsm': 'TSM_ADR',
+                # technical indicators
+                'rsi': 'RSI', 'macd': 'MACD', 'macdsign': 'MACDsign', 'macdvol': 'MACDvol',
+                'k': 'K', 'd': 'D', '+di': '+DI', '-di': '-DI', 'adx': 'ADX',
+                # macro-economic indicators
+                'Business_Climate': 'business_climate', 'business_climate_index': 'business_climate',
+                'pmi': 'PMI', 'PMI_Index': 'PMI'
+            }
+            # Apply the column mapping.
+            # NOTE(review): if the input holds two aliases of one target name
+            # (for example both 'Close' and 'price'), the rename yields
+            # duplicate column labels - verify against the expected CSV layout.
+            df = df.rename(columns=column_mapping)
+            print(f"映射後的欄位: {list(df.columns)}")
+            # Without a 'close' column, try other price columns.
+            # Of these candidates only 'Price' can still be present here; the
+            # other three were already renamed by the mapping above.
+            if 'close' not in df.columns:
+                price_candidates = ['Close', 'Price', 'CLOSE', 'close_price']
+                for candidate in price_candidates:
+                    if candidate in df.columns:
+                        df['close'] = df[candidate]
+                        print(f"使用 {candidate} 作為 close 價格")
+                        break
+            # Compute whatever the data does not already provide
+            if 'close' in df.columns:
+                if 'rate' not in df.columns:
+                    df['rate'] = df['close'].pct_change()
+                    print("計算了price return rate")
+                # 'RSI' is the marker: when it is absent, all indicators are computed
+                if 'RSI' not in df.columns:
+                    df = self.calculate_technical_indicators(df)
+                    print("計算了技術指標")
+            # Derive the future-close targets by shifting 'close' backwards
+            if 'close' in df.columns:
+                for days in [1, 5, 10, 20, 60]:
+                    df[f'close_{days}d'] = df['close'].shift(-days)
+                print("計算了未來價格目標")
+            else:
+                print("錯誤: 找不到價格數據，無法計算目標變數")
+                return None, None, None, None
+            print(f"計算目標變數後的數據形狀: {df.shape}")
+            # Drop rows with missing values.
+            # NOTE(review): this looks at every column of the frame, not only
+            # the feature and target columns, so an unrelated sparse CSV column
+            # can empty the dataset - confirm against the expected CSV layout.
+            original_len = len(df)
+            df = df.dropna()
+            print(f"移除缺失值: {original_len} -> {len(df)} 行")
+            if df.empty:
+                print("錯誤: 處理後的數據集為空")
+                print("可能原因:")
+                print("1. 所有數據都有缺失值")
+                print("2. 日期格式不正確")
+                print("3. 欄位名稱不匹配")
+                return None, None, None, None
+            # Make sure every required column exists
+            missing_features = set(self.feature_columns) - set(df.columns)
+            if missing_features:
+                print(f"警告: 缺少特徵欄位: {missing_features}")
+                # Fill the missing features with default values
+                for feature in missing_features:
+                    if feature == 'business_climate':
+                        df[feature] = 25.0  # default for the business-climate indicator
+                    elif feature == 'PMI':
+                        df[feature] = 50.0  # default for the PMI
+                    else:
+                        df[feature] = 0.0
+                print("已填充缺失的特徵欄位")
+            missing_targets = set(self.target_columns) - set(df.columns)
+            if missing_targets:
+                print(f"錯誤: 缺少目標欄位: {missing_targets}")
+                return None, None, None, None
+            # Extract the features and the targets in the configured column order
+            X = df[self.feature_columns].values
+            y = df[self.target_columns].values
+            print(f"數據形狀: X={X.shape}, y={y.shape}")
+            print(f"數據日期範圍: {df.index.min()} 到 {df.index.max()}")
+            return X, y, df.index, df
+        except Exception as e:
+            print(f"準備訓練數據時發生錯誤: {e}")
+            return None, None, None, None
+    def create_sequences(self, X, y):
+        """Cut the row-aligned arrays into sliding windows for the LSTM.
+
+        Each window holds ``self.sequence_length`` consecutive rows of ``X``
+        and is paired with the target row of the window's LAST row. Inference
+        feeds the most recent rows, latest row included, so training must use
+        the same pairing. Pairing a window with the row after it would train
+        every horizon one step further into the future than its label says.
+
+        Args:
+            X: Feature rows, first axis is time.
+            y: Target rows, aligned with ``X`` row by row.
+
+        Returns:
+            Tuple (X_seq, y_seq) of numpy arrays; both are empty when fewer
+            than ``self.sequence_length`` rows are available.
+        """
+        window = self.sequence_length
+        window_ends = range(window, len(X) + 1)
+        X_seq = [X[end - window:end] for end in window_ends]
+        y_seq = [y[end - 1] for end in window_ends]
+        return np.array(X_seq), np.array(y_seq)
+    def build_model(self, input_shape, output_shape):
+        """Build and compile the stacked LSTM regression network.
+
+        Args:
+            input_shape: Shape of one input sample, passed to the first LSTM layer.
+            output_shape: Number of units of the linear output layer.
+
+        Returns:
+            The compiled Keras model.
+
+        Raises:
+            ImportError: If TensorFlow could not be imported.
+        """
+        if not TENSORFLOW_AVAILABLE:
+            raise ImportError("TensorFlow未安裝，無法建立模型")
+        network = Sequential()
+        # First LSTM layer; it also fixes the input shape
+        network.add(LSTM(128, return_sequences=True, input_shape=input_shape,
+                         kernel_regularizer=l2(0.001)))
+        network.add(BatchNormalization())
+        network.add(Dropout(0.2))
+        # Second LSTM layer
+        network.add(LSTM(64, return_sequences=True, kernel_regularizer=l2(0.001)))
+        network.add(BatchNormalization())
+        network.add(Dropout(0.2))
+        # Third LSTM layer; it returns only its final state
+        network.add(LSTM(32, return_sequences=False, kernel_regularizer=l2(0.001)))
+        network.add(BatchNormalization())
+        network.add(Dropout(0.2))
+        # Fully connected head
+        network.add(Dense(64, kernel_regularizer=l2(0.001)))
+        network.add(LeakyReLU(alpha=0.1))
+        network.add(BatchNormalization())
+        network.add(Dropout(0.3))
+        network.add(Dense(32, kernel_regularizer=l2(0.001)))
+        network.add(LeakyReLU(alpha=0.1))
+        network.add(Dropout(0.2))
+        # Output layer
+        network.add(Dense(output_shape, activation='linear'))
+        # Compile; the Huber loss is less sensitive to outliers
+        network.compile(
+            optimizer=Adam(learning_rate=0.001, clipnorm=1.0),
+            loss='huber',
+            metrics=['mse', 'mae']
+        )
+        return network
+    def train_model(self, csv_path=None):
+        """Train the LSTM model and save it together with its scalers.
+
+        Args:
+            csv_path: Optional path of a CSV file with training data. Without
+                it the data is downloaded by prepare_training_data().
+
+        Returns:
+            True when training finished, False when TensorFlow is missing, the
+            data could not be prepared or is too small, or fitting raised.
+            save_model() is then called; it prints its own errors and does not
+            change this result.
+        """
+        if not TENSORFLOW_AVAILABLE:
+            print("錯誤: TensorFlow未安裝，無法訓練模型")
+            return False
+        print("開始準備訓練數據...")
+        X, y, dates, df = self.prepare_training_data(csv_path)
+        # If loading the CSV failed, fall back to the sample data
+        if (X is None or y is None) and csv_path:
+            print("CSV載入失敗，嘗試創建示例數據...")
+            df = self.create_sample_data()
+            if df is not None:
+                X = df[self.feature_columns].values
+                y = df[self.target_columns].values
+                dates = df.index
+                print("使用示例數據繼續訓練")
+        if X is None or y is None:
+            print("錯誤: 無法準備訓練數據")
+            return False
+        # Check that there is enough data
+        if len(X) < 100:
+            print(f"警告: 訓練數據過少 ({len(X)} 樣本)，建議至少100個樣本")
+            return False
+        print("正在縮放數據...")
+        # Scale the features.
+        # NOTE(review): the feature scaler and the target scalers below are
+        # fitted on ALL rows, before the chronological train/validation split
+        # further down, so validation statistics influence the scaling -
+        # consider fitting on the training rows only.
+        self.feature_scaler = RobustScaler()
+        X_scaled = self.feature_scaler.fit_transform(X)
+        # One independent scaler per target column
+        y_scaled = np.zeros_like(y)
+        for i, target in enumerate(self.target_columns):
+            scaler = RobustScaler()
+            y_scaled[:, i:i+1] = scaler.fit_transform(y[:, i:i+1])
+            self.target_scalers[target] = scaler
+        print("正在創建時間序列...")
+        X_seq, y_seq = self.create_sequences(X_scaled, y_scaled)
+        if len(X_seq) == 0:
+            print(f"錯誤: 序列長度不足，需要至少{self.sequence_length + 1}個數據點")
+            return False
+        print(f"序列形狀: X_seq={X_seq.shape}, y_seq={y_seq.shape}")
+        # Split into training and validation sets
+        split_idx = int(len(X_seq) * 0.8)  # chronological split, not a random one
+        X_train, X_val = X_seq[:split_idx], X_seq[split_idx:]
+        y_train, y_val = y_seq[:split_idx], y_seq[split_idx:]
+        print(f"訓練集大小: {X_train.shape}, 驗證集大小: {X_val.shape}")
+        # Build the model
+        print("正在建立模型...")
+        input_shape = (X_seq.shape[1], X_seq.shape[2])
+        output_shape = y_seq.shape[1]
+        self.model = self.build_model(input_shape, output_shape)
+        print(f"模型架構: 輸入={input_shape}, 輸出={output_shape}")
+        # Callbacks: stop early and lower the learning rate when val_loss stalls
+        callbacks = [
+            EarlyStopping(
+                monitor='val_loss',
+                patience=15,
+                restore_best_weights=True,
+                verbose=1
+            ),
+            ReduceLROnPlateau(
+                monitor='val_loss',
+                factor=0.5,
+                patience=8,
+                min_lr=1e-6,
+                verbose=1
+            )
+        ]
+        # Train the model
+        print("開始訓練模型...")
+        try:
+            history = self.model.fit(
+                X_train, y_train,
+                validation_data=(X_val, y_val),
+                epochs=50,  # fewer epochs to speed up training
+                batch_size=min(32, len(X_train) // 4),  # adapt the batch size to the data size
+                callbacks=callbacks,
+                verbose=1
+            )
+        except Exception as e:
+            print(f"訓練過程中發生錯誤: {e}")
+            return False
+        # Evaluate the model; a failure here is reported but not fatal
+        print("\n評估模型性能...")
+        try:
+            train_loss = self.model.evaluate(X_train, y_train, verbose=0)
+            val_loss = self.model.evaluate(X_val, y_val, verbose=0)
+            print(f"訓練集損失: {train_loss[0]:.4f}")
+            print(f"驗證集損失: {val_loss[0]:.4f}")
+        except Exception as e:
+            print(f"評估過程中發生錯誤: {e}")
+        # Save the model and the scalers
+        self.save_model()
+        return True
+    def save_model(self):
+        """Write the model (if any) and the scaler parameters to disk.
+
+        The model goes to ``self.model_path``. For every scaler that exposes a
+        ``scale_`` attribute, its ``scale_`` and ``center_`` arrays go to
+        ``self.scalers_path`` under the keys '<name>_scale' and
+        '<name>_center'. Errors are printed, not raised.
+        """
+        try:
+            if self.model:
+                self.model.save(self.model_path)
+                print(f"模型已儲存至: {self.model_path}")
+            # Feature scaler first, then one entry per target column
+            named_scalers = {'feature_scaler': self.feature_scaler, **self.target_scalers}
+            # Store plain arrays instead of the sklearn objects themselves
+            arrays = {}
+            for label, fitted in named_scalers.items():
+                if not hasattr(fitted, 'scale_'):
+                    continue  # not fitted (or None): nothing to store
+                arrays[label + '_scale'] = fitted.scale_
+                arrays[label + '_center'] = fitted.center_
+            np.savez(self.scalers_path, **arrays)
+            print(f"縮放器已儲存至: {self.scalers_path}")
+        except Exception as e:
+            print(f"儲存模型時發生錯誤: {e}")
+    def load_model(self):
+        """Load the saved model and rebuild the scalers from their stored arrays.
+
+        Returns:
+            True only when the model AND the feature scaler were restored,
+            because the model cannot be used for prediction without its
+            scalers. False when a file is missing, TensorFlow is unavailable
+            or loading raised.
+        """
+        try:
+            if not (os.path.exists(self.model_path) and TENSORFLOW_AVAILABLE):
+                print("模型文件不存在或TensorFlow未安裝")
+                return False
+            # The loaded model is only used for inference, so the training
+            # configuration (loss, metrics, optimizer) is not restored.
+            self.model = load_model(self.model_path, compile=False)
+            print("模型載入成功")
+            if not os.path.exists(self.scalers_path):
+                print(f"警告: 找不到縮放器檔案: {self.scalers_path}")
+                return False
+            # The context manager closes the .npz archive again
+            with np.load(self.scalers_path) as scalers_data:
+                # Rebuild the feature scaler
+                if 'feature_scaler_scale' in scalers_data:
+                    self.feature_scaler = RobustScaler()
+                    self.feature_scaler.scale_ = scalers_data['feature_scaler_scale']
+                    self.feature_scaler.center_ = scalers_data['feature_scaler_center']
+                # Rebuild one scaler per target column
+                for target in self.target_columns:
+                    scale_key = f'{target}_scale'
+                    center_key = f'{target}_center'
+                    if scale_key in scalers_data:
+                        scaler = RobustScaler()
+                        scaler.scale_ = scalers_data[scale_key]
+                        scaler.center_ = scalers_data[center_key]
+                        self.target_scalers[target] = scaler
+            if self.feature_scaler is None:
+                print(f"警告: 縮放器檔案中沒有特徵縮放器: {self.scalers_path}")
+                return False
+            print("縮放器載入成功")
+            return True
+        except Exception as e:
+            print(f"載入模型時發生錯誤: {e}")
+            return False
+# Module-wide predictor instance, created on first use
+_predictor = None
+def get_predictor():
+    """Return the shared StockPredictor, creating and loading it on first call."""
+    global _predictor
+    if _predictor is not None:
+        return _predictor
+    _predictor = StockPredictor()
+    # The result is not checked here; callers inspect ``.model`` themselves.
+    _predictor.load_model()
+    return _predictor
+def advanced_lstm_predict(predict_days):
+    """Predict the closing price ``predict_days`` trading days ahead.
+
+    This is the interface used by the main program.
+
+    Args:
+        predict_days: Forecast horizon in days; supported values are 1, 5, 10,
+            20 and 60. Any other value falls back to the 5-day forecast, as
+            before, but now prints a warning.
+
+    Returns:
+        dict with 'predicted_price', 'change_pct' (percent change against the
+        latest close) and 'confidence' (a volatility-based heuristic clamped
+        to [0.5, 0.9]), all plain floats; None if the prediction fails.
+    """
+    try:
+        predictor = get_predictor()
+        if predictor.model is None:
+            print("模型未載入，無法進行預測")
+            return None
+        if predictor.feature_scaler is None:
+            print("特徵縮放器未載入，無法進行預測")
+            return None
+        # Resolve the target column for the requested horizon
+        target_map = {1: 'close_1d', 5: 'close_5d', 10: 'close_10d',
+                      20: 'close_20d', 60: 'close_60d'}
+        if predict_days not in target_map:
+            print(f"警告: 不支援的預測天數 {predict_days}，改用5日預測")
+        target_col = target_map.get(predict_days, 'close_5d')
+        target_idx = predictor.target_columns.index(target_col)
+        if target_col not in predictor.target_scalers:
+            print(f"未找到 {target_col} 的縮放器")
+            return None
+        # The indicators need a warm-up (ADX is NaN for the first 27 rows) and
+        # those rows are dropped below, so sequence_length rows alone are not
+        # enough. Request roughly twice as many calendar days as the trading
+        # rows needed; the former 90 calendar days always left fewer than 60
+        # usable rows.
+        warmup_rows = 40
+        lookback_days = 2 * (predictor.sequence_length + warmup_rows)
+        now = datetime.now()
+        current_data = predictor.fetch_yfinance_data(
+            start_date=(now - timedelta(days=lookback_days)).strftime('%Y-%m-%d'),
+            end_date=now.strftime('%Y-%m-%d')
+        )
+        if current_data is None or len(current_data) < predictor.sequence_length:
+            print("無法獲取足夠的當前數據進行預測")
+            return None
+        # A timezone-aware index cannot be aligned with the timezone-naive
+        # dates of the CSV indicators merged in below.
+        if getattr(current_data.index, 'tz', None) is not None:
+            current_data.index = current_data.index.tz_localize(None)
+        # Compute the technical indicators
+        current_data = predictor.calculate_technical_indicators(current_data)
+        # Merge the external macro-economic data, falling back to the defaults
+        business_climate, pmi_data = predictor.load_external_data()
+        if not business_climate.empty:
+            current_data['business_climate'] = business_climate['Index'].reindex(
+                current_data.index, method='ffill'
+            ).fillna(25.0)
+        else:
+            current_data['business_climate'] = 25.0
+        if not pmi_data.empty:
+            current_data['PMI'] = pmi_data['Index'].reindex(
+                current_data.index, method='ffill'
+            ).fillna(50.0)
+        else:
+            current_data['PMI'] = 50.0
+        # Fill in feature columns that are still missing
+        for feature in predictor.feature_columns:
+            if feature not in current_data.columns:
+                current_data[feature] = 0.0
+        current_data = current_data.dropna()
+        if len(current_data) < predictor.sequence_length:
+            print("處理後的數據不足以進行預測")
+            return None
+        # Scale the features and take the most recent window
+        X_current = current_data[predictor.feature_columns].values
+        X_current_scaled = predictor.feature_scaler.transform(X_current)
+        X_seq = X_current_scaled[-predictor.sequence_length:].reshape(
+            1, predictor.sequence_length, len(predictor.feature_columns)
+        )
+        prediction_scaled = predictor.model.predict(X_seq, verbose=0)
+        # Undo the target scaling; float() turns the numpy scalar into a
+        # plain Python number for the caller
+        predicted_price = float(
+            predictor.target_scalers[target_col].inverse_transform(
+                prediction_scaled[:, target_idx:target_idx+1]
+            )[0, 0]
+        )
+        # Percent change against the latest close
+        current_price = float(current_data['close'].iloc[-1])
+        change_pct = ((predicted_price - current_price) / current_price) * 100
+        # Confidence (simplified): derived from the recent volatility
+        recent_volatility = current_data['close'].pct_change().tail(20).std()
+        confidence = float(max(0.5, min(0.9, 1 - recent_volatility * 5)))
+        return {
+            'predicted_price': predicted_price,
+            'change_pct': change_pct,
+            'confidence': confidence
+        }
+    except Exception as e:
+        print(f"LSTM預測時發生錯誤: {e}")
+        return None
+def train_model_from_csv(csv_path):
+    """Convenience wrapper: train a fresh StockPredictor from ``csv_path``.
+
+    Returns whatever StockPredictor.train_model() returns for that path.
+    """
+    return StockPredictor().train_model(csv_path)
+if __name__ == "__main__":
+    # Test mode: train a model, then run one prediction per horizon
+    print("=== 股價預測模型測試 ===")
+    # Nothing below works without TensorFlow
+    if not TENSORFLOW_AVAILABLE:
+        print("警告: TensorFlow未安裝，無法使用深度學習功能")
+        print("請安裝TensorFlow: pip install tensorflow")
+        # exit() is injected by the `site` module and is not always available;
+        # raising SystemExit ends the script with the same status code.
+        raise SystemExit(1)
+    csv_file = "新期末專案輸入資料20220912-20250909.csv"
+    predictor = StockPredictor()
+    if os.path.exists(csv_file):
+        print(f"找到CSV檔案: {csv_file}")
+        if predictor.train_model(csv_file):
+            print("模型訓練完成!")
+        else:
+            # Second attempt without the CSV file
+            print("CSV訓練失敗，嘗試使用yfinance數據...")
+            if predictor.train_model():
+                print("使用yfinance數據訓練完成!")
+            else:
+                print("所有訓練方法都失敗!")
+    else:
+        print(f"未找到CSV檔案: {csv_file}")
+        print("將使用yfinance數據進行訓練...")
+        if predictor.train_model():
+            print("模型訓練完成!")
+        else:
+            print("模型訓練失敗!")
+    # Try a prediction for every supported horizon
+    print("\n=== 測試預測功能 ===")
+    for days in [1, 5, 10, 20, 60]:
+        result = advanced_lstm_predict(days)
+        if result:
+            print(f"{days}日預測: 價格={result['predicted_price']:.2f}, "
+                  f"變化={result['change_pct']:+.2f}%, "
+                  f"信心度={result['confidence']:.1%}")
+        else:
+            print(f"{days}日預測失敗")