Spaces:

AlanRex
/

AITEST

Sleeping

App Files Files Community

AlanRex committed on Sep 16, 2025

Commit

cc07c78

verified ·

1 Parent(s): d6aec87

Delete model_predictor.py

Browse files

Files changed (1) hide show

model_predictor.py +0 -847

model_predictor.py DELETED Viewed

@@ -1,847 +0,0 @@
-# -*- coding: utf-8 -*-
-"""model_predictor.ipynb
-Automatically generated by Colab.
-Original file is located at
-    https://colab.research.google.com/drive/1pIuCvafVPCRzTLojc-rZH_MFKsxMam2L
-"""
-# model_predictor.py
-# 深度學習股價預測模型 - 適用於 HUGGING_FACE_V4.2
-import os
-import numpy as np
-import pandas as pd
-import yfinance as yf
-from datetime import datetime, timedelta
-import warnings
-warnings.filterwarnings('ignore')
-# TensorFlow/Keras 相關
-try:
-    import tensorflow as tf
-    from tensorflow.keras.models import Sequential, load_model
-    from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, LeakyReLU
-    from tensorflow.keras.optimizers import Adam
-    from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
-    from tensorflow.keras.regularizers import l2
-    from sklearn.preprocessing import MinMaxScaler, RobustScaler
-    from sklearn.model_selection import train_test_split
-    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
-    TENSORFLOW_AVAILABLE = True
-except ImportError:
-    TENSORFLOW_AVAILABLE = False
-# Fix the random seeds so results are reproducible
-if TENSORFLOW_AVAILABLE:
-    tf.random.set_seed(42)
-np.random.seed(42)
-class StockPredictor:
-    """Stock price prediction model built around an LSTM network."""
-    def __init__(self):
-        """Set up an untrained predictor with its fixed configuration."""
-        # Model and feature scaler stay unset until training or loading
-        self.model = None
-        self.feature_scaler = None
-        # One independent scaler per target column, keyed by column name
-        self.target_scalers = {}
-        # Input features, grouped by where they come from
-        price_features = ['volume', 'rate']
-        us_market_features = ['DJI', 'NAS', 'SOX', 'S&P_500', 'TSM_ADR']
-        indicator_features = [
-            'RSI', 'MACD', 'MACDsign', 'MACDvol', 'K', 'D', '+DI', '-DI', 'ADX'
-        ]
-        macro_features = ['business_climate', 'PMI']
-        self.feature_columns = (
-            price_features + us_market_features + indicator_features + macro_features
-        )
-        # One target per forecast horizon, in days
-        self.target_columns = [f'close_{horizon}d' for horizon in (1, 5, 10, 20, 60)]
-        # Number of past rows fed to the network for each sample
-        self.sequence_length = 60
-        self.model_path = 'lstm_stock_model.h5'
-        self.scalers_path = 'scalers.npz'
-    def fetch_yfinance_data(self, start_date='2022-09-12', end_date='2025-09-08'):
-        """Download TSMC (2330.TW) prices and US market closes via yfinance.
-
-        Args:
-            start_date: Start of the requested range, 'YYYY-MM-DD'.
-            end_date: End of the requested range, 'YYYY-MM-DD'.
-
-        Returns:
-            A DataFrame indexed by the Taiwan data's tz-naive dates with
-            'close', 'volume', 'rate' and one forward-filled column per US
-            symbol that returned data; None if the Taiwan data is empty or
-            an unexpected error occurs.
-        """
-        try:
-            # TSMC (2330.TW) is the primary target stock
-            taiwan_data = yf.Ticker('2330.TW').history(start=start_date, end=end_date)
-            # Test for emptiness before touching the index; an empty result
-            # may lack a datetime index, in which case tz_localize would raise
-            if taiwan_data.empty:
-                print("警告: 無法獲取台灣股市數據")
-                return None
-            # Strip the timezone so the index is tz-naive
-            taiwan_data.index = taiwan_data.index.tz_localize(None)
-            # US market series, keyed by the feature column each one fills
-            symbols = {
-                'DJI': '^DJI',
-                'NAS': '^IXIC',
-                'SOX': '^SOX',
-                'S&P_500': '^GSPC',
-                'TSM_ADR': 'TSM'
-            }
-            us_data = {}
-            for name, symbol in symbols.items():
-                try:
-                    data = yf.Ticker(symbol).history(start=start_date, end=end_date)
-                    if data.empty:
-                        print(f"警告: 無法獲取 {name} 數據")
-                        continue
-                    # Strip the timezone so the index is tz-naive
-                    data.index = data.index.tz_localize(None)
-                    us_data[name] = data['Close']
-                except Exception as e:
-                    # One failing symbol must not abort the whole download
-                    print(f"獲取 {name} 數據時發生錯誤: {e}")
-            # Assemble everything on the Taiwan trading calendar
-            main_df = pd.DataFrame(index=taiwan_data.index)
-            main_df['close'] = taiwan_data['Close']
-            main_df['volume'] = taiwan_data['Volume']
-            # Daily return of the close price
-            main_df['rate'] = main_df['close'].pct_change()
-            # Align each US series to the Taiwan dates, carrying the last value forward
-            for name, data in us_data.items():
-                main_df[name] = data.reindex(main_df.index, method='ffill')
-            return main_df
-        except Exception as e:
-            print(f"獲取yfinance數據時發生錯誤: {e}")
-            return None
-    def load_external_data(self):
-        """Load the optional business-climate and PMI CSV files.
-
-        Reads 'business_climate.csv' and 'taiwan_pmi.csv' from the current
-        working directory when they exist. Each file must have a 'Date'
-        column, which becomes a tz-naive datetime index.
-
-        Returns:
-            Tuple (business_climate, pmi_data). Each element is an empty
-            DataFrame when its file is missing or could not be processed,
-            so callers can rely on `.empty` to decide on a fallback.
-        """
-        sources = (
-            ('business_climate.csv', '景氣燈號'),
-            ('taiwan_pmi.csv', 'PMI'),
-        )
-        frames = []
-        for csv_name, label in sources:
-            frame = pd.DataFrame()
-            try:
-                if os.path.exists(csv_name):
-                    loaded = pd.read_csv(csv_name)
-                    loaded['Date'] = pd.to_datetime(loaded['Date'])
-                    loaded.set_index('Date', inplace=True)
-                    # Make sure the index is tz-naive
-                    loaded.index = loaded.index.tz_localize(None)
-                    # Only publish the frame once it is fully processed, so a
-                    # failure midway never leaks a half-prepared, non-empty frame
-                    frame = loaded
-                    print(f"成功載入{label}數據")
-            except Exception as e:
-                print(f"載入{label}數據失敗: {e}")
-            frames.append(frame)
-        business_climate, pmi_data = frames
-        return business_climate, pmi_data
-    def calculate_technical_indicators(self, df):
-        """Add technical-indicator columns derived from df['close'].
-
-        Adds, in place: 'RSI', 'MACD', 'MACDsign', 'MACDvol', 'K', 'D',
-        '+DI', '-DI' and 'ADX'. The DMI columns are a simplified version
-        computed from close prices only.
-
-        Args:
-            df: DataFrame with a 'close' column; it is modified in place.
-
-        Returns:
-            The same DataFrame. If a computation fails, the error is printed
-            and df is returned with whatever columns were added so far.
-        """
-        try:
-            close = df['close']
-            # RSI: 14-row simple averages of gains and losses
-            delta = close.diff()
-            gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
-            loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
-            rs = gain / loss
-            df['RSI'] = 100 - (100 / (1 + rs))
-            # MACD: 12/26 EMA spread, 9-span signal line, and their difference
-            exp1 = close.ewm(span=12).mean()
-            exp2 = close.ewm(span=26).mean()
-            df['MACD'] = exp1 - exp2
-            df['MACDsign'] = df['MACD'].ewm(span=9).mean()
-            df['MACDvol'] = df['MACD'] - df['MACDsign']
-            # KD (stochastic) over a 9-row window of close prices
-            low_min = close.rolling(window=9).min()
-            high_max = close.rolling(window=9).max()
-            rsv = (close - low_min) / (high_max - low_min) * 100
-            df['K'] = rsv.ewm(com=2).mean()
-            df['D'] = df['K'].ewm(com=2).mean()
-            # DMI (simplified, close prices only). The 2-row range is kept in
-            # a local Series rather than a scratch column, so nothing leaks
-            # into the caller's frame if a later step raises.
-            close_range = close.rolling(2).max() - close.rolling(2).min()
-            df['+DI'] = close_range.rolling(14).mean()
-            df['-DI'] = close_range.rolling(14).std()
-            df['ADX'] = (df['+DI'] + df['-DI']).rolling(14).mean()
-            return df
-        except Exception as e:
-            print(f"計算技術指標時發生錯誤: {e}")
-            return df
-    def create_sample_data(self, days=500):
-        """Build a fallback training set from recent yfinance data.
-
-        Used when loading the training CSV fails.
-
-        Args:
-            days: Number of calendar days of history to request, counted
-                back from now.
-
-        Returns:
-            DataFrame containing every feature and target column with no
-            missing values, or None if the data cannot be fetched, lacks
-            'close'/'volume', has fewer than 100 usable rows, or an error
-            occurs.
-        """
-        try:
-            print("創建示例數據進行訓練...")
-            # Fetch TSMC data as the base series
-            now = datetime.now()
-            taiwan_data = self.fetch_yfinance_data(
-                start_date=(now - timedelta(days=days)).strftime('%Y-%m-%d'),
-                end_date=now.strftime('%Y-%m-%d')
-            )
-            if taiwan_data is None or taiwan_data.empty:
-                print("無法獲取示例數據")
-                return None
-            # The close and volume columns are mandatory
-            if 'close' not in taiwan_data.columns or 'volume' not in taiwan_data.columns:
-                print("示例數據缺少必要欄位")
-                return None
-            # Compute the technical indicators
-            taiwan_data = self.calculate_technical_indicators(taiwan_data)
-            # Economic indicators use fixed placeholder values here
-            taiwan_data['business_climate'] = 25.0
-            taiwan_data['PMI'] = 50.0
-            # Make sure every feature column exists
-            for feature in self.feature_columns:
-                if feature not in taiwan_data.columns:
-                    taiwan_data[feature] = 0.0
-            # Future close prices as targets; the loop variable is named
-            # `horizon` so it does not shadow the `days` parameter
-            for horizon in [1, 5, 10, 20, 60]:
-                taiwan_data[f'close_{horizon}d'] = taiwan_data['close'].shift(-horizon)
-            # Drop rows with missing values (indicator warm-up and shifted tail)
-            taiwan_data = taiwan_data.dropna()
-            if len(taiwan_data) < 100:
-                print("示例數據不足")
-                return None
-            print(f"成功創建示例數據: {taiwan_data.shape}")
-            return taiwan_data
-        except Exception as e:
-            print(f"創建示例數據時發生錯誤: {e}")
-            return None
-    def debug_csv_file(self, csv_path):
-        """Inspect a CSV file and guess an encoding/separator that parses it.
-
-        Prints the first five raw lines, then tries each combination of
-        candidate encodings and separators until pandas reads more than
-        five columns.
-
-        Args:
-            csv_path: Path of the CSV file to inspect.
-
-        Returns:
-            Tuple (encoding, separator) of the first working combination,
-            or (None, None) if none works or the file cannot be read.
-        """
-        try:
-            print(f"\n=== 調試CSV檔案: {csv_path} ===")
-            # Preview the raw structure. Undecodable bytes are replaced so a
-            # non-UTF-8 file still reaches the encoding search below.
-            with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
-                first_lines = [f.readline().strip() for _ in range(5)]
-            print("前5行原始內容:")
-            for line_no, line in enumerate(first_lines, start=1):
-                print(f"第{line_no}行: {line[:100]}...")  # show only the first 100 characters
-            # Try the candidate encodings and separators in turn
-            encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252']
-            separators = [',', ';', '\t', '|']
-            for encoding in encodings:
-                for sep in separators:
-                    try:
-                        df_test = pd.read_csv(csv_path, encoding=encoding, sep=sep, nrows=5)
-                    except Exception:
-                        # This combination cannot parse the file; try the next
-                        continue
-                    if len(df_test.columns) > 5:  # a plausible number of columns
-                        print(f"\n成功讀取 (編碼: {encoding}, 分隔符: '{sep}'):")
-                        print(f"欄位: {list(df_test.columns)}")
-                        print(f"數據形狀: {df_test.shape}")
-                        return encoding, sep
-            print("無法找到合適的讀取參數")
-            return None, None
-        except Exception as e:
-            print(f"調試CSV檔案時發生錯誤: {e}")
-            return None, None
-    def prepare_training_data(self, csv_path=None):
-        """Assemble the feature matrix and target matrix used for training.
-
-        Args:
-            csv_path: Optional CSV file to load. When it is None or does not
-                exist, data is fetched from yfinance and the optional
-                external CSV files instead.
-
-        Returns:
-            Tuple (X, y, index, df): X holds self.feature_columns, y holds
-            self.target_columns, index is the frame's index and df the
-            cleaned frame. Returns (None, None, None, None) on any failure.
-        """
-        try:
-            if csv_path and os.path.exists(csv_path):
-                # Probe the CSV first to find usable read parameters
-                encoding, separator = self.debug_csv_file(csv_path)
-                print(f"\n從 {csv_path} 載入數據...")
-                # Read with the detected parameters, falling back to pandas defaults
-                read_params = {}
-                if encoding:
-                    read_params['encoding'] = encoding
-                if separator and separator != ',':
-                    read_params['sep'] = separator
-                df = pd.read_csv(csv_path, **read_params)
-                # Report the CSV structure
-                print(f"CSV檔案欄位: {list(df.columns)}")
-                print(f"數據形狀: {df.shape}")
-                print("前5行數據:")
-                print(df.head())
-                # Use the first recognised date column as the index
-                date_columns = ['Date', 'date', 'DATE', 'Unnamed: 0']
-                date_col = None
-                for col in date_columns:
-                    if col in df.columns:
-                        date_col = col
-                        break
-                if date_col:
-                    print(f"使用日期欄位: {date_col}")
-                    df[date_col] = pd.to_datetime(df[date_col])
-                    df.set_index(date_col, inplace=True)
-                elif df.index.dtype == 'object':
-                    df.index = pd.to_datetime(df.index)
-                print(f"處理日期後的數據形狀: {df.shape}")
-                print(f"日期範圍: {df.index.min()} 到 {df.index.max()}")
-            else:
-                # Fetch from yfinance and the external files
-                print("從yfinance獲取數據...")
-                df = self.fetch_yfinance_data()
-                if df is None:
-                    return None, None, None, None
-                # Compute the technical indicators
-                df = self.calculate_technical_indicators(df)
-                # Load the external economic data
-                business_climate, pmi_data = self.load_external_data()
-                # Merge it in, or fall back to the default values
-                if not business_climate.empty:
-                    df['business_climate'] = business_climate['Index'].reindex(
-                        df.index, method='ffill'
-                    )
-                else:
-                    df['business_climate'] = 25.0  # default value
-                if not pmi_data.empty:
-                    df['PMI'] = pmi_data['Index'].reindex(df.index, method='ffill')
-                else:
-                    df['PMI'] = 50.0  # default value
-            # Map alternative column spellings onto the canonical names
-            column_mapping = {
-                # possible volume column names
-                'Volume': 'volume', 'vol': 'volume', 'VOLUME': 'volume',
-                # possible close column names
-                'Close': 'close', 'close_price': 'close', 'CLOSE': 'close', 'price': 'close',
-                # possible rate column names
-                'Rate': 'rate', 'return': 'rate', 'pct_change': 'rate', 'RATE': 'rate',
-                # US indices
-                'DJI': 'DJI', 'DOW': 'DJI', 'dow': 'DJI',
-                'NAS': 'NAS', 'NASDAQ': 'NAS', 'nasdaq': 'NAS',
-                'SOX': 'SOX', 'sox': 'SOX',
-                'S&P_500': 'S&P_500', 'SP500': 'S&P_500', 'sp500': 'S&P_500',
-                'TSM_ADR': 'TSM_ADR', 'TSM': 'TSM_ADR', 'tsm': 'TSM_ADR',
-                # technical indicators
-                'rsi': 'RSI', 'macd': 'MACD', 'macdsign': 'MACDsign', 'macdvol': 'MACDvol',
-                'k': 'K', 'd': 'D', '+di': '+DI', '-di': '-DI', 'adx': 'ADX',
-                # economic indicators
-                'Business_Climate': 'business_climate', 'business_climate_index': 'business_climate',
-                'pmi': 'PMI', 'PMI_Index': 'PMI'
-            }
-            df = df.rename(columns=column_mapping)
-            print(f"映射後的欄位: {list(df.columns)}")
-            # Without a close column, try other price-like columns
-            if 'close' not in df.columns:
-                price_candidates = ['Close', 'Price', 'CLOSE', 'close_price']
-                for candidate in price_candidates:
-                    if candidate in df.columns:
-                        df['close'] = df[candidate]
-                        print(f"使用 {candidate} 作為 close 價格")
-                        break
-            # Derive anything the data does not already provide
-            if 'close' in df.columns:
-                if 'rate' not in df.columns:
-                    df['rate'] = df['close'].pct_change()
-                    print("計算了price return rate")
-                # A missing RSI column is taken to mean no indicators are present
-                if 'RSI' not in df.columns:
-                    df = self.calculate_technical_indicators(df)
-                    print("計算了技術指標")
-            # Future close prices are the targets
-            if 'close' in df.columns:
-                for days in [1, 5, 10, 20, 60]:
-                    df[f'close_{days}d'] = df['close'].shift(-days)
-                print("計算了未來價格目標")
-            else:
-                print("錯誤: 找不到價格數據，無法計算目標變數")
-                return None, None, None, None
-            print(f"計算目標變數後的數據形狀: {df.shape}")
-            # Drop rows with missing values
-            original_len = len(df)
-            df = df.dropna()
-            print(f"移除缺失值: {original_len} -> {len(df)} 行")
-            if df.empty:
-                print("錯誤: 處理後的數據集為空")
-                print("可能原因:")
-                print("1. 所有數據都有缺失值")
-                print("2. 日期格式不正確")
-                print("3. 欄位名稱不匹配")
-                return None, None, None, None
-            # Make sure every required feature column exists
-            missing_features = set(self.feature_columns) - set(df.columns)
-            if missing_features:
-                print(f"警告: 缺少特徵欄位: {missing_features}")
-                # Fill missing features with default values
-                for feature in missing_features:
-                    if feature == 'business_climate':
-                        df[feature] = 25.0  # business-climate default
-                    elif feature == 'PMI':
-                        df[feature] = 50.0  # PMI default
-                    else:
-                        df[feature] = 0.0
-                print("已填充缺失的特徵欄位")
-            missing_targets = set(self.target_columns) - set(df.columns)
-            if missing_targets:
-                print(f"錯誤: 缺少目標欄位: {missing_targets}")
-                return None, None, None, None
-            # Extract the feature and target matrices
-            X = df[self.feature_columns].values
-            y = df[self.target_columns].values
-            print(f"數據形狀: X={X.shape}, y={y.shape}")
-            print(f"數據日期範圍: {df.index.min()} 到 {df.index.max()}")
-            return X, y, df.index, df
-        except Exception as e:
-            print(f"準備訓練數據時發生錯誤: {e}")
-            return None, None, None, None
-    def create_sequences(self, X, y):
-        """Slice aligned feature/target rows into sliding-window samples.
-
-        Each sample is self.sequence_length consecutive rows of X, paired
-        with the target row belonging to the LAST row of that window. Row j
-        of y already holds the future prices relative to row j, so pairing
-        a window ending at row i-1 with y[i] would shift every forecast
-        horizon by one extra row compared with inference, which feeds a
-        window ending on the current row.
-
-        Args:
-            X: Array-like of feature rows.
-            y: Array-like of target rows, aligned row-for-row with X.
-
-        Returns:
-            Tuple (X_seq, y_seq) of NumPy arrays; both are empty when X has
-            fewer than self.sequence_length rows.
-        """
-        window = self.sequence_length
-        X_seq, y_seq = [], []
-        # `end` is the exclusive end of the window, so its last row is end - 1
-        for end in range(window, len(X) + 1):
-            X_seq.append(X[end - window:end])
-            y_seq.append(y[end - 1])
-        return np.array(X_seq), np.array(y_seq)
-    def build_model(self, input_shape, output_shape):
-        """Construct and compile the stacked-LSTM regression network.
-
-        Args:
-            input_shape: Shape passed to the first LSTM layer as input_shape.
-            output_shape: Number of units in the linear output layer.
-
-        Returns:
-            The compiled Keras Sequential model.
-
-        Raises:
-            ImportError: If TensorFlow could not be imported.
-        """
-        if not TENSORFLOW_AVAILABLE:
-            raise ImportError("TensorFlow未安裝，無法建立模型")
-        net = Sequential()
-        # First LSTM layer
-        net.add(LSTM(128, return_sequences=True, input_shape=input_shape,
-                     kernel_regularizer=l2(0.001)))
-        net.add(BatchNormalization())
-        net.add(Dropout(0.2))
-        # Second LSTM layer
-        net.add(LSTM(64, return_sequences=True, kernel_regularizer=l2(0.001)))
-        net.add(BatchNormalization())
-        net.add(Dropout(0.2))
-        # Third LSTM layer
-        net.add(LSTM(32, return_sequences=False, kernel_regularizer=l2(0.001)))
-        net.add(BatchNormalization())
-        net.add(Dropout(0.2))
-        # Fully connected layers
-        net.add(Dense(64, kernel_regularizer=l2(0.001)))
-        net.add(LeakyReLU(alpha=0.1))
-        net.add(BatchNormalization())
-        net.add(Dropout(0.3))
-        net.add(Dense(32, kernel_regularizer=l2(0.001)))
-        net.add(LeakyReLU(alpha=0.1))
-        net.add(Dropout(0.2))
-        # Output layer
-        net.add(Dense(output_shape, activation='linear'))
-        # Compile; the Huber loss is less sensitive to outliers
-        net.compile(
-            optimizer=Adam(learning_rate=0.001, clipnorm=1.0),
-            loss='huber',
-            metrics=['mse', 'mae']
-        )
-        return net
-    def train_model(self, csv_path=None):
-        """Prepare data, train the LSTM model and save it with its scalers.
-
-        Args:
-            csv_path: Optional training CSV. If it is given but cannot be
-                used, sample data built from yfinance is tried instead.
-
-        Returns:
-            True when training finished and save_model() was called, False
-            if TensorFlow is missing, the data is unusable or fitting raised.
-        """
-        if not TENSORFLOW_AVAILABLE:
-            print("錯誤: TensorFlow未安裝，無法訓練模型")
-            return False
-        print("開始準備訓練數據...")
-        X, y, dates, df = self.prepare_training_data(csv_path)
-        # If the CSV could not be used, fall back to sample data
-        if (X is None or y is None) and csv_path:
-            print("CSV載入失敗，嘗試創建示例數據...")
-            df = self.create_sample_data()
-            if df is not None:
-                X = df[self.feature_columns].values
-                y = df[self.target_columns].values
-                dates = df.index
-                print("使用示例數據繼續訓練")
-        if X is None or y is None:
-            print("錯誤: 無法準備訓練數據")
-            return False
-        # Check the amount of data
-        if len(X) < 100:
-            print(f"警告: 訓練數據過少 ({len(X)} 樣本)，建議至少100個樣本")
-            return False
-        print("正在縮放數據...")
-        # Scale the features
-        # NOTE(review): the scalers are fitted on all rows, including those
-        # that later form the validation split - confirm this is acceptable
-        self.feature_scaler = RobustScaler()
-        X_scaled = self.feature_scaler.fit_transform(X)
-        # One independent scaler per target column. The buffer is forced to
-        # float so scaled values are never truncated by an integer dtype.
-        y_scaled = np.zeros_like(y, dtype=float)
-        for i, target in enumerate(self.target_columns):
-            scaler = RobustScaler()
-            y_scaled[:, i:i+1] = scaler.fit_transform(y[:, i:i+1])
-            self.target_scalers[target] = scaler
-        print("正在創建時間序列...")
-        X_seq, y_seq = self.create_sequences(X_scaled, y_scaled)
-        if len(X_seq) == 0:
-            print(f"錯誤: 序列長度不足，需要至少{self.sequence_length + 1}個數據點")
-            return False
-        print(f"序列形狀: X_seq={X_seq.shape}, y_seq={y_seq.shape}")
-        # Chronological 80/20 split rather than a random one
-        split_idx = int(len(X_seq) * 0.8)
-        X_train, X_val = X_seq[:split_idx], X_seq[split_idx:]
-        y_train, y_val = y_seq[:split_idx], y_seq[split_idx:]
-        print(f"訓練集大小: {X_train.shape}, 驗證集大小: {X_val.shape}")
-        # Build the model
-        print("正在建立模型...")
-        input_shape = (X_seq.shape[1], X_seq.shape[2])
-        output_shape = y_seq.shape[1]
-        self.model = self.build_model(input_shape, output_shape)
-        print(f"模型架構: 輸入={input_shape}, 輸出={output_shape}")
-        # Callbacks: stop early and lower the learning rate when val_loss stalls
-        callbacks = [
-            EarlyStopping(
-                monitor='val_loss',
-                patience=15,
-                restore_best_weights=True,
-                verbose=1
-            ),
-            ReduceLROnPlateau(
-                monitor='val_loss',
-                factor=0.5,
-                patience=8,
-                min_lr=1e-6,
-                verbose=1
-            )
-        ]
-        # Train the model
-        print("開始訓練模型...")
-        try:
-            self.model.fit(
-                X_train, y_train,
-                validation_data=(X_val, y_val),
-                epochs=50,  # fewer epochs to keep training quick
-                # Scale the batch size with the data, but never below 1
-                batch_size=max(1, min(32, len(X_train) // 4)),
-                callbacks=callbacks,
-                verbose=1
-            )
-        except Exception as e:
-            print(f"訓練過程中發生錯誤: {e}")
-            return False
-        # Evaluate; a failure here is reported but does not fail training
-        print("\n評估模型性能...")
-        try:
-            train_loss = self.model.evaluate(X_train, y_train, verbose=0)
-            val_loss = self.model.evaluate(X_val, y_val, verbose=0)
-            print(f"訓練集損失: {train_loss[0]:.4f}")
-            print(f"驗證集損失: {val_loss[0]:.4f}")
-        except Exception as e:
-            print(f"評估過程中發生錯誤: {e}")
-        # Save the model and the scalers
-        self.save_model()
-        return True
-    def save_model(self):
-        """Write the model to self.model_path and the scalers to self.scalers_path.
-
-        Only scalers that have a `scale_` attribute are stored, as
-        '<name>_scale' and '<name>_center' arrays in one .npz archive.
-        Errors are printed, not raised.
-        """
-        try:
-            if self.model:
-                self.model.save(self.model_path)
-                print(f"模型已儲存至: {self.model_path}")
-            # Collect the feature scaler and all target scalers by name
-            named_scalers = {'feature_scaler': self.feature_scaler, **self.target_scalers}
-            # Keep just the arrays needed to rebuild each scaler
-            arrays = {}
-            for label, fitted in named_scalers.items():
-                if not hasattr(fitted, 'scale_'):
-                    continue
-                arrays[f'{label}_scale'] = fitted.scale_
-                arrays[f'{label}_center'] = fitted.center_
-            np.savez(self.scalers_path, **arrays)
-            print(f"縮放器已儲存至: {self.scalers_path}")
-        except Exception as e:
-            print(f"儲存模型時發生錯誤: {e}")
-    def load_model(self):
-        """Load the saved model and, when present, the saved scalers.
-
-        Returns:
-            True if the model file exists, TensorFlow is available and the
-            model loaded; False otherwise, including on any error. The
-            scalers file is optional and its absence does not change the
-            result.
-        """
-        try:
-            if os.path.exists(self.model_path) and TENSORFLOW_AVAILABLE:
-                self.model = load_model(self.model_path)
-                print("模型載入成功")
-                # Load the scalers
-                if os.path.exists(self.scalers_path):
-                    # np.load returns an archive holding an open file handle;
-                    # the with-block closes it once the arrays are copied out
-                    with np.load(self.scalers_path) as scalers_data:
-                        # Rebuild the feature scaler
-                        if 'feature_scaler_scale' in scalers_data:
-                            self.feature_scaler = RobustScaler()
-                            self.feature_scaler.scale_ = scalers_data['feature_scaler_scale']
-                            self.feature_scaler.center_ = scalers_data['feature_scaler_center']
-                        # Rebuild the target scalers
-                        for target in self.target_columns:
-                            scale_key = f'{target}_scale'
-                            center_key = f'{target}_center'
-                            if scale_key in scalers_data:
-                                scaler = RobustScaler()
-                                scaler.scale_ = scalers_data[scale_key]
-                                scaler.center_ = scalers_data[center_key]
-                                self.target_scalers[target] = scaler
-                    print("縮放器載入成功")
-                return True
-            else:
-                print("模型文件不存在或TensorFlow未安裝")
-                return False
-        except Exception as e:
-            print(f"載入模型時發生錯誤: {e}")
-            return False
-# Module-wide predictor instance, created on first use
-_predictor = None
-def get_predictor():
-    """Return the shared StockPredictor, creating and loading it on first call."""
-    global _predictor
-    if _predictor is not None:
-        return _predictor
-    _predictor = StockPredictor()
-    _predictor.load_model()
-    return _predictor
-def advanced_lstm_predict(predict_days):
-    """Predict the close price `predict_days` ahead with the LSTM model.
-
-    This is the interface used by the main program.
-
-    Args:
-        predict_days: Forecast horizon in days (1, 5, 10, 20 or 60). Any
-            other value falls back to the 5-day target.
-
-    Returns:
-        dict with 'predicted_price', 'change_pct' and 'confidence' as plain
-        Python floats, or None if the prediction fails.
-    """
-    try:
-        predictor = get_predictor()
-        if predictor.model is None:
-            print("模型未載入，無法進行預測")
-            return None
-        if predictor.feature_scaler is None:
-            # Without the saved scalers the features cannot be transformed
-            print("縮放器未載入，無法進行預測")
-            return None
-        # Fetch enough history. The chained rolling windows of the indicators
-        # (2, then 14, then 14 rows for ADX) leave the first 27 rows NaN, and
-        # those rows are dropped below, so the request must cover clearly more
-        # than sequence_length trading days. Calendar days are roughly doubled
-        # to allow for weekends and holidays.
-        lookback_days = max(90, (predictor.sequence_length + 40) * 2)
-        now = datetime.now()
-        current_data = predictor.fetch_yfinance_data(
-            start_date=(now - timedelta(days=lookback_days)).strftime('%Y-%m-%d'),
-            end_date=now.strftime('%Y-%m-%d')
-        )
-        if current_data is None or len(current_data) < predictor.sequence_length:
-            print("無法獲取足夠的當前數據進行預測")
-            return None
-        # Compute the technical indicators
-        current_data = predictor.calculate_technical_indicators(current_data)
-        # Load and merge the external data, defaulting where it is missing
-        business_climate, pmi_data = predictor.load_external_data()
-        if not business_climate.empty:
-            current_data['business_climate'] = business_climate['Index'].reindex(
-                current_data.index, method='ffill'
-            ).fillna(25.0)
-        else:
-            current_data['business_climate'] = 25.0
-        if not pmi_data.empty:
-            current_data['PMI'] = pmi_data['Index'].reindex(
-                current_data.index, method='ffill'
-            ).fillna(50.0)
-        else:
-            current_data['PMI'] = 50.0
-        # Fill in any feature that is still missing
-        for feature in predictor.feature_columns:
-            if feature not in current_data.columns:
-                current_data[feature] = 0.0
-        current_data = current_data.dropna()
-        if len(current_data) < predictor.sequence_length:
-            print("處理後的數據不足以進行預測")
-            return None
-        # Extract and scale the features
-        X_current = current_data[predictor.feature_columns].values
-        X_current_scaled = predictor.feature_scaler.transform(X_current)
-        # The most recent sequence_length rows form the single input sample
-        X_seq = X_current_scaled[-predictor.sequence_length:].reshape(
-            1, predictor.sequence_length, len(predictor.feature_columns)
-        )
-        # Run the prediction
-        prediction_scaled = predictor.model.predict(X_seq, verbose=0)
-        # Pick the output column for the requested horizon
-        target_map = {1: 'close_1d', 5: 'close_5d', 10: 'close_10d',
-                     20: 'close_20d', 60: 'close_60d'}
-        target_col = target_map.get(predict_days, 'close_5d')
-        target_idx = predictor.target_columns.index(target_col)
-        # Undo the target scaling
-        if target_col in predictor.target_scalers:
-            predicted_price = predictor.target_scalers[target_col].inverse_transform(
-                prediction_scaled[:, target_idx:target_idx+1]
-            )[0, 0]
-        else:
-            print(f"未找到 {target_col} 的縮放器")
-            return None
-        # Percentage change against the latest close
-        current_price = current_data['close'].iloc[-1]
-        change_pct = ((predicted_price - current_price) / current_price) * 100
-        # Confidence (simplified): based on recent volatility, clamped to [0.5, 0.9]
-        recent_volatility = current_data['close'].pct_change().tail(20).std()
-        confidence = max(0.5, min(0.9, 1 - recent_volatility * 5))
-        # Return built-in floats rather than NumPy scalars
-        return {
-            'predicted_price': float(predicted_price),
-            'change_pct': float(change_pct),
-            'confidence': float(confidence)
-        }
-    except Exception as e:
-        print(f"LSTM預測時發生錯誤: {e}")
-        return None
-def train_model_from_csv(csv_path):
-    """Train a fresh StockPredictor from csv_path and return train_model()'s result."""
-    return StockPredictor().train_model(csv_path)
-if __name__ == "__main__":
-    # Self-test mode: train a model, then try every forecast horizon
-    print("=== 股價預測模型測試 ===")
-    # Nothing below works without TensorFlow
-    if not TENSORFLOW_AVAILABLE:
-        print("警告: TensorFlow未安裝，無法使用深度學習功能")
-        print("請安裝TensorFlow: pip install tensorflow")
-        # SystemExit is used because the exit() helper comes from the site
-        # module and is not guaranteed to exist
-        raise SystemExit(1)
-    # NOTE(review): one character of this file name was unreadable in the
-    # source and has been restored as 資料 - confirm against the real file
-    csv_file = "新期末專案輸入資料20220912-20250909.csv"
-    predictor = StockPredictor()
-    if os.path.exists(csv_file):
-        print(f"找到CSV檔案: {csv_file}")
-        if predictor.train_model(csv_file):
-            print("模型訓練完成!")
-        else:
-            # The CSV could not be used; retry with yfinance data only
-            print("CSV訓練失敗，嘗試使用yfinance數據...")
-            if predictor.train_model():
-                print("使用yfinance數據訓練完成!")
-            else:
-                print("所有訓練方法都失敗!")
-    else:
-        print(f"未找到CSV檔案: {csv_file}")
-        print("將使用yfinance數據進行訓練...")
-        if predictor.train_model():
-            print("模型訓練完成!")
-        else:
-            print("模型訓練失敗!")
-    # Exercise the prediction function for every supported horizon
-    print("\n=== 測試預測功能 ===")
-    test_predictions = [1, 5, 10, 20, 60]
-    for days in test_predictions:
-        result = advanced_lstm_predict(days)
-        if result:
-            print(f"{days}日預測: 價格={result['predicted_price']:.2f}, "
-                  f"變化={result['change_pct']:+.2f}%, "
-                  f"信心度={result['confidence']:.1%}")
-        else:
-            print(f"{days}日預測失敗")