Spaces:

DouletMedia
/

DouletStock

Runtime error

App Files Files Community

changcheng967 committed on Apr 30, 2025

Commit

9c4550c

verified ·

1 Parent(s): 1994951

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -79

app.py CHANGED Viewed

@@ -3,74 +3,111 @@ import gradio as gr
 import yfinance as yf
 import pandas as pd
 import numpy as np
-from sklearn.preprocessing import MinMaxScaler
 from sklearn.pipeline import make_pipeline
-from sklearn.linear_model import Ridge
 from loguru import logger
-import time
 import threading
-import plotly.graph_objs as go
 # Configure logging: DEBUG and above go to app.log, rotated at 1 MB
-logger.add("app.log", rotation="1 MB", level="DEBUG")
-def quick_feature_engineering(df):
-    """Quick feature engineering.
-
-    Works on a copy of ``df`` and adds ``Returns`` (percentage change of
-    ``Close``), ``Volatility`` (5-row rolling standard deviation of
-    ``Returns``), and ``Day`` / ``Month`` taken from the index, then drops
-    every row that contains NaN.
-    """
-    df = df.copy()
-    # Basic features
-    df['Returns'] = df['Close'].pct_change()
-    df['Volatility'] = df['Returns'].rolling(5).std()
-    # Simplified time features (assumes a datetime-like index -- TODO confirm)
-    df['Day'] = df.index.dayofweek
-    df['Month'] = df.index.month
-    return df.dropna()
-def rapid_training(ticker):
-    """Rapid training flow (must finish within 30 seconds).
-
-    Downloads one year of data for ``ticker``, builds features, fits a
-    scaler + Ridge pipeline on all rows except the last 7, and predicts 7
-    steps ahead. Returns ``None`` when any exception is raised.
-    """
     start_time = time.time()
     try:
-        # Fetch data (limited to 1 year)
         logger.info(f"Fetching data for {ticker}")
-        data = yf.download(ticker, period="1y", progress=False)
-        if data.empty:
-            raise ValueError("No data available")
         # Feature engineering
         logger.debug("Processing features")
-        data = quick_feature_engineering(data)
         # Prepare training data
         X = data.drop(columns=['Close'])
         y = data['Close']
-        # Hold out the last 7 days as the test set
-        train_size = -7
-        X_train, y_train = X.iloc[:train_size], y.iloc[:train_size]
-        # Use a lightweight model pipeline
-        # NOTE(review): as captured, the make_pipeline( call below is never closed -- confirm against the real file
         model = make_pipeline(
-            MinMaxScaler(),
-            Ridge(alpha=1.0)  # Tune regularization strength
-        logger.info("Start training")
-        model.fit(X_train, y_train)
-        # Generate predictions (next 7 days)
         logger.debug("Generating predictions")
-        last_features = X.iloc[-1:].values
         future_dates = pd.date_range(data.index[-1], periods=8)[1:]
-        predictions = []
-        # Recursive prediction: each prediction is written back into the feature row
-        current_features = last_features.copy()
         for _ in range(7):
             pred = model.predict(current_features)[0]
             predictions.append(pred)
-            # Update features (simplified handling)
-            # NOTE(review): 'Close' was dropped from X above, so positions 0 and 3 may not be Open/Close -- verify column order
-            current_features[0][0] = pred  # Update Open (per original comment)
-            current_features[0][3] = pred  # Update Close (per original comment)
         training_time = time.time() - start_time
         logger.success(f"Training completed in {training_time:.2f}s")
@@ -82,11 +119,11 @@ def rapid_training(ticker):
         }
     except Exception as e:
-        logger.error(f"Error in training: {str(e)}")
         return None
 def create_plot(result):
-    """Create the interactive chart.
-
-    Reads ``result['data']`` (historical rows with a ``Close`` column) and
-    ``result['predictions']`` (indexed predicted values) and returns the
-    figure with one trace for each.
-    """
     data = result['data']
     pred = result['predictions']
@@ -96,79 +133,106 @@ def create_plot(result):
     # NOTE(review): `fig` is presumably created in the lines elided by the hunk header above
     # NOTE(review): as captured, each add_trace(go.Scatter( call shows only one closing parenthesis -- confirm against the real file
     fig.add_trace(go.Scatter(
         x=data.index,
         y=data['Close'],
-        name='Historical Price',
-        line=dict(color='blue')
     )
     # Predicted prices
     fig.add_trace(go.Scatter(
         x=pred.index,
         y=pred.values,
-        name='Prediction',
-        line=dict(color='red', dash='dot')
     )
     fig.update_layout(
-        title=f"Stock Price Prediction",
-        xaxis_title='Date',
-        yaxis_title='Price (USD)',
         hovermode="x unified",
-        showlegend=True
     )
     return fig
 def predict_stock(ticker):
-    """Handle the prediction flow.
-
-    Generator that yields ``(status message, figure or None)`` pairs: a
-    loading message first, then either a failure/timeout message or the
-    final summary with the plot.
-    """
     start_time = time.time()
-    # Show the loading status
-    yield "⌛ 正在获取数据并训练模型（最多30秒）...", None
-    # Run training in a background thread
     result = None
-    def train_thread():
         nonlocal result
-        result = rapid_training(ticker)
-    thread = threading.Thread(target=train_thread)
     thread.start()
-    # Wait for completion (at most 30 seconds)
-    thread.join(timeout=30)
-    if not result:
-        yield "❌ 训练失败或超时，请尝试其他股票代码", None
         return
-    if result['training_time'] > 30:
-        yield "⚠️ 训练超时，结果可能不准确", create_plot(result)
         return
-    # Show the result
-    # NOTE(review): `pred` is not assigned anywhere in this function as shown; the f-string below looks like it would raise NameError
-    info_msg = f"✅ 训练成功（耗时{result['training_time']:.1f}秒）\n" \
-              f"最新预测：{pred.values[-1]:.2f} USD（{pred.index[-1].strftime('%Y-%m-%d')}）"
-    yield info_msg, create_plot(result)
-# Gradio UI: a row with the ticker textbox and submit button, followed by a
-# read-only status textbox and a plot. Clicking the button runs predict_stock
-# with the textbox value and sends its two yielded values to status and plot.
-with gr.Blocks() as demo:
-    gr.Markdown("# 🚀 实时股票预测系统")
     with gr.Row():
-        ticker_input = gr.Textbox(
-            label="输入股票代码",
-            placeholder="例如：AAPL（美股）, 0005.HK（港股）",
-            max_lines=1
-        )
-        submit_btn = gr.Button("立即预测", variant="primary")
-    status_output = gr.Textbox(label="状态", interactive=False)
-    plot_output = gr.Plot(label="价格预测")
     submit_btn.click(
         predict_stock,
         inputs=ticker_input,
-        outputs=[status_output, plot_output]
     )
 if __name__ == "__main__":

 import yfinance as yf
 import pandas as pd
 import numpy as np
+import plotly.graph_objs as go
 from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import RobustScaler
+from sklearn.model_selection import TimeSeriesSplit
+from lightgbm import LGBMRegressor
 from loguru import logger
 import threading
+import time
+from ta import add_all_ta_features  # 技术指标库
 # Configure logging: DEBUG and above go to app.log (rotated at 1 MB), with
 # loguru's backtrace and diagnose options enabled
+logger.add("app.log", rotation="1 MB", level="DEBUG", backtrace=True, diagnose=True)
+def enhanced_feature_engineering(df):
+    """Build the model's feature table, including technical indicators.
+
+    Works on a copy of ``df``: adds ``Returns`` (percentage change of
+    ``Close``) and ``Volatility`` (5-row rolling standard deviation of
+    ``Returns``), runs ``add_all_ta_features`` over the Open/High/Low/Close/
+    Volume columns, then keeps only the seven columns listed in
+    ``selected_features`` and drops every row that contains NaN.
+
+    Any exception is logged and then re-raised unchanged.
+    """
+    try:
+        df = df.copy()
+        # Basic features
+        # NOTE(review): assumes flat, single-level column labels ('Close', 'Open', ...);
+        # confirm that the yf.download result passed in by the caller is not a MultiIndex frame
+        df['Returns'] = df['Close'].pct_change()
+        df['Volatility'] = df['Returns'].rolling(5).std()
+        # Use the ta library to add technical indicators in one call
+        df = add_all_ta_features(
+            df,
+            open="Open", high="High", low="Low", close="Close", volume="Volume",
+            fillna=True
+        )
+        # Select the key features ('Close' stays in; the caller splits it off as the target)
+        selected_features = [
+            'Close', 'Returns', 'Volatility',
+            'trend_ema_fast', 'trend_ema_slow',
+            'momentum_rsi', 'volume_obv'
+        ]
+        return df[selected_features].dropna()
+    except Exception as e:
+        logger.error(f"Feature engineering failed: {str(e)}")
+        raise
+def robust_training(ticker):
+    """Enhanced training flow (intended to stay within a 30-second budget).
+
+    Downloads one year of daily data for ``ticker``, builds features with
+    ``enhanced_feature_engineering``, fits a RobustScaler + LGBMRegressor
+    pipeline on successive time-series training folds, and then predicts 7
+    steps ahead. Returns ``None`` when any exception is raised; the success
+    return value is built in lines not shown here (callers read the keys
+    ``'data'``, ``'predictions'`` and ``'training_time'``).
+    """
     start_time = time.time()
     try:
+        # Fetch data (tuned API parameters)
         logger.info(f"Fetching data for {ticker}")
+        data = yf.download(
+            ticker,
+            period="1y",
+            interval="1d",
+            prepost=False,
+            threads=False,
+            progress=False
+        )
+        if data.empty or len(data) < 30:
+            raise ValueError("Insufficient data for training")
         # Feature engineering
         logger.debug("Processing features")
+        data = enhanced_feature_engineering(data)
         # Prepare training data
         X = data.drop(columns=['Close'])
         y = data['Close']
+        # Time-series cross-validation splitter
+        tscv = TimeSeriesSplit(n_splits=3)
+        # Lightweight model pipeline
         model = make_pipeline(
+            RobustScaler(),
+            LGBMRegressor(
+                n_estimators=100,
+                max_depth=5,
+                learning_rate=0.1,
+                verbosity=-1,
+                force_row_wise=True
+            )
+        )
+        # Quick cross-validation
+        # NOTE(review): the test indices are discarded, so no validation score is computed;
+        # the same pipeline is simply refit on each training fold and the last fit is kept
+        logger.info("Starting rapid training")
+        for train_index, _ in tscv.split(X):
+            X_train = X.iloc[train_index]
+            y_train = y.iloc[train_index]
+            model.fit(X_train, y_train)
+            # The time budget is only checked between fits, not during one
+            if (time.time() - start_time) > 25:  # Keep 5 seconds for prediction
+                break
+        # Generate predictions
         logger.debug("Generating predictions")
         future_dates = pd.date_range(data.index[-1], periods=8)[1:]
+        # Start from the last available feature row
+        current_features = X.iloc[-1:].copy()
+        predictions = []
         for _ in range(7):
             pred = model.predict(current_features)[0]
             predictions.append(pred)
+            # Update features (simplified logic)
+            # NOTE(review): X was built by dropping 'Close', so current_features has no
+            # 'Close' column; the read below looks like it raises KeyError, which the
+            # except clause turns into a None return -- confirm and fix
+            current_features['Returns'] = (pred - current_features['Close']) / current_features['Close']
+            current_features['Close'] = pred
         training_time = time.time() - start_time
         logger.success(f"Training completed in {training_time:.2f}s")
         }
     except Exception as e:
+        logger.error(f"Training error: {str(e)}")
         return None
 def create_plot(result):
+    """Enhanced visualization.
+
+    Reads ``result['data']`` (historical rows with a ``Close`` column) and
+    ``result['predictions']`` (indexed predicted values), adds one trace for
+    each, applies the layout settings, and returns the figure.
+    """
     data = result['data']
     pred = result['predictions']
     # NOTE(review): `fig` is presumably created in lines the diff does not show here
     # NOTE(review): as captured, each add_trace(go.Scatter( call shows only one closing parenthesis -- confirm against the real file
     fig.add_trace(go.Scatter(
         x=data.index,
         y=data['Close'],
+        name='历史价格',
+        line=dict(color='#1f77b4')
     )
     # Predicted prices
     fig.add_trace(go.Scatter(
         x=pred.index,
         y=pred.values,
+        name='AI预测',
+        line=dict(color='#ff7f0e', dash='dot')
     )
     fig.update_layout(
+        title=f"股价预测结果",
+        xaxis_title="日期",
+        yaxis_title="价格 (USD)",
         hovermode="x unified",
+        legend=dict(orientation="h", yanchor="bottom", y=1.02),
+        margin=dict(t=40, b=20),
+        template="plotly_white"
     )
     return fig
 def predict_stock(ticker):
+    """Enhanced prediction flow.
+
+    Generator that yields ``(status text, figure or None, risk text or None)``
+    triples: a progress message first, then either a timeout message, an
+    invalid-data message, or the final summary with the plot and the risk
+    disclaimer.
+    """
     start_time = time.time()
+    yield "⌛ 正在快速分析市场数据（预计30秒内完成）...", None, None
     result = None
+    error_msg = ""
+    def training_task():
         nonlocal result
+        try:
+            result = robust_training(ticker)
+        except Exception as e:
+            logger.error(f"Critical error: {str(e)}")
+    thread = threading.Thread(target=training_task)
     thread.start()
+    # Wait for the thread to finish (at most 30 seconds), polling every 0.1 s
+    # NOTE(review): on timeout the loop only breaks; the worker thread is not joined or cancelled here
+    while thread.is_alive():
+        if (time.time() - start_time) > 30:
+            error_msg = "⏰ 系统响应超时，请简化查询条件后重试"
+            break
+        time.sleep(0.1)
+    if error_msg:
+        yield error_msg, None, None
         return
+    # robust_training returns None on any error, which lands in this branch
+    if not result or result['predictions'].empty:
+        yield "⚠️ 数据不足或股票代码无效，请尝试其他代码", None, None
         return
+    # Build the risk disclaimer
+    risk_warning = """
+    **风险提示说明**
+    1. 本预测基于历史数据生成，不构成投资建议
+    2. 实际股价受市场环境、公司公告等多因素影响
+    3. 预测误差可能随市场波动增大
+    4. 过去表现不代表未来结果
+    最新预测仅供参考，请理性判断
+    """
+    # Format the output message
+    time_used = f"{result['training_time']:.1f}秒"
+    latest_pred = f"{result['predictions'].iloc[-1]:.2f} USD"
+    info_box = f"""
+    ✅ 分析完成（耗时：{time_used}）
+    📅 最新预测日期：{result['predictions'].index[-1].strftime('%Y-%m-%d')}
+    💵 预测收盘价：{latest_pred}
+    """
+    yield info_box, create_plot(result), risk_warning
+# Gradio UI: one row with two columns. The first column (scale=2) holds the
+# ticker textbox and the submit button; the second (scale=3) holds the status
+# Markdown, the plot, and the risk-disclaimer Markdown. Clicking the button
+# runs predict_stock with the textbox value and routes its three yielded
+# values to those three outputs.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📊 智能股票预测系统")
     with gr.Row():
+        with gr.Column(scale=2):
+            ticker_input = gr.Textbox(
+                label="输入股票代码",
+                placeholder="例如：AAPL (苹果), 00700.HK (腾讯)",
+                max_lines=1
+            )
+            submit_btn = gr.Button("开始分析", variant="primary")
+        with gr.Column(scale=3):
+            status_output = gr.Markdown(label="分析进度")
+            plot_output = gr.Plot(label="价格趋势")
+            risk_output = gr.Markdown()
     submit_btn.click(
         predict_stock,
         inputs=ticker_input,
+        outputs=[status_output, plot_output, risk_output]
     )
 if __name__ == "__main__":