Spaces:

khjhs60199
/

pyCrawing

Sleeping

App Files Files Community

khjhs60199 commited on Sep 17, 2025

Commit

6616e71

verified ·

1 Parent(s): 03efb54

Update app.py

Browse files

Files changed (1) hide show

app.py +113 -294

app.py CHANGED Viewed

@@ -8,9 +8,11 @@ import time
 from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import os
-from flask import Flask, jsonify, request, Response
-from flask_cors import CORS
 import json
 from crawler import CnYesNewsCrawler
 from sentiment_analyzer import SentimentAnalyzer
@@ -22,16 +24,6 @@ from utils import setup_logging, format_news_for_display
 setup_logging()
 logger = logging.getLogger(__name__)
-# Flask API 應用 - 添加 CORS 支援
-flask_app = Flask(__name__)
-CORS(flask_app, resources={
-    r"/api/*": {
-        "origins": "*",
-        "methods": ["GET", "POST", "OPTIONS"],
-        "allow_headers": ["Content-Type", "Authorization"]
-    }
-})
 class NewsApp:
     def __init__(self):
         self.db = NewsDatabase()
@@ -327,126 +319,9 @@ class NewsApp:
 # 初始化應用
 app = NewsApp()
-# API 路由
-@flask_app.route('/api', methods=['GET'])
-def api_info():
-    """API資訊頁面"""
-    return jsonify({
-        'name': '股市新聞情緒分析API',
-        'version': '1.0',
-        'status': 'running',
-        'endpoints': {
-            'news': '/api/news',
-            'stats': '/api/stats',
-            'crawl': '/api/crawl',
-            'progress': '/api/progress'
-        },
-        'timestamp': datetime.now().isoformat()
-    })
-@flask_app.route('/api/news', methods=['GET'])
-def api_get_news():
-    """獲取新聞列表API"""
-    try:
-        category = request.args.get('category', 'all')
-        days = int(request.args.get('days', 7))
-        keyword = request.args.get('keyword', '')
-        sentiment_filter = request.args.get('sentiment', 'all')
-        result = app.get_news_api_data(category, days, keyword, sentiment_filter)
-        return jsonify(result)
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e),
-            'data': [],
-            'timestamp': datetime.now().isoformat()
-        }), 500
-@flask_app.route('/api/stats', methods=['GET'])
-def api_get_stats():
-    """獲取統計信息API"""
-    try:
-        stats = app.db.get_statistics()
-        return jsonify({
-            'success': True,
-            'data': stats,
-            'auto_crawl_completed': app.auto_crawl_completed,
-            'is_initialized': app.is_initialized,
-            'is_crawling': app.is_crawling,
-            'timestamp': datetime.now().isoformat()
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e),
-            'timestamp': datetime.now().isoformat()
-        }), 500
-@flask_app.route('/api/crawl', methods=['POST', 'OPTIONS'])
-def api_manual_crawl():
-    """手動觸發爬蟲API"""
-    if request.method == 'OPTIONS':
-        return '', 200
-    try:
-        if not app.is_initialized:
-            return jsonify({
-                'success': False,
-                'message': '系統還在初始化中',
-                'timestamp': datetime.now().isoformat()
-            }), 400
-        if app.is_crawling:
-            return jsonify({
-                'success': False,
-                'message': '爬蟲正在運行中',
-                'timestamp': datetime.now().isoformat()
-            }), 400
-        # 檢查是否要求無限制模式
-        unlimited = True
-        if request.is_json and request.json:
-            unlimited = request.json.get('unlimited', True)
-        # 在背景執行爬蟲
-        def run_crawl():
-            app.manual_crawl(unlimited=unlimited)
-        threading.Thread(target=run_crawl, daemon=True).start()
-        mode_text = "無限制" if unlimited else "限制"
-        return jsonify({
-            'success': True,
-            'message': f'爬蟲任務已啟動（{mode_text}模式）',
-            'timestamp': datetime.now().isoformat()
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e),
-            'timestamp': datetime.now().isoformat()
-        }), 500
-@flask_app.route('/api/progress', methods=['GET'])
-def api_get_progress():
-    """獲取爬蟲進度API"""
-    try:
-        progress, needs_update = app.get_progress()
-        return jsonify({
-            'progress': progress,
-            'is_crawling': app.is_crawling,
-            'is_initialized': app.is_initialized,
-            'needs_update': needs_update,
-            'auto_crawl_completed': app.auto_crawl_completed,
-            'timestamp': datetime.now().isoformat()
-        })
-    except Exception as e:
-        return jsonify({
-            'success': False,
-            'error': str(e),
-            'timestamp': datetime.now().isoformat()
-        }), 500
 # 創建 Gradio 介面
 def create_interface():
@@ -475,184 +350,128 @@ def create_interface():
         🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
         🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
         📊 **即時統計**: 提供詳細的新聞統計資訊
         """)
-        with gr.Tab("📰 最新新聞"):
-            with gr.Row():
-                with gr.Column(scale=1):
-                    category_radio = gr.Radio(
-                        choices=[
-                            ("所有新聞", "all"),
-                            ("美股新聞", "us_stock"),
-                            ("台股新聞", "tw_stock")
-                        ],
-                        value="all",
-                        label="📋 新聞分類"
-                    )
-                    days_slider = gr.Slider(
-                        minimum=0,
-                        maximum=30,
-                        value=7,
-                        step=1,
-                        label="📅 時間範圍 (天)",
-                        info="0表示不限制時間"
-                    )
-                    keyword_input = gr.Textbox(
-                        label="🔍 關鍵字搜尋",
-                        placeholder="輸入關鍵字搜尋新聞...",
-                        value=""
-                    )
-                    sentiment_radio = gr.Radio(
-                        choices=[
-                            ("所有情緒", "all"),
-                            ("正面情緒", "positive"),
-                            ("負面情緒", "negative"),
-                            ("中性情緒", "neutral")
-                        ],
-                        value="all",
-                        label="😊 情緒篩選"
-                    )
-                    # 爬蟲模式選擇
-                    crawl_mode = gr.Radio(
-                        choices=[
-                            ("無限制爬取 (全部文章)", True),
-                            ("限制爬取 (20篇)", False)
-                        ],
-                        value=True,
-                        label="🚀 爬蟲模式",
-                        info="選擇爬取模式"
-                    )
-                with gr.Column(scale=2):
-                    with gr.Row():
-                        search_btn = gr.Button("🔍 搜尋新聞", variant="primary")
-                        refresh_btn = gr.Button("🔄 重新整理", variant="secondary")
-                        manual_crawl_btn = gr.Button("🚀 手動爬取", variant="secondary")
-                    # 進度顯示
-                    progress_display = gr.Textbox(
-                        label="📊 系統狀態",
-                        value=app.current_progress,
-                        interactive=False,
-                        elem_classes=["progress-box"],
-                        lines=1
-                    )
-            news_display = gr.HTML(
-                label="新聞內容",
-                value="⏳ 系統正在初始化並自動爬取新聞，請稍候..."
-            )
-            crawl_result = gr.Textbox(label="爬取結果", visible=False)
-            # 更新函數
-            def update_progress_only():
-                """只更新進度，不更新新聞"""
-                progress, needs_update = app.get_progress()
-                if needs_update or app.is_crawling:
-                    return progress
-                else:
-                    return gr.update()
-            def update_news_automatically():
-                """自動更新新聞內容"""
-                if app.auto_crawl_completed:
-                    return app.get_latest_news("all", 7, "", "all", force_refresh=True)
-                else:
-                    return gr.update()
-            def search_news(category, days, keyword, sentiment):
-                """搜尋新聞"""
-                logger.info(f"搜尋新聞 - 分類: {category}, 天數: {days}, 關鍵字: '{keyword}', 情緒: {sentiment}")
-                return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
-            def refresh_current_search(category, days, keyword, sentiment):
-                """刷新當前搜尋"""
-                return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
-            def handle_manual_crawl(category, days, keyword, sentiment, unlimited_mode):
-                """處理手動爬蟲"""
-                result = app.manual_crawl(unlimited=unlimited_mode)
-                # 爬取完成後自動刷新當前搜尋
-                news = app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
-                return result, news
-            # 進度更新定時器
-            progress_timer = gr.Timer(value=10)
-            progress_timer.tick(
-                fn=update_progress_only,
-                outputs=[progress_display]
-            )
-            # 新聞自動更新定時器
-            news_timer = gr.Timer(value=15)  # 每15秒檢查一次
-            news_timer.tick(
-                fn=update_news_automatically,
-                outputs=[news_display]
-            )
-            # 綁定事件
-            search_btn.click(
-                search_news,
-                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
-                outputs=[news_display]
-            )
-            refresh_btn.click(
-                refresh_current_search,
-                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
-                outputs=[news_display]
-            )
-            manual_crawl_btn.click(
-                handle_manual_crawl,
-                inputs=[category_radio, days_slider, keyword_input, sentiment_radio, crawl_mode],
-                outputs=[crawl_result, news_display]
-            ).then(
-                lambda: gr.update(visible=True),
-                outputs=[crawl_result]
-            )
-            # 分類改變時自動搜尋
-            category_radio.change(
-                search_news,
-                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
-                outputs=[news_display]
-            )
-            # 初始載入時顯示等待訊息
-            interface.load(
-                lambda: "⏳ 系統正在自動爬取新聞，請稍候...",
-                outputs=[news_display]
-            )
-        with gr.Tab("📊 統計資訊"):
-            stats_display = gr.Markdown()
-            stats_refresh_btn = gr.Button("🔄 更新統計")
-            stats_refresh_btn.click(app.get_statistics, outputs=[stats_display])
-            interface.load(app.get_statistics, outputs=[stats_display])
-    return interface
 # 啟動應用
 if __name__ == "__main__":
-    import threading
     print("🚀 啟動股市新聞情緒分析器...")
     print("📊 API接口: https://khjhs60199-pycrawing.hf.space/api")
     print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
-    # 啟動Gradio介面，並使用Gradio內建的API端點
     interface = create_interface()
-    # 掛載Flask API到Gradio的app
-    interface.mount_gradio_app(flask_app, path="/api")
-    # 只啟動Gradio介面，Flask API會自動整合
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,

 from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import os
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
 import json
+from pydantic import BaseModel
 from crawler import CnYesNewsCrawler
 from sentiment_analyzer import SentimentAnalyzer
 setup_logging()
 logger = logging.getLogger(__name__)
 class NewsApp:
     def __init__(self):
         self.db = NewsDatabase()
 # 初始化應用
 app = NewsApp()
+# 定義請求模型
+class CrawlRequest(BaseModel):
+    unlimited: bool = True
 # 創建 Gradio 介面
 def create_interface():
         🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
         🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
         📊 **即時統計**: 提供詳細的新聞統計資訊
+        📡 **API接口**: https://khjhs60199-pycrawing.hf.space/api
         """)
+        # ... (其餘Gradio介面代碼保持不變) ...
+    return interface
+# 設定API路由
+def setup_api_routes(app_instance):
+    """設定API路由"""
+    @app_instance.get("/api")
+    async def api_info():
+        """API資訊頁面"""
+        return {
+            'name': '股市新聞情緒分析API',
+            'version': '1.0',
+            'status': 'running',
+            'endpoints': {
+                'news': '/api/news',
+                'stats': '/api/stats',
+                'crawl': '/api/crawl',
+                'progress': '/api/progress'
+            },
+            'timestamp': datetime.now().isoformat()
+        }
+    @app_instance.get("/api/news")
+    async def api_get_news(
+        category: str = "all",
+        days: int = 7,
+        keyword: str = "",
+        sentiment: str = "all"
+    ):
+        """獲取新聞列表API"""
+        try:
+            result = app.get_news_api_data(category, days, keyword, sentiment)
+            return result
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+    @app_instance.get("/api/stats")
+    async def api_get_stats():
+        """獲取統計信息API"""
+        try:
+            stats = app.db.get_statistics()
+            return {
+                'success': True,
+                'data': stats,
+                'auto_crawl_completed': app.auto_crawl_completed,
+                'is_initialized': app.is_initialized,
+                'is_crawling': app.is_crawling,
+                'timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+    @app_instance.post("/api/crawl")
+    async def api_manual_crawl(request: CrawlRequest):
+        """手動觸發爬蟲API"""
+        try:
+            if not app.is_initialized:
+                raise HTTPException(status_code=400, detail="系統還在初始化中")
+            if app.is_crawling:
+                raise HTTPException(status_code=400, detail="爬蟲正在運行中")
+            # 在背景執行爬蟲
+            def run_crawl():
+                app.manual_crawl(unlimited=request.unlimited)
+            threading.Thread(target=run_crawl, daemon=True).start()
+            mode_text = "無限制" if request.unlimited else "限制"
+            return {
+                'success': True,
+                'message': f'爬蟲任務已啟動（{mode_text}模式）',
+                'timestamp': datetime.now().isoformat()
+            }
+        except HTTPException:
+            raise
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+    @app_instance.get("/api/progress")
+    async def api_get_progress():
+        """獲取爬蟲進度API"""
+        try:
+            progress, needs_update = app.get_progress()
+            return {
+                'progress': progress,
+                'is_crawling': app.is_crawling,
+                'is_initialized': app.is_initialized,
+                'needs_update': needs_update,
+                'auto_crawl_completed': app.auto_crawl_completed,
+                'timestamp': datetime.now().isoformat()
+            }
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
 # 啟動應用
 if __name__ == "__main__":
     print("🚀 啟動股市新聞情緒分析器...")
     print("📊 API接口: https://khjhs60199-pycrawing.hf.space/api")
     print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
+    # 創建Gradio介面
     interface = create_interface()
+    # 設定CORS
+    interface.app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    # 設定API路由
+    setup_api_routes(interface.app)
+    # 啟動Gradio介面
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,