Spaces:

khjhs60199
/

pyCrawing

Sleeping

App Files Files Community

khjhs60199 committed on Sep 17, 2025

Commit

deb1a9b

verified ·

1 Parent(s): 6616e71

Update app.py

Browse files

Files changed (1) hide show

app.py +270 -125

app.py CHANGED Viewed

@@ -8,11 +8,8 @@ import time
 from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import os
-from fastapi import FastAPI, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import JSONResponse
 import json
-from pydantic import BaseModel
 from crawler import CnYesNewsCrawler
 from sentiment_analyzer import SentimentAnalyzer
@@ -24,6 +21,9 @@ from utils import setup_logging, format_news_for_display
 setup_logging()
 logger = logging.getLogger(__name__)
 class NewsApp:
     def __init__(self):
         self.db = NewsDatabase()
@@ -37,7 +37,7 @@ class NewsApp:
         self.current_progress = "正在初始化系統..."
         self.is_crawling = False
         self.is_initialized = False
-        self.auto_crawl_completed = False
         # 上次新聞更新時間，用於防止無意義的刷新
         self.last_news_update = 0
@@ -74,7 +74,7 @@ class NewsApp:
                 self.update_progress("系統初始化完成，開始自動爬取新聞...")
                 logger.info("所有組件初始化完成")
-                # 自動執行第一次爬取
                 self._auto_initial_crawl()
             except Exception as e:
@@ -90,6 +90,7 @@ class NewsApp:
         """自動執行初始爬取"""
         def auto_crawl_task():
             try:
                 time.sleep(3)
                 self.update_progress("🚀 自動開始首次爬取...")
@@ -98,7 +99,7 @@ class NewsApp:
                 # 檢查資料庫是否已有最近的新聞
                 recent_news = self.db.get_recent_news(category="all", days=1)
-                if len(recent_news) < 10:
                     self.update_progress("📊 檢測到新聞數量較少，開始自動爬取...")
                     results = self.crawler.crawl_all_categories(unlimited=True)
@@ -120,6 +121,7 @@ class NewsApp:
             finally:
                 self.is_crawling = False
         auto_crawl_thread = threading.Thread(target=auto_crawl_task, daemon=True)
         auto_crawl_thread.start()
@@ -133,20 +135,24 @@ class NewsApp:
     def get_progress(self) -> tuple:
         """獲取當前進度和是否需要更新"""
         current_time = time.time()
-        needs_update = (current_time - self.last_progress_update) < 5
         return self.current_progress, needs_update
     def get_latest_news(self, category: str = "all", days: int = 7,
                        keyword: str = "", sentiment_filter: str = "all",
                        force_refresh: bool = False) -> str:
-        """獲取最新新聞並格式化顯示"""
         try:
             current_time = time.time()
             if not force_refresh and (current_time - self.last_news_update) < 5:
                 pass
             self.last_news_update = current_time
             logger.info(f"獲取新聞 - 分類: {category}, 天數: {days}, 關鍵字: '{keyword}', 情緒: {sentiment_filter}")
             news_data = self.db.get_recent_news(
@@ -157,6 +163,7 @@ class NewsApp:
             )
             if not news_data:
                 if not self.auto_crawl_completed:
                     return "⏳ 系統正在自動爬取新聞，請稍候..."
@@ -173,6 +180,7 @@ class NewsApp:
                 filter_text = "、".join(filter_desc) if filter_desc else "所有條件"
                 return f"📰 暫無符合條件的新聞資料 ({filter_text})，請調整篩選條件或執行爬蟲任務"
             filter_parts = []
             if category != "all":
                 filter_parts.append(self._get_category_name(category))
@@ -217,7 +225,7 @@ class NewsApp:
         return sentiment_names.get(sentiment, sentiment)
     def manual_crawl(self, unlimited: bool = True) -> str:
-        """手動觸發爬蟲"""
         if not self.is_initialized:
             return "⚠️ 系統還在初始化中，請稍後再試"
@@ -230,6 +238,7 @@ class NewsApp:
             self.update_progress(f"🚀 手動爬蟲開始（{mode_text}模式）")
             results = self.crawler.crawl_all_categories(unlimited=unlimited)
             total_articles = sum(len(articles) for articles in results.values())
@@ -250,6 +259,7 @@ class NewsApp:
         try:
             stats = self.db.get_statistics()
             auto_status = "✅ 已完成" if self.auto_crawl_completed else "⏳ 進行中" if self.is_crawling else "⚠️ 未執行"
             return f"""
@@ -303,8 +313,7 @@ class NewsApp:
                 'success': True,
                 'count': len(api_data),
                 'data': api_data,
-                'auto_crawl_completed': self.auto_crawl_completed,
-                'timestamp': datetime.now().isoformat()
             }
         except Exception as e:
@@ -312,16 +321,89 @@ class NewsApp:
             return {
                 'success': False,
                 'error': str(e),
-                'data': [],
-                'timestamp': datetime.now().isoformat()
             }
 # 初始化應用
 app = NewsApp()
-# 定義請求模型
-class CrawlRequest(BaseModel):
-    unlimited: bool = True
 # 創建 Gradio 介面
 def create_interface():
@@ -350,128 +432,191 @@ def create_interface():
         🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
         🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
         📊 **即時統計**: 提供詳細的新聞統計資訊
-        📡 **API接口**: https://khjhs60199-pycrawing.hf.space/api
         """)
-        # ... (其餘Gradio介面代碼保持不變) ...
-    return interface
-# 設定API路由
-def setup_api_routes(app_instance):
-    """設定API路由"""
-    @app_instance.get("/api")
-    async def api_info():
-        """API資訊頁面"""
-        return {
-            'name': '股市新聞情緒分析API',
-            'version': '1.0',
-            'status': 'running',
-            'endpoints': {
-                'news': '/api/news',
-                'stats': '/api/stats',
-                'crawl': '/api/crawl',
-                'progress': '/api/progress'
-            },
-            'timestamp': datetime.now().isoformat()
-        }
-    @app_instance.get("/api/news")
-    async def api_get_news(
-        category: str = "all",
-        days: int = 7,
-        keyword: str = "",
-        sentiment: str = "all"
-    ):
-        """獲取新聞列表API"""
-        try:
-            result = app.get_news_api_data(category, days, keyword, sentiment)
-            return result
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    @app_instance.get("/api/stats")
-    async def api_get_stats():
-        """獲取統計信息API"""
-        try:
-            stats = app.db.get_statistics()
-            return {
-                'success': True,
-                'data': stats,
-                'auto_crawl_completed': app.auto_crawl_completed,
-                'is_initialized': app.is_initialized,
-                'is_crawling': app.is_crawling,
-                'timestamp': datetime.now().isoformat()
-            }
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    @app_instance.post("/api/crawl")
-    async def api_manual_crawl(request: CrawlRequest):
-        """手動觸發爬蟲API"""
-        try:
-            if not app.is_initialized:
-                raise HTTPException(status_code=400, detail="系統還在初始化中")
-            if app.is_crawling:
-                raise HTTPException(status_code=400, detail="爬蟲正在運行中")
-            # 在背景執行爬蟲
-            def run_crawl():
-                app.manual_crawl(unlimited=request.unlimited)
-            threading.Thread(target=run_crawl, daemon=True).start()
-            mode_text = "無限制" if request.unlimited else "限制"
-            return {
-                'success': True,
-                'message': f'爬蟲任務已啟動（{mode_text}模式）',
-                'timestamp': datetime.now().isoformat()
-            }
-        except HTTPException:
-            raise
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
-    @app_instance.get("/api/progress")
-    async def api_get_progress():
-        """獲取爬蟲進度API"""
-        try:
-            progress, needs_update = app.get_progress()
-            return {
-                'progress': progress,
-                'is_crawling': app.is_crawling,
-                'is_initialized': app.is_initialized,
-                'needs_update': needs_update,
-                'auto_crawl_completed': app.auto_crawl_completed,
-                'timestamp': datetime.now().isoformat()
-            }
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
 # 啟動應用
 if __name__ == "__main__":
-    print("🚀 啟動股市新聞情緒分析器...")
-    print("📊 API接口: https://khjhs60199-pycrawing.hf.space/api")
-    print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
-    # 創建Gradio介面
-    interface = create_interface()
-    # 設定CORS
-    interface.app.add_middleware(
-        CORSMiddleware,
-        allow_origins=["*"],
-        allow_credentials=True,
-        allow_methods=["*"],
-        allow_headers=["*"],
-    )
-    # 設定API路由
-    setup_api_routes(interface.app)
     # 啟動Gradio介面
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,

 from datetime import datetime, timedelta
 from typing import List, Dict, Optional
 import os
+from flask import Flask, jsonify, request
 import json
 from crawler import CnYesNewsCrawler
 from sentiment_analyzer import SentimentAnalyzer
 setup_logging()
 logger = logging.getLogger(__name__)
+# Flask API 應用
+flask_app = Flask(__name__)
 class NewsApp:
     def __init__(self):
         self.db = NewsDatabase()
         self.current_progress = "正在初始化系統..."
         self.is_crawling = False
         self.is_initialized = False
+        self.auto_crawl_completed = False  # 新增：追蹤自動爬取是否完成
         # 上次新聞更新時間，用於防止無意義的刷新
         self.last_news_update = 0
                 self.update_progress("系統初始化完成，開始自動爬取新聞...")
                 logger.info("所有組件初始化完成")
+                # **新增：自動執行第一次爬取**
                 self._auto_initial_crawl()
             except Exception as e:
         """自動執行初始爬取"""
         def auto_crawl_task():
             try:
+                # 等待一小段時間確保系統完全就緒
                 time.sleep(3)
                 self.update_progress("🚀 自動開始首次爬取...")
                 # 檢查資料庫是否已有最近的新聞
                 recent_news = self.db.get_recent_news(category="all", days=1)
+                if len(recent_news) < 10:  # 如果最近1天的新聞少於10篇，就執行爬取
                     self.update_progress("📊 檢測到新聞數量較少，開始自動爬取...")
                     results = self.crawler.crawl_all_categories(unlimited=True)
             finally:
                 self.is_crawling = False
+        # 在獨立線程中執行自動爬取
         auto_crawl_thread = threading.Thread(target=auto_crawl_task, daemon=True)
         auto_crawl_thread.start()
     def get_progress(self) -> tuple:
         """獲取當前進度和是否需要更新"""
         current_time = time.time()
+        # 只有在進度真的有更新時才返回新內容
+        needs_update = (current_time - self.last_progress_update) < 5  # 5秒內的更新才顯示
         return self.current_progress, needs_update
     def get_latest_news(self, category: str = "all", days: int = 7,
                        keyword: str = "", sentiment_filter: str = "all",
                        force_refresh: bool = False) -> str:
+        """獲取最新新聞並格式化顯示 - 增強版"""
         try:
+            # 檢查是否需要刷新（避免無意義的閃爍）
             current_time = time.time()
             if not force_refresh and (current_time - self.last_news_update) < 5:
+                # 5秒內不重複查詢，除非強制刷新
                 pass
             self.last_news_update = current_time
+            # 記錄查詢參數
             logger.info(f"獲取新聞 - 分類: {category}, 天數: {days}, 關鍵字: '{keyword}', 情緒: {sentiment_filter}")
             news_data = self.db.get_recent_news(
             )
             if not news_data:
+                # 如果沒有新聞且系統剛初始化，顯示等待訊息
                 if not self.auto_crawl_completed:
                     return "⏳ 系統正在自動爬取新聞，請稍候..."
                 filter_text = "、".join(filter_desc) if filter_desc else "所有條件"
                 return f"📰 暫無符合條件的新聞資料 ({filter_text})，請調整篩選條件或執行爬蟲任務"
+            # 添加查詢結果標題
             filter_parts = []
             if category != "all":
                 filter_parts.append(self._get_category_name(category))
         return sentiment_names.get(sentiment, sentiment)
     def manual_crawl(self, unlimited: bool = True) -> str:
+        """手動觸發爬蟲 - 支援無限制模式"""
         if not self.is_initialized:
             return "⚠️ 系統還在初始化中，請稍後再試"
             self.update_progress(f"🚀 手動爬蟲開始（{mode_text}模式）")
+            # **關鍵修正：使用unlimited參數而非max_articles_per_category**
             results = self.crawler.crawl_all_categories(unlimited=unlimited)
             total_articles = sum(len(articles) for articles in results.values())
         try:
             stats = self.db.get_statistics()
+            # 新增自動爬取狀態
             auto_status = "✅ 已完成" if self.auto_crawl_completed else "⏳ 進行中" if self.is_crawling else "⚠️ 未執行"
             return f"""
                 'success': True,
                 'count': len(api_data),
                 'data': api_data,
+                'auto_crawl_completed': self.auto_crawl_completed
             }
         except Exception as e:
             return {
                 'success': False,
                 'error': str(e),
+                'data': []
             }
 # 初始化應用
 app = NewsApp()
+# API 路由
@flask_app.route('/api/news', methods=['GET'])
def api_get_news():
    """Return the filtered news list as a JSON response.

    Query parameters (all optional):
        category:  news category; defaults to ``'all'``.
        days:      look-back window in days; defaults to ``7``.
        keyword:   search keyword; defaults to ``''``.
        sentiment: sentiment filter; defaults to ``'all'``.

    Returns:
        The dict produced by ``app.get_news_api_data`` serialized with
        ``jsonify``.
    """
    category = request.args.get('category', 'all')
    # ``type=int`` lets Werkzeug fall back to the default when the value is
    # missing, empty or not an integer; a bare int() raised ValueError here
    # and surfaced as an unhandled HTTP 500.
    days = request.args.get('days', 7, type=int)
    keyword = request.args.get('keyword', '')
    sentiment_filter = request.args.get('sentiment', 'all')

    result = app.get_news_api_data(category, days, keyword, sentiment_filter)
    return jsonify(result)
@flask_app.route('/api/stats', methods=['GET'])
def api_get_stats():
    """Return database statistics plus the application status flags as JSON.

    On success the payload contains ``success=True``, the statistics under
    ``data`` and the ``auto_crawl_completed`` / ``is_initialized`` /
    ``is_crawling`` flags read from ``app``. On failure it contains
    ``success=False`` and the error text under ``error``.
    """
    try:
        stats = app.db.get_statistics()
        return jsonify({
            'success': True,
            'data': stats,
            'auto_crawl_completed': app.auto_crawl_completed,
            'is_initialized': app.is_initialized,
            'is_crawling': app.is_crawling
        })
    except Exception as e:
        # Record the traceback server-side; previously the failure was only
        # visible to the API caller as a short string.
        logger.exception("Failed to collect statistics for /api/stats")
        return jsonify({
            'success': False,
            'error': str(e)
        })
@flask_app.route('/api/crawl', methods=['POST'])
def api_manual_crawl():
    """Start a manual crawl in a background thread and report the outcome.

    The optional JSON body may carry ``{"unlimited": <bool>}``. A missing,
    non-JSON or non-object body falls back to ``unlimited=True``.

    Returns:
        A JSON object with ``success`` and either ``message`` (crawl started,
        or rejected because the system is initializing / already crawling)
        or ``error`` (unexpected failure).
    """
    try:
        if not app.is_initialized:
            return jsonify({
                'success': False,
                'message': '系統還在初始化中'
            })

        if app.is_crawling:
            return jsonify({
                'success': False,
                'message': '爬蟲正在運行中'
            })

        # ``request.json`` raises when the request has no JSON content type,
        # which turned a body-less POST into an error response. Parse
        # leniently so the default (unlimited) applies instead, and guard
        # against JSON bodies that are not objects.
        payload = request.get_json(silent=True)
        if isinstance(payload, dict):
            unlimited = payload.get('unlimited', True)
        else:
            unlimited = True

        # Run the crawl off the request thread so the response returns at once.
        def run_crawl():
            app.manual_crawl(unlimited=unlimited)

        threading.Thread(target=run_crawl, daemon=True).start()

        mode_text = "無限制" if unlimited else "限制"
        return jsonify({
            'success': True,
            'message': f'爬蟲任務已啟動（{mode_text}模式）'
        })
    except Exception as e:
        # Keep the traceback in the server log; the caller only gets the text.
        logger.exception("Failed to start manual crawl via /api/crawl")
        return jsonify({
            'success': False,
            'error': str(e)
        })
@flask_app.route('/api/progress', methods=['GET'])
def api_get_progress():
    """Report the current progress message and the application status flags.

    Returns:
        A JSON object with ``progress`` and ``needs_update`` taken from
        ``app.get_progress()``, plus the ``is_crawling``, ``is_initialized``
        and ``auto_crawl_completed`` flags read from ``app``.
    """
    message, should_refresh = app.get_progress()
    payload = {
        'progress': message,
        'is_crawling': app.is_crawling,
        'is_initialized': app.is_initialized,
        'needs_update': should_refresh,
        'auto_crawl_completed': app.auto_crawl_completed,
    }
    return jsonify(payload)
 # 創建 Gradio 介面
 def create_interface():
         🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
         🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
         📊 **即時統計**: 提供詳細的新聞統計資訊
         """)
+        with gr.Tab("📰 最新新聞"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    category_radio = gr.Radio(
+                        choices=[
+                            ("所有新聞", "all"),
+                            ("美股新聞", "us_stock"),
+                            ("台股新聞", "tw_stock")
+                        ],
+                        value="all",
+                        label="📋 新聞分類"
+                    )
+                    days_slider = gr.Slider(
+                        minimum=0,
+                        maximum=30,
+                        value=7,
+                        step=1,
+                        label="📅 時間範圍 (天)",
+                        info="0表示不限制時間"
+                    )
+                    keyword_input = gr.Textbox(
+                        label="🔍 關鍵字搜尋",
+                        placeholder="輸入關鍵字搜尋新聞...",
+                        value=""
+                    )
+                    sentiment_radio = gr.Radio(
+                        choices=[
+                            ("所有情緒", "all"),
+                            ("正面情緒", "positive"),
+                            ("負面情緒", "negative"),
+                            ("中性情緒", "neutral")
+                        ],
+                        value="all",
+                        label="😊 情緒篩選"
+                    )
+                    # 爬蟲模式選擇
+                    crawl_mode = gr.Radio(
+                        choices=[
+                            ("無限制爬取 (全部文章)", True),
+                            ("限制爬取 (20篇)", False)
+                        ],
+                        value=True,
+                        label="🚀 爬蟲模式",
+                        info="選擇爬取模式"
+                    )
+                with gr.Column(scale=2):
+                    with gr.Row():
+                        search_btn = gr.Button("🔍 搜尋新聞", variant="primary")
+                        refresh_btn = gr.Button("🔄 重新整理", variant="secondary")
+                        manual_crawl_btn = gr.Button("🚀 手動爬取", variant="secondary")
+                    # 進度顯示
+                    progress_display = gr.Textbox(
+                        label="📊 系統狀態",
+                        value=app.current_progress,
+                        interactive=False,
+                        elem_classes=["progress-box"],
+                        lines=1
+                    )
+            news_display = gr.HTML(
+                label="新聞內容",
+                value="⏳ 系統正在初始化並自動爬取新聞，請稍候..."
+            )
+            crawl_result = gr.Textbox(label="爬取結果", visible=False)
+            # 更新函數
+            def update_progress_only():
+                """只更新進度，不更新新聞"""
+                progress, needs_update = app.get_progress()
+                if needs_update or app.is_crawling:
+                    return progress
+                else:
+                    return gr.update()
+            def update_news_automatically():
+                """自動更新新聞內容"""
+                if app.auto_crawl_completed:
+                    return app.get_latest_news("all", 7, "", "all", force_refresh=True)
+                else:
+                    return gr.update()
+            def search_news(category, days, keyword, sentiment):
+                """搜尋新聞"""
+                logger.info(f"搜尋新聞 - 分類: {category}, 天數: {days}, 關鍵字: '{keyword}', 情緒: {sentiment}")
+                return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
+            def refresh_current_search(category, days, keyword, sentiment):
+                """刷新當前搜尋"""
+                return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
+            def handle_manual_crawl(category, days, keyword, sentiment, unlimited_mode):
+                """處理手動爬蟲"""
+                result = app.manual_crawl(unlimited=unlimited_mode)
+                # 爬取完成後自動刷新當前搜尋
+                news = app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
+                return result, news
+            # 進度更新定時器
+            progress_timer = gr.Timer(value=10)
+            progress_timer.tick(
+                fn=update_progress_only,
+                outputs=[progress_display]
+            )
+            # 新聞自動更新定時器
+            news_timer = gr.Timer(value=15)  # 每15秒檢查一次
+            news_timer.tick(
+                fn=update_news_automatically,
+                outputs=[news_display]
+            )
+            # 綁定事件
+            search_btn.click(
+                search_news,
+                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
+                outputs=[news_display]
+            )
+            refresh_btn.click(
+                refresh_current_search,
+                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
+                outputs=[news_display]
+            )
+            manual_crawl_btn.click(
+                handle_manual_crawl,
+                inputs=[category_radio, days_slider, keyword_input, sentiment_radio, crawl_mode],
+                outputs=[crawl_result, news_display]
+            ).then(
+                lambda: gr.update(visible=True),
+                outputs=[crawl_result]
+            )
+            # 分類改變時自動搜尋
+            category_radio.change(
+                search_news,
+                inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
+                outputs=[news_display]
+            )
+            # 初始載入時顯示等待訊息
+            interface.load(
+                lambda: "⏳ 系統正在自動爬取新聞，請稍候...",
+                outputs=[news_display]
+            )
+        with gr.Tab("📊 統計資訊"):
+            stats_display = gr.Markdown()
+            stats_refresh_btn = gr.Button("🔄 更新統計")
+            stats_refresh_btn.click(app.get_statistics, outputs=[stats_display])
+            interface.load(app.get_statistics, outputs=[stats_display])
+        # 只保留兩個分頁：最新新聞 和 統計資訊
+        # 移除了 "🔌 API接口" 和 "ℹ️ 關於" 分頁
+    return interface
 # 啟動應用
 if __name__ == "__main__":
+    import threading
+    # 在背景啟動Flask API
+    def run_flask():
+        flask_app.run(host='127.0.0.1', port=5000, debug=False)
+    flask_thread = threading.Thread(target=run_flask, daemon=True)
+    flask_thread.start()
+    print("🚀 啟動股市新聞情緒分析器（自動版）...")
+    print("📊 網頁介面: http://localhost:7860")
+    print("🔒 API接口: http://127.0.0.1:5000 (僅限本機存取)")
+    print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
+    print("💡 特色: 無需手動設定，啟動即可使用")
     # 啟動Gradio介面
+    interface = create_interface()
     interface.launch(
         server_name="0.0.0.0",
         server_port=7860,