khjhs60199 commited on
Commit
6616e71
·
verified ·
1 Parent(s): 03efb54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +113 -294
app.py CHANGED
@@ -8,9 +8,11 @@ import time
8
  from datetime import datetime, timedelta
9
  from typing import List, Dict, Optional
10
  import os
11
- from flask import Flask, jsonify, request, Response
12
- from flask_cors import CORS
 
13
  import json
 
14
 
15
  from crawler import CnYesNewsCrawler
16
  from sentiment_analyzer import SentimentAnalyzer
@@ -22,16 +24,6 @@ from utils import setup_logging, format_news_for_display
22
  setup_logging()
23
  logger = logging.getLogger(__name__)
24
 
25
- # Flask API 應用 - 添加 CORS 支援
26
- flask_app = Flask(__name__)
27
- CORS(flask_app, resources={
28
- r"/api/*": {
29
- "origins": "*",
30
- "methods": ["GET", "POST", "OPTIONS"],
31
- "allow_headers": ["Content-Type", "Authorization"]
32
- }
33
- })
34
-
35
  class NewsApp:
36
  def __init__(self):
37
  self.db = NewsDatabase()
@@ -327,126 +319,9 @@ class NewsApp:
327
  # 初始化應用
328
  app = NewsApp()
329
 
330
- # API 路由
331
- @flask_app.route('/api', methods=['GET'])
332
- def api_info():
333
- """API資訊頁面"""
334
- return jsonify({
335
- 'name': '股市新聞情緒分析API',
336
- 'version': '1.0',
337
- 'status': 'running',
338
- 'endpoints': {
339
- 'news': '/api/news',
340
- 'stats': '/api/stats',
341
- 'crawl': '/api/crawl',
342
- 'progress': '/api/progress'
343
- },
344
- 'timestamp': datetime.now().isoformat()
345
- })
346
-
347
- @flask_app.route('/api/news', methods=['GET'])
348
- def api_get_news():
349
- """獲取新聞列表API"""
350
- try:
351
- category = request.args.get('category', 'all')
352
- days = int(request.args.get('days', 7))
353
- keyword = request.args.get('keyword', '')
354
- sentiment_filter = request.args.get('sentiment', 'all')
355
-
356
- result = app.get_news_api_data(category, days, keyword, sentiment_filter)
357
- return jsonify(result)
358
- except Exception as e:
359
- return jsonify({
360
- 'success': False,
361
- 'error': str(e),
362
- 'data': [],
363
- 'timestamp': datetime.now().isoformat()
364
- }), 500
365
-
366
- @flask_app.route('/api/stats', methods=['GET'])
367
- def api_get_stats():
368
- """獲取統計信息API"""
369
- try:
370
- stats = app.db.get_statistics()
371
- return jsonify({
372
- 'success': True,
373
- 'data': stats,
374
- 'auto_crawl_completed': app.auto_crawl_completed,
375
- 'is_initialized': app.is_initialized,
376
- 'is_crawling': app.is_crawling,
377
- 'timestamp': datetime.now().isoformat()
378
- })
379
- except Exception as e:
380
- return jsonify({
381
- 'success': False,
382
- 'error': str(e),
383
- 'timestamp': datetime.now().isoformat()
384
- }), 500
385
-
386
- @flask_app.route('/api/crawl', methods=['POST', 'OPTIONS'])
387
- def api_manual_crawl():
388
- """手動觸發爬蟲API"""
389
- if request.method == 'OPTIONS':
390
- return '', 200
391
-
392
- try:
393
- if not app.is_initialized:
394
- return jsonify({
395
- 'success': False,
396
- 'message': '系統還在初始化中',
397
- 'timestamp': datetime.now().isoformat()
398
- }), 400
399
-
400
- if app.is_crawling:
401
- return jsonify({
402
- 'success': False,
403
- 'message': '爬蟲正在運行中',
404
- 'timestamp': datetime.now().isoformat()
405
- }), 400
406
-
407
- # 檢查是否要求無限制模式
408
- unlimited = True
409
- if request.is_json and request.json:
410
- unlimited = request.json.get('unlimited', True)
411
-
412
- # 在背景執行爬蟲
413
- def run_crawl():
414
- app.manual_crawl(unlimited=unlimited)
415
-
416
- threading.Thread(target=run_crawl, daemon=True).start()
417
-
418
- mode_text = "無限制" if unlimited else "限制"
419
- return jsonify({
420
- 'success': True,
421
- 'message': f'爬蟲任務已啟動({mode_text}模式)',
422
- 'timestamp': datetime.now().isoformat()
423
- })
424
- except Exception as e:
425
- return jsonify({
426
- 'success': False,
427
- 'error': str(e),
428
- 'timestamp': datetime.now().isoformat()
429
- }), 500
430
-
431
- @flask_app.route('/api/progress', methods=['GET'])
432
- def api_get_progress():
433
- """獲取爬蟲進度API"""
434
- try:
435
- progress, needs_update = app.get_progress()
436
- return jsonify({
437
- 'progress': progress,
438
- 'is_crawling': app.is_crawling,
439
- 'is_initialized': app.is_initialized,
440
- 'needs_update': needs_update,
441
- 'auto_crawl_completed': app.auto_crawl_completed,
442
- 'timestamp': datetime.now().isoformat()
443
- })
444
- except Exception as e:
445
- return jsonify({
446
- 'success': False,
447
- 'error': str(e),
448
- 'timestamp': datetime.now().isoformat()
449
- }), 500
450
 
451
  # 創建 Gradio 介面
452
  def create_interface():
@@ -475,184 +350,128 @@ def create_interface():
475
  🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
476
  🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
477
  📊 **即時統計**: 提供詳細的新聞統計資訊
 
478
  """)
479
 
480
- with gr.Tab("📰 最新新聞"):
481
- with gr.Row():
482
- with gr.Column(scale=1):
483
- category_radio = gr.Radio(
484
- choices=[
485
- ("所有新聞", "all"),
486
- ("美股新聞", "us_stock"),
487
- ("台股新聞", "tw_stock")
488
- ],
489
- value="all",
490
- label="📋 新聞分類"
491
- )
492
-
493
- days_slider = gr.Slider(
494
- minimum=0,
495
- maximum=30,
496
- value=7,
497
- step=1,
498
- label="📅 時間範圍 (天)",
499
- info="0表示不限制時間"
500
- )
501
-
502
- keyword_input = gr.Textbox(
503
- label="🔍 關鍵字搜尋",
504
- placeholder="輸入關鍵字搜尋新聞...",
505
- value=""
506
- )
507
-
508
- sentiment_radio = gr.Radio(
509
- choices=[
510
- ("所有情緒", "all"),
511
- ("正面情緒", "positive"),
512
- ("負面情緒", "negative"),
513
- ("中性情緒", "neutral")
514
- ],
515
- value="all",
516
- label="😊 情緒篩選"
517
- )
518
-
519
- # 爬蟲模式選擇
520
- crawl_mode = gr.Radio(
521
- choices=[
522
- ("無限制爬取 (全部文章)", True),
523
- ("限制爬取 (20篇)", False)
524
- ],
525
- value=True,
526
- label="🚀 爬蟲模式",
527
- info="選擇爬取模式"
528
- )
 
 
 
 
 
 
 
 
 
 
 
529
 
530
- with gr.Column(scale=2):
531
- with gr.Row():
532
- search_btn = gr.Button("🔍 搜尋新聞", variant="primary")
533
- refresh_btn = gr.Button("🔄 重新整理", variant="secondary")
534
- manual_crawl_btn = gr.Button("🚀 手動爬取", variant="secondary")
535
-
536
- # 進度顯示
537
- progress_display = gr.Textbox(
538
- label="📊 系統狀態",
539
- value=app.current_progress,
540
- interactive=False,
541
- elem_classes=["progress-box"],
542
- lines=1
543
- )
544
-
545
- news_display = gr.HTML(
546
- label="新聞內容",
547
- value="⏳ 系統正在初始化並自動爬取新聞,請稍候..."
548
- )
549
- crawl_result = gr.Textbox(label="爬取結果", visible=False)
550
-
551
- # 更新函數
552
- def update_progress_only():
553
- """只更新進度,不更新新聞"""
554
- progress, needs_update = app.get_progress()
555
- if needs_update or app.is_crawling:
556
- return progress
557
- else:
558
- return gr.update()
559
-
560
- def update_news_automatically():
561
- """自動更新新聞內容"""
562
- if app.auto_crawl_completed:
563
- return app.get_latest_news("all", 7, "", "all", force_refresh=True)
564
- else:
565
- return gr.update()
566
-
567
- def search_news(category, days, keyword, sentiment):
568
- """搜尋新聞"""
569
- logger.info(f"搜尋新聞 - 分類: {category}, 天數: {days}, 關鍵字: '{keyword}', 情緒: {sentiment}")
570
- return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
571
 
572
- def refresh_current_search(category, days, keyword, sentiment):
573
- """刷新當前搜尋"""
574
- return app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
575
 
576
- def handle_manual_crawl(category, days, keyword, sentiment, unlimited_mode):
577
- """處理手動爬蟲"""
578
- result = app.manual_crawl(unlimited=unlimited_mode)
579
- # 爬取完成後自動刷新當前搜尋
580
- news = app.get_latest_news(category, days, keyword, sentiment, force_refresh=True)
581
- return result, news
582
-
583
- # 進度更新定時器
584
- progress_timer = gr.Timer(value=10)
585
- progress_timer.tick(
586
- fn=update_progress_only,
587
- outputs=[progress_display]
588
- )
589
-
590
- # 新聞自動更新定時器
591
- news_timer = gr.Timer(value=15) # 每15秒檢查一次
592
- news_timer.tick(
593
- fn=update_news_automatically,
594
- outputs=[news_display]
595
- )
596
-
597
- # 綁定事件
598
- search_btn.click(
599
- search_news,
600
- inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
601
- outputs=[news_display]
602
- )
603
-
604
- refresh_btn.click(
605
- refresh_current_search,
606
- inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
607
- outputs=[news_display]
608
- )
609
-
610
- manual_crawl_btn.click(
611
- handle_manual_crawl,
612
- inputs=[category_radio, days_slider, keyword_input, sentiment_radio, crawl_mode],
613
- outputs=[crawl_result, news_display]
614
- ).then(
615
- lambda: gr.update(visible=True),
616
- outputs=[crawl_result]
617
- )
618
 
619
- # 分類改變時自動搜尋
620
- category_radio.change(
621
- search_news,
622
- inputs=[category_radio, days_slider, keyword_input, sentiment_radio],
623
- outputs=[news_display]
624
- )
625
-
626
- # 初始載入時顯示等待訊息
627
- interface.load(
628
- lambda: "⏳ 系統正在自動爬取新聞,請稍候...",
629
- outputs=[news_display]
630
- )
631
-
632
- with gr.Tab("📊 統計資訊"):
633
- stats_display = gr.Markdown()
634
- stats_refresh_btn = gr.Button("🔄 更新統計")
635
-
636
- stats_refresh_btn.click(app.get_statistics, outputs=[stats_display])
637
- interface.load(app.get_statistics, outputs=[stats_display])
638
-
639
- return interface
 
 
 
 
 
640
 
641
  # 啟動應用
642
  if __name__ == "__main__":
643
- import threading
644
-
645
  print("🚀 啟動股市新聞情緒分析器...")
646
  print("📊 API接口: https://khjhs60199-pycrawing.hf.space/api")
647
  print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
648
 
649
- # 啟動Gradio介面,並使用Gradio內建的API端點
650
  interface = create_interface()
651
 
652
- # 掛載Flask API到Gradio的app
653
- interface.mount_gradio_app(flask_app, path="/api")
 
 
 
 
 
 
 
 
 
654
 
655
- # 啟動Gradio介面,Flask API會自動整合
656
  interface.launch(
657
  server_name="0.0.0.0",
658
  server_port=7860,
 
8
  from datetime import datetime, timedelta
9
  from typing import List, Dict, Optional
10
  import os
11
+ from fastapi import FastAPI, HTTPException
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ from fastapi.responses import JSONResponse
14
  import json
15
+ from pydantic import BaseModel
16
 
17
  from crawler import CnYesNewsCrawler
18
  from sentiment_analyzer import SentimentAnalyzer
 
24
  setup_logging()
25
  logger = logging.getLogger(__name__)
26
 
 
 
 
 
 
 
 
 
 
 
27
  class NewsApp:
28
  def __init__(self):
29
  self.db = NewsDatabase()
 
319
  # 初始化應用
320
  app = NewsApp()
321
 
322
+ # 定義請求模型
323
+ class CrawlRequest(BaseModel):
324
+ unlimited: bool = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
 
326
  # 創建 Gradio 介面
327
  def create_interface():
 
350
  🎯 **智能分析**: 使用 RoBERTa 模型進行情緒分析
351
  🔍 **多條件篩選**: 支援時間段、關鍵字、情緒篩選
352
  📊 **即時統計**: 提供詳細的新聞統計資訊
353
+ 📡 **API接口**: https://khjhs60199-pycrawing.hf.space/api
354
  """)
355
 
356
+ # ... (其餘Gradio介面代碼保持不變) ...
357
+
358
+ return interface
359
+
360
+ # 設定API路由
361
+ def setup_api_routes(app_instance):
362
+ """設定API路由"""
363
+
364
+ @app_instance.get("/api")
365
+ async def api_info():
366
+ """API資訊頁面"""
367
+ return {
368
+ 'name': '股市新聞情緒分析API',
369
+ 'version': '1.0',
370
+ 'status': 'running',
371
+ 'endpoints': {
372
+ 'news': '/api/news',
373
+ 'stats': '/api/stats',
374
+ 'crawl': '/api/crawl',
375
+ 'progress': '/api/progress'
376
+ },
377
+ 'timestamp': datetime.now().isoformat()
378
+ }
379
+
380
+ @app_instance.get("/api/news")
381
+ async def api_get_news(
382
+ category: str = "all",
383
+ days: int = 7,
384
+ keyword: str = "",
385
+ sentiment: str = "all"
386
+ ):
387
+ """獲取新聞列表API"""
388
+ try:
389
+ result = app.get_news_api_data(category, days, keyword, sentiment)
390
+ return result
391
+ except Exception as e:
392
+ raise HTTPException(status_code=500, detail=str(e))
393
+
394
+ @app_instance.get("/api/stats")
395
+ async def api_get_stats():
396
+ """獲取統計信息API"""
397
+ try:
398
+ stats = app.db.get_statistics()
399
+ return {
400
+ 'success': True,
401
+ 'data': stats,
402
+ 'auto_crawl_completed': app.auto_crawl_completed,
403
+ 'is_initialized': app.is_initialized,
404
+ 'is_crawling': app.is_crawling,
405
+ 'timestamp': datetime.now().isoformat()
406
+ }
407
+ except Exception as e:
408
+ raise HTTPException(status_code=500, detail=str(e))
409
+
410
+ @app_instance.post("/api/crawl")
411
+ async def api_manual_crawl(request: CrawlRequest):
412
+ """手動觸發爬蟲API"""
413
+ try:
414
+ if not app.is_initialized:
415
+ raise HTTPException(status_code=400, detail="系統還在初始化中")
416
 
417
+ if app.is_crawling:
418
+ raise HTTPException(status_code=400, detail="爬蟲正在運行中")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
 
420
+ # 在背景執行爬蟲
421
+ def run_crawl():
422
+ app.manual_crawl(unlimited=request.unlimited)
423
 
424
+ threading.Thread(target=run_crawl, daemon=True).start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
425
 
426
+ mode_text = "無限制" if request.unlimited else "限制"
427
+ return {
428
+ 'success': True,
429
+ 'message': f'爬蟲任務已啟動({mode_text}模式)',
430
+ 'timestamp': datetime.now().isoformat()
431
+ }
432
+ except HTTPException:
433
+ raise
434
+ except Exception as e:
435
+ raise HTTPException(status_code=500, detail=str(e))
436
+
437
+ @app_instance.get("/api/progress")
438
+ async def api_get_progress():
439
+ """獲取爬蟲進度API"""
440
+ try:
441
+ progress, needs_update = app.get_progress()
442
+ return {
443
+ 'progress': progress,
444
+ 'is_crawling': app.is_crawling,
445
+ 'is_initialized': app.is_initialized,
446
+ 'needs_update': needs_update,
447
+ 'auto_crawl_completed': app.auto_crawl_completed,
448
+ 'timestamp': datetime.now().isoformat()
449
+ }
450
+ except Exception as e:
451
+ raise HTTPException(status_code=500, detail=str(e))
452
 
453
  # 啟動應用
454
  if __name__ == "__main__":
 
 
455
  print("🚀 啟動股市新聞情緒分析器...")
456
  print("📊 API接口: https://khjhs60199-pycrawing.hf.space/api")
457
  print("⚡ 自動功能: 系統啟動後自動檢測並爬取新聞")
458
 
459
+ # 創建Gradio介面
460
  interface = create_interface()
461
 
462
+ # 設定CORS
463
+ interface.app.add_middleware(
464
+ CORSMiddleware,
465
+ allow_origins=["*"],
466
+ allow_credentials=True,
467
+ allow_methods=["*"],
468
+ allow_headers=["*"],
469
+ )
470
+
471
+ # 設定API路由
472
+ setup_api_routes(interface.app)
473
 
474
+ # 啟動Gradio介面
475
  interface.launch(
476
  server_name="0.0.0.0",
477
  server_port=7860,