File size: 5,478 Bytes
ec6ea02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import logging
import re
from datetime import datetime
from typing import List, Dict
import html

def setup_logging():
    """Configure the logging system.

    Passes ``logging.basicConfig`` the INFO level, a
    ``time - logger name - level - message`` record format, and two
    handlers: a stream handler and a UTF-8 file handler writing to
    ``news_app.log``.
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # Build the handlers up front so the basicConfig call stays short.
    console_handler = logging.StreamHandler()
    file_handler = logging.FileHandler('news_app.log', encoding='utf-8')

    logging.basicConfig(
        level=logging.INFO,
        format=record_format,
        handlers=[console_handler, file_handler],
    )

# Badge markup for each recognised sentiment; unknown values use the neutral badge.
_SENTIMENT_BADGES = {
    'positive': '<span class="sentiment-badge positive-badge">正面 😊</span>',
    'negative': '<span class="sentiment-badge negative-badge">負面 😔</span>',
    'neutral': '<span class="sentiment-badge neutral-badge">中性 😐</span>'
}

# Display label for each known category key; anything else uses the generic label.
_CATEGORY_LABELS = {'us_stock': '美股', 'tw_stock': '台股'}
_DEFAULT_CATEGORY_LABEL = '財經'

# Maximum number of raw content characters shown on a card before "..." is appended.
_MAX_CONTENT_CHARS = 300


def _as_text(value) -> str:
    """Return *value* as a string, mapping ``None`` to the empty string."""
    return '' if value is None else str(value)


def _format_published_date(published_date) -> str:
    """Return the publication date as (unescaped) display text.

    ISO-8601 strings are rendered as ``YYYY-MM-DD HH:MM``; strings that do
    not parse are returned unchanged; ``None`` becomes an empty string and
    any other object is converted with ``str``.
    """
    if published_date is None:
        return ''
    if not isinstance(published_date, str):
        return str(published_date)
    try:
        # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
        parsed = datetime.fromisoformat(published_date.replace('Z', '+00:00'))
    except ValueError:
        # Not an ISO timestamp: show whatever text was supplied.
        return published_date
    return parsed.strftime('%Y-%m-%d %H:%M')


def format_news_for_display(news_data: List[Dict]) -> str:
    """Render news items as an HTML fragment of styled cards.

    Args:
        news_data: Dicts that may contain the keys ``title``, ``content``,
            ``url``, ``source``, ``category``, ``sentiment`` and
            ``published_date``. Missing keys and ``None`` values are
            rendered as empty text.

    Returns:
        A placeholder message when ``news_data`` is empty; otherwise an
        HTML string with one card per item wrapped in a single ``<div>``.
        Every item-supplied value is HTML-escaped before interpolation,
        including the URL placed inside ``href`` attributes.
    """
    if not news_data:
        return "📰 暫無新聞資料"

    # Collect fragments and join once instead of repeated string concatenation.
    fragments = ["""
    <div style="max-width: 100%; font-family: Arial, sans-serif;">
    """]

    for news in news_data:
        # Sentiment drives both the CSS class and the badge.
        sentiment = news.get('sentiment', 'neutral')
        sentiment_class = f"news-{html.escape(str(sentiment))}"
        sentiment_badge = _SENTIMENT_BADGES.get(sentiment, _SENTIMENT_BADGES['neutral'])

        # The fallback date text is item-supplied, so it is escaped as well.
        formatted_date = html.escape(_format_published_date(news.get('published_date', '')))

        title = html.escape(_as_text(news.get('title', '')))
        source = html.escape(_as_text(news.get('source', '')))
        # html.escape also escapes quotes, so the URL cannot break out of href="...".
        url = html.escape(_as_text(news.get('url', '')))
        category_name = _CATEGORY_LABELS.get(news.get('category', ''), _DEFAULT_CATEGORY_LABEL)

        # Truncate the raw text first and escape afterwards, so an entity
        # such as "&amp;" is never cut in half.
        content = _as_text(news.get('content', ''))
        if len(content) > _MAX_CONTENT_CHARS:
            content = content[:_MAX_CONTENT_CHARS] + "..."
        content = html.escape(content)

        # News card HTML
        news_card = f"""
        <div class="news-card {sentiment_class}" style="margin-bottom: 20px; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
            <div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 10px;">
                <h3 style="margin: 0; font-size: 18px; color: #333; flex: 1; margin-right: 10px;">
                    <a href="{url}" target="_blank" style="color: #333; text-decoration: none;">{title}</a>
                </h3>
                {sentiment_badge}
            </div>
            
            <div style="margin-bottom: 10px; color: #666; font-size: 14px;">
                <span style="background: #007bff; color: white; padding: 2px 6px; border-radius: 4px; font-size: 12px; margin-right: 8px;">{category_name}</span>
                <span>{source}</span>
                <span style="margin-left: 8px;">📅 {formatted_date}</span>
            </div>
            
            <p style="margin: 10px 0; color: #555; line-height: 1.6;">{content}</p>
            
            <div style="margin-top: 10px; text-align: right;">
                <a href="{url}" target="_blank" style="color: #007bff; text-decoration: none; font-size: 14px;">閱讀全文 →</a>
            </div>
        </div>
        """

        fragments.append(news_card)

    fragments.append("</div>")

    return "".join(fragments)

def clean_text(text: str) -> str:
    """Strip markup and unwanted characters from *text*.

    Steps, in order: remove ``<...>`` tags, drop every character that is
    not a word character, whitespace, a CJK ideograph in the listed ranges
    or one of the listed punctuation marks, then collapse whitespace runs
    to single spaces and trim both ends.

    Args:
        text: The raw text; a falsy value (``None`` or ``""``) yields ``""``.

    Returns:
        The cleaned text.
    """
    if not text:
        return ""

    # Remove HTML tags.
    text = re.sub(r'<[^>]+>', '', text)

    # Remove special characters (everything outside the allowed set).
    text = re.sub(r'[^\u4e00-\u9fff\u3400-\u4dbf\w\s.,!?()(),。!?]', '', text)

    # Collapse whitespace last: removing a character that sat between two
    # spaces would otherwise leave a double space in the result.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()

def calculate_similarity(text1: str, text2: str) -> float:
    """Return the word-set overlap ratio of two texts.

    Each text is lower-cased and split on whitespace into a set of tokens;
    the result is ``len(intersection) / len(union)`` of the two sets.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A value in ``[0.0, 1.0]``; ``0.0`` when either text is empty or
        contains no tokens.
    """
    if not (text1 and text2):
        return 0.0

    # Simple vocabulary-based similarity on whitespace-separated tokens.
    tokens_a, tokens_b = (set(t.lower().split()) for t in (text1, text2))

    if not (tokens_a and tokens_b):
        return 0.0

    # Both sets are non-empty here, so the union size is at least 1.
    shared_count = len(tokens_a & tokens_b)
    total_count = len(tokens_a | tokens_b)
    return shared_count / total_count

# Compiled once at import time; used with ``fullmatch`` so no anchors are needed.
_URL_PATTERN = re.compile(
    r'https?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'  # one or more domain labels...
    r'(?:[A-Z]{2,63}|XN--[A-Z0-9-]{0,58}[A-Z0-9])\.?|'  # ...then an alphabetic or punycode TLD
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or dotted-quad ip
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)',  # optional path / query without whitespace
    re.IGNORECASE,
)


def validate_url(url: str) -> bool:
    """Check whether *url* looks like an http(s) URL.

    Accepted hosts are a dotted domain name ending in a TLD, ``localhost``,
    or a dotted-quad of 1-3 digit groups (octet ranges are not checked),
    optionally followed by a port and a path or query string.

    Args:
        url: The candidate URL. Non-string values (including ``None``) are
            treated as invalid instead of raising.

    Returns:
        ``True`` when the whole string matches the URL pattern.
    """
    if not isinstance(url, str):
        return False

    # fullmatch (unlike ``$``) does not tolerate a trailing newline.
    return _URL_PATTERN.fullmatch(url) is not None

def format_duration(seconds: float) -> str:
    """Format a duration in seconds as text with one decimal place.

    Args:
        seconds: The duration in seconds.

    Returns:
        The value expressed in seconds when below 60, in minutes when
        below 3600, and in hours otherwise, followed by the unit label.
    """
    # Guard clauses, smallest unit first.
    if seconds < 60:
        return f"{seconds:.1f}秒"

    if seconds < 3600:
        return f"{seconds / 60:.1f}分鐘"

    return f"{seconds / 3600:.1f}小時"

def truncate_text(text: str, max_length: int = 100) -> str:
    """Shorten *text* to at most ``max_length`` characters plus an ellipsis.

    Text that already fits is returned unchanged. Otherwise the first
    ``max_length`` characters are taken; if that cut falls in the middle of
    a word, the partial word is dropped (back to the previous space). A
    word that ends exactly at the cut is kept, and when there is no earlier
    word to fall back to the hard cut is used. Trailing whitespace is
    removed and ``"..."`` is appended, so the result may be up to three
    characters longer than ``max_length``.

    Args:
        text: The text to shorten; a falsy value yields ``""``.
        max_length: Maximum number of characters kept from ``text``.

    Returns:
        The original or shortened text.
    """
    if not text:
        return ""

    if len(text) <= max_length:
        return text

    head = text[:max_length]

    # Slicing (not indexing) keeps out-of-range max_length values from raising.
    cut_splits_word = text[max_length:max_length + 1] != ' '
    if cut_splits_word:
        without_partial_word = head.rsplit(' ', 1)[0]
        # Keep the hard cut when dropping the partial word would leave nothing.
        if without_partial_word.strip():
            head = without_partial_word

    return head.rstrip() + "..."