Spaces:
Sleeping
Sleeping
| import logging | |
| import re | |
| from datetime import datetime | |
| from typing import List, Dict | |
| import html | |
def setup_logging():
    """Configure root logging for the application.

    Messages at INFO level and above are sent both to the console stream
    and to the UTF-8 encoded file ``news_app.log`` in the working directory.
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    console_handler = logging.StreamHandler()
    file_handler = logging.FileHandler('news_app.log', encoding='utf-8')
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[console_handler, file_handler],
    )
def _format_published_date(value) -> str:
    """Return *value* as ``YYYY-MM-DD HH:MM`` text, or unchanged text if unparseable."""
    if value is None:
        return ""
    if isinstance(value, datetime):
        return value.strftime('%Y-%m-%d %H:%M')
    if isinstance(value, str):
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset because
            # fromisoformat() does not accept "Z" on every Python version.
            parsed = datetime.fromisoformat(value.replace('Z', '+00:00'))
        except ValueError:
            # Not ISO-8601: show whatever the feed supplied.
            return value
        return parsed.strftime('%Y-%m-%d %H:%M')
    return str(value)


def format_news_for_display(news_data: List[Dict]) -> str:
    """Render a list of news items as an HTML fragment of styled cards.

    Each item is a dict that may carry the keys ``title``, ``content``,
    ``url``, ``source``, ``category``, ``sentiment`` and ``published_date``.
    Missing or ``None`` values are rendered as empty text. Every value taken
    from the item is HTML-escaped before being placed in markup or attributes.

    Args:
        news_data: The news items to render.

    Returns:
        The HTML string, or a short placeholder message when ``news_data``
        is empty.
    """
    if not news_data:
        return "📰 暫無新聞資料"

    # Lookup tables are loop-invariant, so build them once.
    sentiment_badges = {
        'positive': '<span class="sentiment-badge positive-badge">正面 😊</span>',
        'negative': '<span class="sentiment-badge negative-badge">負面 😔</span>',
        'neutral': '<span class="sentiment-badge neutral-badge">中性 😐</span>'
    }
    category_names = {'us_stock': '美股', 'tw_stock': '台股'}

    parts = ['\n<div style="max-width: 100%; font-family: Arial, sans-serif;">\n']

    for news in news_data:
        # Sentiment drives both the CSS class and the badge; unknown values
        # fall back to the neutral badge.
        sentiment = news.get('sentiment') or 'neutral'
        sentiment_class = html.escape(f"news-{sentiment}")
        sentiment_badge = sentiment_badges.get(sentiment, sentiment_badges['neutral'])

        formatted_date = html.escape(_format_published_date(news.get('published_date')))

        # Truncate the raw text BEFORE escaping so an entity such as "&amp;"
        # can never be cut in half.
        raw_content = str(news.get('content') or '')
        if len(raw_content) > 300:
            raw_content = raw_content[:300] + "..."

        title = html.escape(str(news.get('title') or ''))
        content = html.escape(raw_content)
        # The URL lands inside an href="..." attribute, so quotes must be escaped.
        url = html.escape(str(news.get('url') or ''))
        source = html.escape(str(news.get('source') or ''))
        category_name = category_names.get(news.get('category', ''), '財經')

        parts.append(f"""
<div class="news-card {sentiment_class}" style="margin-bottom: 20px; padding: 15px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<div style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 10px;">
<h3 style="margin: 0; font-size: 18px; color: #333; flex: 1; margin-right: 10px;">
<a href="{url}" target="_blank" style="color: #333; text-decoration: none;">{title}</a>
</h3>
{sentiment_badge}
</div>
<div style="margin-bottom: 10px; color: #666; font-size: 14px;">
<span style="background: #007bff; color: white; padding: 2px 6px; border-radius: 4px; font-size: 12px; margin-right: 8px;">{category_name}</span>
<span>{source}</span>
<span style="margin-left: 8px;">📅 {formatted_date}</span>
</div>
<p style="margin: 10px 0; color: #555; line-height: 1.6;">{content}</p>
<div style="margin-top: 10px; text-align: right;">
<a href="{url}" target="_blank" style="color: #007bff; text-decoration: none; font-size: 14px;">閱讀全文 →</a>
</div>
</div>
""")

    parts.append("</div>")
    return "".join(parts)
def clean_text(text: str) -> str:
    """Strip markup and unwanted symbols from *text*.

    Steps, in order: remove HTML tags, decode HTML entities, drop every
    character that is not a word character, whitespace, or one of the
    allowed ASCII / full-width punctuation marks, then collapse runs of
    whitespace into single spaces.

    Args:
        text: The raw text; ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    if not text:
        return ""
    # Remove HTML tags.
    text = re.sub(r'<[^>]+>', '', text)
    # Decode entities so "&amp;" is treated as "&" rather than leaving the
    # literal letters "amp" behind once "&" and ";" are filtered out.
    text = html.unescape(text)
    # Keep CJK ideographs, word characters, whitespace and basic punctuation.
    # Full-width （），。！？ are spelled as escapes so they survive re-encoding.
    text = re.sub(
        r'[^\u4e00-\u9fff\u3400-\u4dbf\w\s.,!?()\uff08\uff09\uff0c\u3002\uff01\uff1f]',
        '',
        text,
    )
    # Collapse whitespace last: deleting a symbol between two spaces would
    # otherwise leave a double space in the result.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def calculate_similarity(text1: str, text2: str) -> float:
    """Return the Jaccard similarity of the two texts' word sets.

    Both texts are lower-cased and split on whitespace; the score is the
    number of shared distinct tokens divided by the number of distinct
    tokens overall. Returns 0.0 when either text is empty or has no tokens.
    """
    if not (text1 and text2):
        return 0.0

    tokens_a = set(text1.lower().split())
    tokens_b = set(text2.lower().split())
    if not (tokens_a and tokens_b):
        return 0.0

    shared = tokens_a & tokens_b
    combined = tokens_a | tokens_b
    if not combined:
        return 0.0
    return len(shared) / len(combined)
# Compiled once at import time instead of on every call.
_URL_PATTERN = re.compile(
    r'https?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'  # domain (TLD up to 63 letters)...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or dotted-quad IP (octet range not checked)
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)',  # optional path / query without whitespace
    re.IGNORECASE,
)


def validate_url(url: str) -> bool:
    """Return True when *url* looks like a well-formed http(s) URL.

    The whole string must match: a scheme of ``http`` or ``https``, then a
    domain name, ``localhost`` or a dotted-quad address, an optional port,
    and an optional path/query containing no whitespace.

    Args:
        url: The candidate URL. Non-string values are rejected.

    Returns:
        True if the string matches the pattern, otherwise False.
    """
    if not isinstance(url, str):
        return False
    # fullmatch() rather than match() with "$": "$" also matches just before
    # a trailing newline, which would let "http://a.com\n" through.
    return _URL_PATTERN.fullmatch(url) is not None
def format_duration(seconds: float) -> str:
    """Format a duration in seconds as seconds, minutes or hours.

    The value is shown with one decimal place in the largest unit whose
    displayed value stays below 60 (seconds, then minutes), otherwise in
    hours.

    Args:
        seconds: The duration in seconds.

    Returns:
        The formatted duration with its unit suffix.
    """
    # Choose the unit from the value as it will be DISPLAYED (one decimal),
    # so 59.96 s becomes "1.0分鐘" rather than the odd-looking "60.0秒".
    if round(seconds, 1) < 60:
        return f"{seconds:.1f}秒"

    minutes = seconds / 60
    if round(minutes, 1) < 60:
        return f"{minutes:.1f}分鐘"

    hours = seconds / 3600
    return f"{hours:.1f}小時"
def truncate_text(text: str, max_length: int = 100) -> str:
    """Shorten *text* to at most ``max_length`` characters plus an ellipsis.

    Text that already fits is returned unchanged. Otherwise the text is cut
    at ``max_length`` characters; if that cut lands inside a word, the
    partial word is dropped by backing up to the previous space. ``"..."``
    is then appended, so the result may be up to three characters longer
    than ``max_length``.

    Args:
        text: The text to shorten; ``None`` or empty yields ``""``.
        max_length: Maximum number of characters kept before the ellipsis.
            Negative values are treated as 0.

    Returns:
        The original text, or its truncated form ending in ``"..."``.
    """
    if not text:
        return ""
    if len(text) <= max_length:
        return text

    # A negative slice bound would count from the end of the string.
    limit = max(max_length, 0)
    head = text[:limit]
    # text[limit] exists because len(text) > limit. Only back up to the
    # previous space when the cut falls mid-word; if the next character is
    # whitespace, the last word in `head` is complete and is kept.
    if not text[limit].isspace():
        head = head.rsplit(' ', 1)[0]
    return head.rstrip() + "..."