Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from transformers import pipeline | |
| import pandas as pd | |
| from datetime import datetime, timedelta, timezone | |
| import calendar | |
| # --- Imports for Yahoo Scraper --- | |
| import feedparser | |
| from urllib.parse import quote | |
| from bs4 import BeautifulSoup | |
# --- 1. Page setup and API key ---
st.set_page_config(page_title="📰 News Theme Analysis", layout="wide")

# SECURITY: the NewsAPI key used to be hard-coded here. Prefer the
# NEWS_API_KEY environment variable; the literal remains only as a
# fallback so existing deployments keep working. Rotate this key.
import os
API_KEY = os.environ.get("NEWS_API_KEY", "88bc396d4eab4be494a4b86ec842db47")
NEWS_API_URL = "https://newsapi.org/v2/everything"
# --- 2. Model loading (upgraded) ---
@st.cache_resource
def load_model():
    """Load the zero-shot classification pipeline once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    without ``st.cache_resource`` the DeBERTa model would be re-downloaded
    and re-initialized on every click, which is prohibitively slow.

    Returns:
        The Hugging Face pipeline, or None when loading fails so the UI
        can degrade gracefully instead of crashing at import time.
    """
    try:
        print("Loading model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
        classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
        print("Model loaded successfully!")
        return classifier
    except Exception as e:  # any load failure (download, memory, ...) -> log and disable
        print(f"Error loading model: {e}")
        return None

classifier = load_model()

# Candidate themes for the zero-shot classifier.
theme_labels = ["Earnings & Finance", "Product & Innovation", "Legal & Regulation", "Partnership & Deals", "Stock Price Movement", "Executive Changes"]
# --- 3. News fetching (method 1: NewsAPI) ---
def fetch_news_from_api(topic, api_key):
    """Fetch up to 20 articles from the last 7 days via NewsAPI.org.

    Restricted to a fixed allow-list of trusted sources (Reuters,
    Bloomberg, WSJ, MarketWatch, CNBC).

    Args:
        topic: search keyword or stock symbol.
        api_key: NewsAPI key sent via the X-Api-Key header.

    Returns:
        A list of dicts with 'title', 'summary', 'link', 'published';
        [] on any failure (the error is shown in the Streamlit UI).
    """
    headers = {'X-Api-Key': api_key}
    from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
    trusted_sources = "reuters,bloomberg,the-wall-street-journal,marketwatch,cnbc"
    params = {
        'q': topic,
        'sortBy': 'publishedAt',
        'pageSize': 20,
        'from': from_date,
        'sources': trusted_sources
    }
    try:
        # timeout added: without one a stalled connection hangs the app forever
        response = requests.get(NEWS_API_URL, headers=headers, params=params, timeout=10)
        response.raise_for_status()
        articles = response.json().get('articles', [])
        news_list = []
        for article in articles:
            # NewsAPI replaces deleted articles with a literal "[Removed]" title
            if article.get('title') == "[Removed]":
                continue
            news_list.append({
                'title': article.get('title'),
                # 'description' is often present but null in the JSON, in which
                # case .get(key, default) still returns None — `or` covers both.
                'summary': article.get('description') or 'No description available.',
                'link': article.get('url'),
                'published': article.get('publishedAt')
            })
        return news_list
    except Exception as e:  # UI boundary: report any failure, return empty
        st.error(f"NewsAPI Error: {e}")
        return []
# --- 4. News fetching (method 2: Yahoo Finance RSS) ---
def _parse_feed(url, max_articles=10):
    """Parse an RSS feed and return entries from the last 7 days.

    Each returned dict has 'title', 'link', 'summary' (HTML stripped,
    truncated to 300 chars) and 'published' (ISO-8601, UTC). Entries
    without a parseable publication date are skipped. On feed errors the
    partial result collected so far is returned.
    """
    collected = []
    try:
        week_ago = datetime.now(timezone.utc) - timedelta(days=7)
        parsed = feedparser.parse(url, agent='Mozilla/5.0')
        for item in parsed.entries:
            when = item.get('published_parsed')
            if not when:
                continue
            try:
                # published_parsed is a UTC struct_time -> aware datetime
                when_utc = datetime.fromtimestamp(calendar.timegm(when), tz=timezone.utc)
            except Exception:
                continue
            if when_utc < week_ago:
                continue
            # Strip the HTML that Google News embeds in the summary field.
            summary_text = BeautifulSoup(item.get('summary', ''), 'html.parser').get_text()
            collected.append({
                'title': item.get('title', 'No title'),
                'link': item.get('link', '').split('&url=')[-1],
                'summary': summary_text[:300],
                'published': when_utc.isoformat()
            })
            if len(collected) >= max_articles:
                break
    except Exception as e:
        print(f"Feedparser Error: {e}")
    return collected
def search_yahoo_news(keyword, max_articles=10):
    """Fetch Yahoo Finance articles for *keyword* via a Google News RSS query.

    Builds a Google News RSS search scoped to finance.yahoo.com and
    delegates parsing to ``_parse_feed``. Returns [] on error (the error
    is shown in the Streamlit UI).
    """
    try:
        query = quote(keyword)
        feed_url = (
            "https://news.google.com/rss/search?q="
            + query
            + "+site:finance.yahoo.com&hl=en-US&gl=US&ceid=US:en"
        )
        return _parse_feed(feed_url, max_articles)
    except Exception as e:
        st.error(f"Yahoo Search Error: {e}")
        return []
# --- 5. Analysis ---
def analyze_themes(news_list, model, labels):
    """Classify each article title into one of *labels* with a zero-shot model.

    Args:
        news_list: list of article dicts containing at least a 'title' key.
        model: zero-shot pipeline callable, model(text, candidate_labels=...).
        labels: candidate theme names.

    Returns:
        A DataFrame of the input articles plus 'theme' (top label) and
        'confidence' (its score). Articles with an empty title, or whose
        classification raises, are skipped. The input dicts are NOT
        mutated (the original wrote 'theme'/'confidence' into the
        caller's dicts as a side effect).
    """
    if not model:
        st.error("Model failed to load. Cannot analyze themes.")
        return pd.DataFrame()
    results = []
    for news in news_list:
        title = news.get('title', '')
        if not title:
            continue
        try:
            analysis = model(title, candidate_labels=labels)
            record = dict(news)  # copy so callers' dicts stay untouched
            record['theme'] = analysis['labels'][0]
            record['confidence'] = analysis['scores'][0]
            results.append(record)
        except Exception as e:
            # Best-effort batch: one bad title must not abort the rest.
            print(f"Error analyzing title '{title}': {e}")
    return pd.DataFrame(results)
# --- 6. UI ---
st.title("📰 News Theme Analyzer")
st.markdown("Analyzes news from trusted sources (Reuters, Bloomberg, Yahoo...)")

topic = st.text_input("Enter a keyword or stock symbol:", "NVIDIA")

if st.button("Analyze News"):
    if not topic:
        st.warning("Please enter a topic.")
    elif not classifier:
        st.error("Model is not available. Please check logs.")
    else:
        with st.spinner(f"Fetching and analyzing news for '{topic}'..."):
            # Merge both sources, then de-duplicate by exact title match.
            news_api = fetch_news_from_api(topic, API_KEY)
            news_yahoo = search_yahoo_news(topic)
            all_news = news_api + news_yahoo
            seen_titles = set()
            unique_news_items = []
            for item in all_news:
                if item['title'] not in seen_titles:
                    unique_news_items.append(item)
                    seen_titles.add(item['title'])
            if unique_news_items:
                df = analyze_themes(unique_news_items, classifier, theme_labels)
                if df.empty:
                    st.warning("Analysis failed, though news was fetched. Check logs.")
                else:
                    # Summary table: share of articles per detected theme.
                    st.subheader("📊 Theme Distribution Summary")
                    theme_counts = df['theme'].value_counts()
                    total_articles = len(df)
                    summary_data = []
                    for theme, count in theme_counts.items():
                        percentage = (count / total_articles) * 100
                        summary_data.append({
                            "Theme": theme,
                            "Percentage": f"{percentage:.1f}%",
                            "Articles": f"{count} / {total_articles}"
                        })
                    st.dataframe(pd.DataFrame(summary_data), use_container_width=True)
                    st.markdown("---")
                    # One card per article: title link, theme, confidence, date.
                    st.subheader(f"📰 Analysis Results (Found {len(df)} unique articles)")
                    for index, row in df.iterrows():
                        st.markdown(f"### [{row['title']}]({row['link']})")
                        try:
                            date_str = pd.to_datetime(row['published']).strftime("%Y-%m-%d %H:%M")
                        except Exception:
                            # was a bare `except:` — narrowed so Ctrl-C /
                            # SystemExit are no longer swallowed here
                            date_str = "N/A"
                        st.markdown(f"**Theme:** {row['theme']} (Confidence: {row['confidence']:.2f}) \n**Published:** {date_str}")
                        st.markdown(f"**Summary:** {row['summary']}")
                        st.markdown("---")
            else:
                st.error("No news found from any source.")