# Theme_Analyze-2 / app.py
# (Hugging Face Spaces page header — "Sooteemon's picture / Update app.py /
#  cc35bd9 verified" — scrape residue kept as a comment so the file parses.)
import streamlit as st
import requests
from transformers import pipeline
import pandas as pd
from datetime import datetime, timedelta, timezone
import calendar
# --- Imports for Yahoo Scraper ---
import feedparser
from urllib.parse import quote
from bs4 import BeautifulSoup
# --- 1. Page configuration & API key ---
st.set_page_config(page_title="📰 News Theme Analysis", layout="wide")

import os

# SECURITY NOTE(review): the NewsAPI key was hard-coded here. Prefer the
# NEWS_API_KEY environment variable; the literal is kept only as a
# backward-compatible fallback so existing deployments keep working.
# This key is now public — rotate it and remove the fallback.
API_KEY = os.getenv("NEWS_API_KEY", "88bc396d4eab4be494a4b86ec842db47")
NEWS_API_URL = "https://newsapi.org/v2/everything"
# --- 2. Model loading (upgraded) ---
@st.cache_resource
def load_model():
    """Load and cache the zero-shot theme classifier.

    Returns:
        The transformers zero-shot-classification pipeline on success, or
        ``None`` when loading fails (callers guard on a falsy classifier).
    """
    classifier = None
    try:
        print("Loading model: MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")
        classifier = pipeline(
            "zero-shot-classification",
            model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli",
        )
        print("Model loaded successfully!")
    except Exception as e:
        # Logged to stdout only; the UI reports the None result separately.
        print(f"Error loading model: {e}")
    return classifier
# Instantiate the cached classifier once at import time; the UI shows an
# error and refuses to analyze when this is None.
classifier = load_model()

# Candidate labels for zero-shot classification of headlines.
theme_labels = [
    "Earnings & Finance",
    "Product & Innovation",
    "Legal & Regulation",
    "Partnership & Deals",
    "Stock Price Movement",
    "Executive Changes",
]
# --- 3. News fetcher (method 1: NewsAPI) ---
@st.cache_data(ttl=3600)
def fetch_news_from_api(topic, api_key):
    """Fetch up to 20 recent articles about *topic* from NewsAPI.org.

    Queries a fixed allow-list of trusted sources over the last 7 days.

    Args:
        topic: Search keyword (e.g. a company name or ticker symbol).
        api_key: NewsAPI.org key, sent via the ``X-Api-Key`` header.

    Returns:
        A list of dicts with ``title``, ``summary``, ``link`` and
        ``published`` keys; an empty list on any request/parse error
        (the error is surfaced via ``st.error``).
    """
    headers = {'X-Api-Key': api_key}
    from_date = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d')
    trusted_sources = "reuters,bloomberg,the-wall-street-journal,marketwatch,cnbc"
    params = {
        'q': topic,
        'sortBy': 'publishedAt',
        'pageSize': 20,
        'from': from_date,
        'sources': trusted_sources,
    }
    try:
        # Timeout added: without one, a stalled connection hangs the app.
        response = requests.get(
            NEWS_API_URL, headers=headers, params=params, timeout=15
        )
        response.raise_for_status()
        articles = response.json().get('articles', [])
        news_list = []
        for article in articles:
            # NewsAPI marks withdrawn articles with a literal "[Removed]" title.
            if article.get('title') == "[Removed]":
                continue
            news_list.append({
                'title': article.get('title'),
                # NewsAPI returns description as an explicit null for some
                # articles; dict.get's default only covers a *missing* key,
                # so use `or` to catch None as well.
                'summary': article.get('description') or 'No description available.',
                'link': article.get('url'),
                'published': article.get('publishedAt'),
            })
        return news_list
    except Exception as e:
        st.error(f"NewsAPI Error: {e}")
        return []
# --- 4. News fetcher (method 2: Yahoo Finance RSS) ---
def _parse_feed(url, max_articles=10):
    """Parse an RSS feed, returning up to *max_articles* items from the last 7 days.

    Each item is a dict with ``title``, ``link``, ``summary`` (HTML stripped,
    truncated to 300 chars) and ``published`` (ISO-8601, UTC) keys. Entries
    without a parseable publish date are skipped; feed-level errors are
    logged and yield whatever was collected so far.
    """
    collected = []
    try:
        cutoff_utc = datetime.now(timezone.utc) - timedelta(days=7)
        parsed = feedparser.parse(url, agent='Mozilla/5.0')
        for entry in parsed.entries:
            time_struct = entry.get('published_parsed')
            if not time_struct:
                continue
            try:
                # published_parsed is a UTC time.struct_time; timegm keeps it UTC.
                entry_utc = datetime.fromtimestamp(
                    calendar.timegm(time_struct), tz=timezone.utc
                )
            except Exception:
                continue
            if entry_utc < cutoff_utc:
                continue
            summary_text = BeautifulSoup(
                entry.get('summary', ''), 'html.parser'
            ).get_text()
            collected.append({
                'title': entry.get('title', 'No title'),
                # Google News wraps the real target behind '&url='; keep the tail.
                'link': entry.get('link', '').split('&url=')[-1],
                'summary': summary_text[:300],
                'published': entry_utc.isoformat(),
            })
            if len(collected) >= max_articles:
                break
    except Exception as e:
        print(f"Feedparser Error: {e}")
    return collected
@st.cache_data(ttl=3600)
def search_yahoo_news(keyword, max_articles=10):
    """Search Yahoo Finance news for *keyword* via the Google News RSS proxy.

    Args:
        keyword: Search term; URL-quoted before being embedded in the query.
        max_articles: Upper bound on returned items.

    Returns:
        A list of article dicts (see ``_parse_feed``); empty list on error.
    """
    try:
        query = quote(keyword)
        feed_url = (
            "https://news.google.com/rss/search?q="
            f"{query}+site:finance.yahoo.com&hl=en-US&gl=US&ceid=US:en"
        )
        return _parse_feed(feed_url, max_articles)
    except Exception as e:
        st.error(f"Yahoo Search Error: {e}")
        return []
# --- 5. Analysis ---
def analyze_themes(news_list, model, labels):
    """Classify each article title into one of *labels* via zero-shot NLI.

    Args:
        news_list: Iterable of article dicts; items without a truthy
            ``title`` are skipped.
        model: A zero-shot-classification pipeline, callable as
            ``model(text, candidate_labels=...)``; falsy when unavailable.
        labels: Candidate theme labels passed to the model.

    Returns:
        A DataFrame of the classified articles with added ``theme`` (top
        label) and ``confidence`` (its score) columns; an empty DataFrame
        when the model is missing or every title fails to classify.
    """
    if not model:
        st.error("Model failed to load. Cannot analyze themes.")
        return pd.DataFrame()
    results = []
    for news in news_list:
        title = news.get('title', '')
        if not title:
            continue
        try:
            analysis = model(title, candidate_labels=labels)
            # Copy before enriching so the caller's dicts are not mutated
            # as a side effect (the original wrote into the input items).
            enriched = dict(news)
            enriched['theme'] = analysis['labels'][0]
            enriched['confidence'] = analysis['scores'][0]
            results.append(enriched)
        except Exception as e:
            print(f"Error analyzing title '{title}': {e}")
    return pd.DataFrame(results)
# --- 6. UI ---
st.title("📰 News Theme Analyzer")
st.markdown("Analyzes news from trusted sources (Reuters, Bloomberg, Yahoo...)")

topic = st.text_input("Enter a keyword or stock symbol:", "NVIDIA")

if st.button("Analyze News"):
    if not topic:
        st.warning("Please enter a topic.")
    elif not classifier:
        st.error("Model is not available. Please check logs.")
    else:
        with st.spinner(f"Fetching and analyzing news for '{topic}'..."):
            # Merge both sources, then de-duplicate by title keeping the
            # first occurrence (NewsAPI results take precedence).
            news_api = fetch_news_from_api(topic, API_KEY)
            news_yahoo = search_yahoo_news(topic)
            all_news = news_api + news_yahoo

            seen_titles = set()
            unique_news_items = []
            for item in all_news:
                if item['title'] not in seen_titles:
                    unique_news_items.append(item)
                    seen_titles.add(item['title'])

            if unique_news_items:
                df = analyze_themes(unique_news_items, classifier, theme_labels)
                if df.empty:
                    st.warning("Analysis failed, though news was fetched. Check logs.")
                else:
                    # Aggregate view: share of each theme across all articles.
                    st.subheader("📊 Theme Distribution Summary")
                    theme_counts = df['theme'].value_counts()
                    total_articles = len(df)
                    summary_data = [
                        {
                            "Theme": theme,
                            "Percentage": f"{(count / total_articles) * 100:.1f}%",
                            "Articles": f"{count} / {total_articles}",
                        }
                        for theme, count in theme_counts.items()
                    ]
                    st.dataframe(pd.DataFrame(summary_data), use_container_width=True)

                    # Per-article detail view.
                    st.markdown("---")
                    st.subheader(f"📰 Analysis Results (Found {len(df)} unique articles)")
                    for _, row in df.iterrows():
                        st.markdown(f"### [{row['title']}]({row['link']})")
                        try:
                            date_str = pd.to_datetime(row['published']).strftime("%Y-%m-%d %H:%M")
                        except (ValueError, TypeError):
                            # was a bare `except:` — catch only parse failures
                            # so KeyboardInterrupt etc. still propagate.
                            date_str = "N/A"
                        st.markdown(
                            f"**Theme:** {row['theme']} (Confidence: {row['confidence']:.2f}) \n"
                            f"**Published:** {date_str}"
                        )
                        st.markdown(f"**Summary:** {row['summary']}")
                        st.markdown("---")
            else:
                st.error("No news found from any source.")