Spaces:
Sleeping
Sleeping
| import sqlite3 | |
| import pandas as pd | |
| import os | |
| from datetime import datetime | |
| import streamlit as st | |
| # def get_db_path(): | |
| # db_path = "../database/stock_insights.db" | |
| # if not os.path.exists(db_path) and os.path.exists("/tmp/stock_insights.db"): | |
| # db_path = "/tmp/stock_insights.db" | |
| # return db_path | |
| # download from Hugging Face dataset | |
| def ensure_db(): | |
| repo_path = os.path.join(os.getcwd(), "database", "stock_insights.db") | |
| if os.path.exists(repo_path): | |
| return repo_path | |
| candidates = [ | |
| os.path.join("/app", "database", "stock_insights.db"), | |
| os.path.join("/tmp", "database", "stock_insights.db"), | |
| os.path.join("/tmp", "stock_insights.db"), | |
| ] | |
| for p in candidates: | |
| if os.path.exists(p): | |
| return p | |
| try: | |
| from huggingface_hub import hf_hub_download | |
| tmp_dir = os.path.join("/tmp", "database") | |
| os.makedirs(tmp_dir, exist_ok=True) | |
| local_file = hf_hub_download( | |
| repo_id="PuppetLover/stock_insights", | |
| filename="stock_insights.db", | |
| repo_type="dataset", | |
| local_dir=tmp_dir, | |
| local_dir_use_symlinks=False, | |
| ) | |
| return local_file | |
| except Exception as e: | |
| local_rel = os.path.join("database", "stock_insights.db") | |
| if os.path.exists(local_rel): | |
| return local_rel | |
| raise RuntimeError(f"Cannot access or download database file: {e}") | |
| # gọi và gán hằng DB_PATH dùng trong module | |
| DB_PATH = ensure_db() | |
| def generate_stock_report(stock_code, time_period): | |
| start_date, end_date = time_period | |
| today = datetime.now().date() | |
| # db_path = os.path.join("database", "stock_insights.db") | |
| db_path = DB_PATH | |
| report = { | |
| "stock_code": stock_code, | |
| "report_period": f"{start_date} to {end_date}" | |
| } | |
| with sqlite3.connect(db_path) as conn: | |
| # Tạo bảng tạm relevant_articles | |
| conn.execute("DROP TABLE IF EXISTS relevant_articles;") | |
| conn.execute(""" | |
| CREATE TEMP TABLE relevant_articles AS | |
| SELECT DISTINCT article_id FROM entities | |
| WHERE entity_text =? | |
| AND entity_type IN ('STOCK', 'COMPANY') | |
| AND confidence = 'high' | |
| AND article_id IN ( | |
| SELECT article_id FROM articles WHERE publish_date BETWEEN ? AND ? | |
| ); | |
| """, (stock_code, start_date, end_date)) | |
| # 1. OVERALL SENTIMENT | |
| q_sentences = """ | |
| SELECT s.sentiment_score, s.sentiment_label, a.publish_date | |
| FROM sentences s | |
| JOIN articles a ON s.article_id = a.article_id | |
| WHERE s.article_id IN ( | |
| SELECT s2.sentence_id FROM sentences s2 | |
| WHERE s2.article_id IN (SELECT article_id FROM relevant_articles) | |
| ) | |
| AND s.sentiment_score IS NOT NULL; | |
| """ | |
| df_sent = pd.read_sql_query(q_sentences, conn) | |
| if not df_sent.empty: | |
| df_sent['publish_date'] = pd.to_datetime(df_sent['publish_date']).dt.date | |
| df_sent['days_ago'] = (today - df_sent['publish_date']).apply(lambda x: x.days) | |
| df_sent['weight'] = 1 / (df_sent['days_ago'] + 1) | |
| weighted_score = (df_sent['sentiment_score'] * df_sent['weight']).sum() / df_sent['weight'].sum() | |
| # Chuẩn hóa nhãn sentiment về lower-case | |
| df_sent['sentiment_label'] = df_sent['sentiment_label'].str.lower() | |
| sentiment_counts = df_sent['sentiment_label'].value_counts().to_dict() | |
| trend = "Tích cực" if weighted_score > 0.1 else "Tiêu cực" if weighted_score < -0.1 else "Trung tính" | |
| else: | |
| weighted_score, sentiment_counts, trend = 0.0, {}, "Không có dữ liệu" | |
| report["overall_sentiment"] = { | |
| "score": weighted_score, | |
| "trend": trend, | |
| "positive_mentions": sentiment_counts.get("positive", 0), | |
| "negative_mentions": sentiment_counts.get("negative", 0), | |
| "neutral_mentions": sentiment_counts.get("neutral", 0) | |
| } | |
| # 2. KEY EVENTS, RISKS, PRICE ACTIONS | |
| def get_key_entities(entity_type): | |
| query = f""" | |
| SELECT | |
| e.entity_text, | |
| COUNT(e.entity_id) as count, | |
| AVG(s.sentiment_score) as avg_sentiment | |
| FROM entities e | |
| JOIN sentences s ON e.sentence_id = s.sentence_id | |
| WHERE e.article_id IN (SELECT article_id FROM relevant_articles) | |
| AND e.entity_type =? | |
| GROUP BY e.entity_text | |
| ORDER BY count DESC | |
| LIMIT 5; | |
| """ | |
| df = pd.read_sql_query(query, conn, params=(entity_type,)) | |
| def score_to_label(score): | |
| if score is None: return "N/A" | |
| return "Tích cực" if score > 0.1 else "Tiêu cực" if score < -0.1 else "Trung tính" | |
| df['sentiment'] = df['avg_sentiment'].apply(score_to_label) | |
| return df.to_dict('records') | |
| report["key_events"] = get_key_entities('EVENT') | |
| report["key_price_actions"] = get_key_entities('PRICE_ACTION') | |
| report["key_risks_mentioned"] = get_key_entities('RISK') | |
| # 3. TOP RELATED ENTITIES | |
| q_related = """ | |
| SELECT e.entity_type, e.entity_text | |
| FROM entities e | |
| WHERE e.article_id IN (SELECT article_id FROM relevant_articles) | |
| AND e.entity_text!=? | |
| AND e.entity_type IN ('STOCK', 'COMPANY', 'PERSON'); | |
| """ | |
| df_related = pd.read_sql_query(q_related, conn, params=(stock_code,)) | |
| top_related = {} | |
| if not df_related.empty: | |
| for etype in ['STOCK', 'COMPANY', 'PERSON']: | |
| top_related[etype.lower() + 's'] = df_related[df_related['entity_type'] == etype]['entity_text'].value_counts().head(3).index.tolist() | |
| report["top_related_entities"] = top_related | |
| # 4. SOURCE ARTICLES | |
| q_articles = """ | |
| SELECT a.title, a.source_url, s.sentiment_label | |
| FROM articles a | |
| JOIN sentences s ON a.article_id = s.article_id | |
| WHERE a.article_id IN (SELECT article_id FROM relevant_articles) | |
| GROUP BY a.article_id | |
| ORDER BY a.publish_date DESC | |
| LIMIT 5; | |
| """ | |
| df_articles = pd.read_sql_query(q_articles, conn) | |
| report["source_articles"] = df_articles.to_dict('records') | |
| return report | |
| # --- HIỂN THỊ BÁO CÁO --- | |
| def show_report(report_data, summary, stock_code_input): | |
| st.markdown( | |
| f"<h3 style='text-align: center; color: #30cfd0; margin-top:2rem;'>Báo cáo Phân tích cho {report_data.get('stock_code', stock_code_input)}</h3>", unsafe_allow_html=True) | |
| st.markdown( | |
| f"<p style='text-align: center; color: #94a3b8;'>Giai đoạn: {report_data.get('report_period', 'N/A')}</p>", unsafe_allow_html=True) | |
| st.markdown("#### 🤖 Tóm tắt từ AI") | |
| st.info(summary) | |
| # Tổng quan cảm xúc | |
| st.markdown("#### 📊 Tổng quan Cảm xúc") | |
| sentiment = report_data['overall_sentiment'] | |
| score = sentiment['score'] | |
| trend_color = "normal" | |
| if sentiment['trend'] == "Tích cực": | |
| trend_color = "normal" | |
| if sentiment['trend'] == "Tiêu cực": | |
| trend_color = "inverse" | |
| st.metric( | |
| label="Điểm Cảm xúc (có trọng số thời gian)", | |
| value=f"{score:.2f}" if score is not None else "N/A", | |
| delta=sentiment['trend'], | |
| delta_color=trend_color | |
| ) | |
| col1, col2, col3 = st.columns(3) | |
| col1.metric("👍 Tích cực", sentiment['positive_mentions']) | |
| col2.metric("👎 Tiêu cực", sentiment['negative_mentions']) | |
| col3.metric("😐 Trung tính", sentiment['neutral_mentions']) | |
| # Các bảng chi tiết | |
| st.markdown("---") | |
| col_events, col_risks = st.columns(2) | |
| with col_events: | |
| st.markdown("#### ⚡ Sự kiện Nổi bật") | |
| if report_data["key_events"]: | |
| # Kiểm tra key thực tế | |
| df_events = pd.DataFrame(report_data["key_events"]) | |
| if 'avg_sentiment' in df_events.columns: | |
| df_events = df_events.rename( | |
| columns={'entity_text': 'Sự kiện', 'avg_sentiment': 'Sentiment'}) | |
| show_cols = ['Sự kiện', 'count', 'Sentiment'] | |
| elif 'sentiment' in df_events.columns: | |
| df_events = df_events.rename( | |
| columns={'entity_text': 'Sự kiện'}) | |
| show_cols = ['Sự kiện', 'count', 'sentiment'] | |
| else: | |
| df_events = df_events.rename( | |
| columns={'entity_text': 'Sự kiện'}) | |
| show_cols = ['Sự kiện', 'count'] | |
| st.dataframe(df_events[show_cols], use_container_width=True) | |
| else: | |
| st.write("Không có sự kiện nổi bật.") | |
| with col_risks: | |
| st.markdown("#### ⚠️ Rủi ro được đề cập") | |
| if report_data["key_risks_mentioned"]: | |
| df_risks = pd.DataFrame(report_data["key_risks_mentioned"]) | |
| if 'avg_sentiment' in df_risks.columns: | |
| df_risks = df_risks.rename( | |
| columns={'entity_text': 'Rủi ro', 'avg_sentiment': 'Sentiment'}) | |
| show_cols = ['Rủi ro', 'count', 'Sentiment'] | |
| elif 'sentiment' in df_risks.columns: | |
| df_risks = df_risks.rename( | |
| columns={'entity_text': 'Rủi ro'}) | |
| show_cols = ['Rủi ro', 'count', 'sentiment'] | |
| else: | |
| df_risks = df_risks.rename( | |
| columns={'entity_text': 'Rủi ro'}) | |
| show_cols = ['Rủi ro', 'count'] | |
| st.dataframe(df_risks[show_cols], use_container_width=True) | |
| else: | |
| st.write("Không có rủi ro nổi bật.") | |
| st.markdown("#### 📈 Hành động Giá Chính") | |
| if report_data["key_price_actions"]: | |
| df_price = pd.DataFrame(report_data["key_price_actions"]) | |
| if 'avg_sentiment' in df_price.columns: | |
| df_price = df_price.rename( | |
| columns={'entity_text': 'Hành động giá', 'avg_sentiment': 'Sentiment'}) | |
| show_cols = ['Hành động giá', 'count', 'Sentiment'] | |
| elif 'sentiment' in df_price.columns: | |
| df_price = df_price.rename( | |
| columns={'entity_text': 'Hành động giá'}) | |
| show_cols = ['Hành động giá', 'count', 'sentiment'] | |
| else: | |
| df_price = df_price.rename( | |
| columns={'entity_text': 'Hành động giá'}) | |
| show_cols = ['Hành động giá', 'count'] | |
| st.dataframe(df_price[show_cols], use_container_width=True) | |
| else: | |
| st.write("Không có hành động giá nổi bật.") | |
| # Thực thể liên quan | |
| st.markdown("---") | |
| st.markdown("#### 🔗 Các Thực thể Liên quan nhiều nhất") | |
| related = report_data['top_related_entities'] | |
| if any(related.values()): | |
| for etype, entities in related.items(): | |
| if entities: | |
| st.markdown( | |
| f"**{etype.replace('_', ' ').title()}:** {', '.join(entities)}") | |
| else: | |
| st.write("Không tìm thấy thực thể liên quan nổi bật.") | |
| # Nguồn bài viết | |
| st.markdown("---") | |
| st.markdown("#### 📰 Nguồn Bài viết Tham khảo") | |
| if report_data["source_articles"]: | |
| for article in report_data["source_articles"]: | |
| st.markdown( | |
| f"- [{article['title']}]({article['source_url']}) - *Cảm xúc: {article['sentiment_label']}*") | |
| else: | |
| st.write("Không có bài viết nào trong khoảng thời gian này.") | |
| st.markdown("</div>", unsafe_allow_html=True) |