Spaces:
Sleeping
Sleeping
import base64
import html
import io
import logging
import os
import random
import traceback
from datetime import datetime, timedelta

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from jinja2 import Template
from matplotlib import font_manager
# ===== Fonts and Styles =====
# Apply the style sheet FIRST. A style sheet may carry its own font.* keys, so
# font settings written to rcParams before style.use() can be overwritten by it.
plt.style.use("seaborn-v0_8")

# CJK-capable sans-serif fonts, in fallback order, used when no local font file
# is shipped next to the script.
_cjk_fonts = ['Microsoft JhengHei', 'Noto Sans TC', 'SimHei', 'Arial Unicode MS']

# Load local SimHei font if available
simhei_path = 'SimHei.ttf'  # Assuming it's .ttf; change to .tiff if needed (though .ttf is standard)
if os.path.exists(simhei_path):
    # Creating FontProperties alone does not make the file discoverable by
    # family name; it has to be registered with the font manager first.
    font_manager.fontManager.addfont(simhei_path)
    font_prop = font_manager.FontProperties(fname=simhei_path)
    # Use the family name stored in the file itself and try it before the rest.
    _local_name = font_prop.get_name()
    _cjk_fonts = [_local_name] + [name for name in _cjk_fonts if name != _local_name]

plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = _cjk_fonts
# Render the minus sign with an ASCII hyphen instead of U+2212.
plt.rcParams['axes.unicode_minus'] = False
# ===== Logging =====
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# ===== Parameters =====
# Names of the candidates covered by the report.
candidates = [
    "Hsu Chih-chieh",
    "Chiu Yi-ying",
    "Lai Jui-lung",
    "Lin Dai-hua",
    "Ko Chih-en",
]
# Size of the look-back window, in days.
days_back = 7
# Upper bound on the number of posts per candidate.
max_tweets_per_candidate = 20
# CSV file names, relative to the working directory.
news_file = "news_sample.csv"
history_file = "history_sentiment.csv"
# ===== Sentiment Analysis =====
# Prefer the transformers model; if it cannot be imported or loaded, fall back
# to a random placeholder so the rest of the report can still be produced.
try:
    from transformers import pipeline

    sentiment_pipeline = pipeline(
        "sentiment-analysis",
        model="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    )

    def sentiment(text):
        """Classify *text* with the loaded pipeline.

        Returns the first result of the pipeline call for *text*; callers in
        this file read its ``"label"`` and ``"score"`` entries.
        """
        return sentiment_pipeline(text)[0]

except Exception:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, e.g. when the user aborts during model loading.
    # The failure is logged because the fallback output is not a real analysis.
    logging.warning(
        "Sentiment model unavailable; falling back to RANDOM placeholder sentiment.",
        exc_info=True,
    )

    def sentiment(text):
        """Return a random placeholder result; *text* is ignored.

        The result has the same keys as the model-backed version: ``"label"``
        is one of positive/negative/neutral and ``"score"`` is a float drawn
        uniformly from [0.3, 0.9].
        """
        return {
            "label": random.choice(["positive", "negative", "neutral"]),
            "score": random.uniform(0.3, 0.9),
        }
# ===== Simulate Post Fetching =====
def fetch_tweets(candidate):
    """Build a DataFrame of simulated posts for *candidate*.

    Nothing is fetched from a network: each row gets a random timestamp inside
    the ``days_back`` window, a random ``userN`` author and a text picked at
    random from the canned samples for that candidate. Names without canned
    samples get a generated "<name>'s post <i>" text instead. The number of
    rows is random, between 5 and ``max_tweets_per_candidate`` inclusive.

    Returns a DataFrame with the columns Date, User, Content and Candidate.
    """
    canned_posts = {
        "Hsu Chih-chieh": [
            "Hsu Chih-chieh actively participates in local activities",
            "Hsu Chih-chieh criticized for empty policies",
            "Support Hsu Chih-chieh, build a new future for Kaohsiung!",
        ],
        "Chiu Yi-ying": [
            "Chiu Yi-ying strongly states intention to run for mayor",
            "Chiu Yi-ying criticizes Lin Dai-hua",
            "Chiu Yi-ying promotes Hakka culture",
        ],
        "Lai Jui-lung": [
            "Lai Jui-lung promotes marine economy",
            "Lai Jui-lung leads in polls",
            "Lai Jui-lung questioned for lack of experience",
        ],
        "Lin Dai-hua": [
            "Lin Dai-hua actively engages with grassroots",
            "Lin Dai-hua involved in assistant fee controversy",
            "Lin Dai-hua receives support from Zheng Guohui",
        ],
        "Ko Chih-en": [
            "Ko Chih-en leads significantly in polls",
            "Ko Chih-en criticized for missing disaster inspection",
            "Ko Chih-en promotes youth policies",
        ],
    }

    # Draw the row count first, then the per-row values in column order, so the
    # sequence of random draws is Date -> User -> Content for every row.
    post_count = random.randint(5, max_tweets_per_candidate)
    rows = []
    for idx in range(post_count):
        posted_at = datetime.now() - timedelta(days=random.randint(0, days_back - 1))
        author = f"user{random.randint(1, 100)}"
        fallback_texts = [f"{candidate}'s post {idx}"]
        text = random.choice(canned_posts.get(candidate, fallback_texts))
        rows.append(
            {
                "Date": posted_at,
                "User": author,
                "Content": text,
                "Candidate": candidate,
            }
        )
    return pd.DataFrame(rows)
# ===== Tool: Matplotlib to base64 =====
def fig_to_base64():
    """Serialize the current pyplot figure to a base64-encoded PNG string.

    The figure is saved at 120 dpi with a tight bounding box into an in-memory
    buffer. The current figure is closed afterwards, including when saving
    raises, so a failed chart does not leave an open figure behind.

    Returns:
        str: the PNG bytes, base64-encoded and decoded as UTF-8 text.
    """
    try:
        with io.BytesIO() as buf:
            plt.savefig(buf, format="png", dpi=120, bbox_inches="tight")
            return base64.b64encode(buf.getvalue()).decode("utf-8")
    finally:
        plt.close()
# ===== Multi-Chart Generator =====
def generate_charts(all_df, summary, df_hist):
    """Draw the seven report charts and return them as base64 PNG strings.

    Args:
        all_df: per-post frame; the Date, Sentiment and Candidate columns are read.
        summary: per-candidate frame; the three "... Ratio" columns are read.
        df_hist: history frame; Candidate, Date and the three ratio columns are read.

    Returns:
        dict mapping an image key (``img_b64_today``, ``img_b64_trend``,
        ``img_social_sentiment``, ``img_platform_performance``,
        ``img_candidate_volume``, ``img_candidate_sentiment``,
        ``img_knowledge_graph``) to the encoded image.
    """
    ratio_columns = ['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']

    def stacked_ratio_chart(title):
        # Charts 1 and 6 are the same stacked bar chart with different titles.
        figure = plt.figure(figsize=(8, 5))
        summary[ratio_columns].plot(
            kind='bar', stacked=True, colormap='coolwarm', ax=figure.gca()
        )
        plt.title(title)
        plt.ylabel("Ratio")
        plt.xlabel("Candidate")
        plt.legend(["Positive", "Negative", "Neutral"])
        return fig_to_base64()

    images = {}

    # 1. Daily Sentiment Ratios
    images["img_b64_today"] = stacked_ratio_chart("Candidates' Daily Social Sentiment Ratios")

    # 2. Historical Sentiment Trends
    plt.figure(figsize=(10, 5))
    trend_lines = (
        ('Positive Ratio', 'o', 'Positive'),
        ('Negative Ratio', 'x', 'Negative'),
        ('Neutral Ratio', 's', 'Neutral'),
    )
    for name in candidates:
        history_rows = df_hist[df_hist['Candidate'] == name]
        if history_rows.empty:
            continue
        for column, marker, suffix in trend_lines:
            plt.plot(
                history_rows['Date'],
                history_rows[column],
                marker=marker,
                label=f"{name} {suffix}",
            )
    plt.title("Candidates' Historical Sentiment Trends")
    plt.xticks(rotation=45)
    plt.ylabel("Ratio")
    plt.xlabel("Date")
    plt.legend()
    images["img_b64_trend"] = fig_to_base64()

    # 3. Social Sentiment Trends
    daily_by_sentiment = [pd.Grouper(key='Date', freq='D'), 'Sentiment']
    sentiment_counts = all_df.groupby(daily_by_sentiment).size().unstack(fill_value=0)
    # Turn the daily counts into per-day shares.
    day_totals = sentiment_counts.sum(axis=1)
    sentiment_trend = sentiment_counts.div(day_totals, axis=0).fillna(0)
    plt.figure(figsize=(8, 5))
    display_names = {'positive': 'Positive', 'negative': 'Negative', 'neutral': 'Neutral'}
    for key, shown in display_names.items():
        if key not in sentiment_trend.columns:
            continue
        plt.plot(sentiment_trend.index, sentiment_trend[key], marker='o', label=shown)
    plt.title("Social Sentiment Trends")
    plt.xlabel("Date")
    plt.ylabel("Ratio")
    plt.legend()
    images["img_social_sentiment"] = fig_to_base64()

    # 4. Platform Performance
    # The per-platform volumes are random numbers, not measured data.
    platforms = ["X", "Facebook", "Instagram", "PTT", "Line"]
    platform_counts = pd.Series({name: random.randint(10, 100) for name in platforms})
    plt.figure(figsize=(8, 5))
    plt.bar(platforms, platform_counts, color='skyblue')
    plt.title("Platform Post Volumes")
    plt.xlabel("Platform")
    plt.ylabel("Post Count")
    images["img_platform_performance"] = fig_to_base64()

    # 5. Candidates' Volume Trends
    daily_by_candidate = [pd.Grouper(key='Date', freq='D'), 'Candidate']
    candidate_trend = all_df.groupby(daily_by_candidate).size().unstack(fill_value=0)
    plt.figure(figsize=(8, 5))
    for name in candidates:
        if name not in candidate_trend.columns:
            continue
        plt.plot(candidate_trend.index, candidate_trend[name], marker='o', label=name)
    plt.title("Candidates' Post Volume Trends")
    plt.xlabel("Date")
    plt.ylabel("Post Count")
    plt.legend()
    images["img_candidate_volume"] = fig_to_base64()

    # 6. Candidates' Sentiment Analysis
    images["img_candidate_sentiment"] = stacked_ratio_chart(
        "Candidates' Post Sentiment Analysis (Positive/Negative/Neutral)"
    )

    # 7. Knowledge Graph
    # Each candidate is linked only to the next one in the `candidates` list.
    _, axes = plt.subplots(figsize=(8, 6))
    graph = nx.Graph()
    graph.add_nodes_from(candidates)
    graph.add_edges_from(zip(candidates, candidates[1:]))
    nx.draw(
        graph,
        nx.spring_layout(graph),
        with_labels=True,
        node_color='lightgreen',
        font_size=12,
        ax=axes,
    )
    plt.title("Candidates' Knowledge Graph")
    images["img_knowledge_graph"] = fig_to_base64()

    return images
# ===== Main Analysis Function =====
# Embed the template as a string to avoid file dependency and ensure syntax is correct
_REPORT_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>2026 Kaohsiung Mayoral Election Public Opinion Analysis Report</title>
<script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-100 font-sans leading-normal tracking-normal">
<div class="container mx-auto p-4">
<h1 class="text-3xl font-bold mb-4">2026 Kaohsiung Mayoral Election Public Opinion Analysis Report</h1>
<p class="mb-4">Report Date: {{ report_date }}</p>
<h2 class="text-2xl font-bold mb-2">Engagement Summary</h2>
{{ engagement_table | safe }}
<h2 class="text-2xl font-bold mb-2">News Summary</h2>
<ul class="list-disc pl-5 mb-4">
{% for key, value in news_summary %}
<li><strong>{{ key }}</strong>: {{ value }}</li>
{% endfor %}
</ul>
<h2 class="text-2xl font-bold mb-2">News Details</h2>
{{ news_table | safe }}
<h2 class="text-2xl font-bold mb-2">Today's Sentiment Ratios</h2>
<img src="data:image/png;base64,{{ img_b64_today }}" alt="Today's Sentiment Ratios" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Historical Sentiment Trends</h2>
<img src="data:image/png;base64,{{ img_b64_trend }}" alt="Historical Sentiment Trends" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Social Sentiment Trends</h2>
<img src="data:image/png;base64,{{ img_social_sentiment }}" alt="Social Sentiment Trends" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Platform Performance</h2>
<img src="data:image/png;base64,{{ img_platform_performance }}" alt="Platform Performance" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Candidates' Volume Trends</h2>
<img src="data:image/png;base64,{{ img_candidate_volume }}" alt="Candidates' Volume Trends" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Candidates' Sentiment Analysis</h2>
<img src="data:image/png;base64,{{ img_candidate_sentiment }}" alt="Candidates' Sentiment Analysis" class="mb-4">
<h2 class="text-2xl font-bold mb-2">Knowledge Graph</h2>
<img src="data:image/png;base64,{{ img_knowledge_graph }}" alt="Knowledge Graph" class="mb-4">
</div>
</body>
</html>
"""


def run_analysis():
    """Run the whole pipeline and return the report as an HTML string.

    Steps: build the simulated posts for every candidate, score each post's
    sentiment, aggregate per-candidate ratios, merge today's ratios into the
    history CSV (``history_file``), draw the charts, load the news CSV
    (``news_file``) or use built-in placeholder news, and render the template.

    Side effects:
        Rewrites ``history_file`` on every successful run.

    Returns:
        str: the rendered report. If any step raises, the exception is logged
        and an HTML ``<pre>`` block with the escaped traceback is returned
        instead, so this function does not raise ``Exception`` subclasses.
    """
    try:
        # --- Posts & Sentiment Analysis ---
        all_df = pd.concat([fetch_tweets(c) for c in candidates], ignore_index=True)
        # Score every post exactly once so label and confidence always come
        # from the same result (and the model is not run twice per post).
        scored = [sentiment(text) for text in all_df['Content']]
        all_df['Sentiment'] = [result['label'] for result in scored]
        all_df['Confidence'] = [result['score'] for result in scored]

        # --- Statistics ---
        summary = all_df.groupby(['Candidate', 'Sentiment']).size().unstack(fill_value=0)
        summary['Total Posts'] = summary.sum(axis=1)
        # Guard against division by zero; .get() covers labels that never occurred.
        totals = summary['Total Posts'].replace(0, 1)
        summary['Positive Ratio'] = summary.get('positive', 0) / totals
        summary['Negative Ratio'] = summary.get('negative', 0) / totals
        summary['Neutral Ratio'] = summary.get('neutral', 0) / totals

        # --- Historical Data ---
        today_str = datetime.now().strftime('%Y-%m-%d')
        hist_row = summary[['Positive Ratio', 'Negative Ratio', 'Neutral Ratio']].copy()
        hist_row['Date'] = today_str
        hist_row['Candidate'] = summary.index
        if os.path.exists(history_file):
            df_hist = pd.concat([pd.read_csv(history_file), hist_row], ignore_index=True)
            # Re-running on the same day replaces that day's rows instead of
            # piling up duplicate (Date, Candidate) entries in the history.
            df_hist = df_hist.drop_duplicates(subset=['Date', 'Candidate'], keep='last')
        else:
            df_hist = hist_row
        df_hist.to_csv(history_file, index=False)

        # --- Charts ---
        charts = generate_charts(all_df, summary, df_hist)

        # --- News ---
        if os.path.exists(news_file):
            df_news = pd.read_csv(news_file)
            news_summary = df_news.groupby('Category').size().to_dict()
            # to_html escapes cell contents by default.
            news_table = df_news.to_html(index=False, classes="min-w-full border border-gray-200")
        else:
            news_summary = {
                "Polls": "Ko Chih-en leads Green Camp candidates in multiple polls.",
                "Intra-party Competition": "Intense competition in the DPP primary.",
                "Controversy": "Lin Dai-hua involved in assistant fee controversy."
            }
            news_table = "<p>No news data available</p>"
        # Convert news_summary to list of tuples to support iteration in template.
        # An empty list is passed through as-is: the template loop then simply
        # renders no items (a replacement string would break the unpacking).
        news_summary = list(news_summary.items())

        # --- Engagement Table ---
        positive_share = all_df['Sentiment'].value_counts(normalize=True).get('positive', 0)
        active_platforms = 5  # fixed figure, as is the interaction rate below
        engagement_table = f"""
<table class="min-w-full bg-white border border-gray-200">
<tr class="bg-gray-100 border-b">
<th class="py-2 px-4 border-r">Total Engagement</th>
<td class="py-2 px-4 border-r">{len(all_df)}</td>
<th class="py-2 px-4 border-r">Positive Sentiment Ratio</th>
<td class="py-2 px-4 border-r">{positive_share:.1%}</td>
<th class="py-2 px-4 border-r">Average Interaction Rate</th>
<td class="py-2 px-4 border-r">3.9%</td>
<th class="py-2 px-4 border-r">Active Platforms</th>
<td class="py-2 px-4">{active_platforms}</td>
</tr>
</table>
"""

        # --- HTML Rendering ---
        # Autoescape so text coming from the news CSV cannot inject markup;
        # the two pre-built HTML tables are marked `| safe` in the template.
        template = Template(_REPORT_TEMPLATE, autoescape=True)
        html_content = template.render(
            report_date=datetime.now().strftime('%Y-%m-%d %H:%M'),
            engagement_table=engagement_table,
            news_summary=news_summary,
            news_table=news_table if news_table else "<p>No news data provided</p>",
            **charts
        )
        return html_content
    except Exception:
        # Top-level boundary for the UI callback: log, then show the escaped
        # traceback instead of letting the callback fail.
        logging.exception("Analysis failed")
        return f"<pre>❌ Analysis failed:\n{html.escape(traceback.format_exc())}</pre>"
# ===== Gradio Frontend =====
# Only start the web UI when this file is executed as a script.
if __name__ == "__main__":
    # No input components: the report is produced by run_analysis() alone and
    # shown in a single HTML output.
    report_output = gr.HTML()
    iface = gr.Interface(
        title="2026 Kaohsiung Mayoral Election Public Opinion Analysis",
        fn=run_analysis,
        inputs=[],
        outputs=report_output,
    )
    # Listen on all interfaces, port 7860.
    iface.launch(server_port=7860, server_name="0.0.0.0")