import streamlit as st
import pandas as pd
import plotly.express as px
import os
import re
import io
import base64

# ============================================
# 0. Page configuration & CSS
# ============================================
st.set_page_config(
    page_title="高校中华民族共同体分析平台",
    layout="wide",
    initial_sidebar_state="expanded"
)

# NOTE(review): the original custom <style> block was lost in extraction; the
# empty string keeps the call a harmless no-op until the CSS is restored.
st.markdown("""
""", unsafe_allow_html=True)


# ============================================
# 1. Top banner
# ============================================
def render_header():
    """Render the page banner: school badge | title block | team badge."""

    def _badge_html(path, alt):
        # Inline the image as a base64 data URI so no static hosting is needed;
        # fall back to an empty placeholder with the same footprint.
        if os.path.exists(path):
            with open(path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            return f'<img src="data:image/jpeg;base64,{b64}" alt="{alt}" style="height:70px;">'
        return '<div style="width:70px;"></div>'

    # NOTE(review): double extension ".jpg.jpg" kept as-is — it matches the
    # shipped file name in the original source; confirm against the repo.
    badge_left_html = _badge_html("xiaohui.jpg.jpg", "校徽")
    badge_right_html = _badge_html("duihui.jpg", "队徽")

    # NOTE(review): the original banner markup was stripped by extraction;
    # this is a minimal reconstruction preserving the visible text content.
    st.markdown(f"""
<div style="display:flex;align-items:center;justify-content:space-between;">
  {badge_left_html}
  <div style="text-align:center;">
    <h1 style="margin-bottom:0;">高校中华民族共同体意识传播分析平台</h1>
    <p>基于大数据与大模型的情感分析及传播效果优化系统</p>
  </div>
  {badge_right_html}
</div>
""", unsafe_allow_html=True)


render_header()

# ============================================
# 2. Top navigation bar
# ============================================
if 'current_tab' not in st.session_state:
    st.session_state.current_tab = "CPI排行"

tabs = ["CPI排行", "话语提取", "情感对比", "热度地图", "优化策略", "学术语境分析"]
tab_icons = ["📊", "🗣️", "🧠", "🗺️", "💡", "📚"]

# FIX(review): `tabs`/`tab_icons` were defined but the nav rendering itself was
# lost in extraction (only an empty st.markdown survived). Rebuild it with one
# button per tab; a click updates session state and Streamlit reruns by itself.
nav_cols = st.columns(len(tabs))
for _col, _icon, _tab in zip(nav_cols, tab_icons, tabs):
    with _col:
        if st.button(f"{_icon} {_tab}", key=f"nav_{_tab}", use_container_width=True):
            st.session_state.current_tab = _tab

# ============================================
# 3. Login & session state
# ============================================
if 'auth' not in st.session_state:
    st.session_state.auth = False
if 'ask' not in st.session_state:
    st.session_state.ask = False

login_col = st.columns([12, 1])[1]
with login_col:
    if st.session_state.auth:
        if st.button("🚪 退出", key="logout_btn"):
            st.session_state.auth = False
            st.rerun()
    else:
        if st.button("🔐 管理", key="login_btn"):
            st.session_state.ask = True

if st.session_state.get('ask') and not st.session_state.auth:
    with st.form("login_form"):
        pwd = st.text_input("请输入管理口令", type="password")
        if st.form_submit_button("进入系统"):
            # SECURITY(review): hard-coded admin password — move to st.secrets
            # or an environment variable before deploying.
            if pwd == "0166":
                st.session_state.auth = True
                st.session_state.ask = False
                st.rerun()
# ============================================
# 4. Helper functions: file parsing & mock sentiment
# ============================================
@st.cache_data
def read_data_file_cached(file_bytes, file_name):
    """Parse uploaded bytes into a DataFrame by extension.

    Supports CSV (several Chinese encodings), Excel, and .docx (paragraphs and
    2+-column tables). Returns None for anything unreadable — callers treat
    None as "skip this file".
    """
    try:
        fname = file_name.lower()
        if fname.endswith('.csv'):
            # Try common encodings for Chinese data before giving up.
            for encoding in ['utf-8-sig', 'utf-8', 'gbk', 'gb2312']:
                try:
                    return pd.read_csv(io.BytesIO(file_bytes), encoding=encoding)
                except (UnicodeDecodeError, ValueError):
                    # ValueError also covers pandas parser errors; keep trying.
                    continue
            # FIX: pd.read_csv has no `errors=` kwarg (the original raised
            # TypeError here and fell through to the except below, returning
            # None). `encoding_errors` is the supported parameter.
            return pd.read_csv(io.BytesIO(file_bytes), encoding='utf-8',
                               encoding_errors='ignore')
        elif fname.endswith(('.xlsx', '.xls')):
            # FIX: openpyxl cannot read legacy .xls files; only force it for
            # .xlsx and let pandas choose the engine otherwise.
            engine = 'openpyxl' if fname.endswith('.xlsx') else None
            return pd.read_excel(io.BytesIO(file_bytes), engine=engine)
        elif fname.endswith('.docx'):
            try:
                from docx import Document  # optional dependency
            except ImportError:
                return None
            doc = Document(io.BytesIO(file_bytes))
            text_data = []
            for para in doc.paragraphs:
                if para.text.strip():
                    text_data.append({'内容': para.text.strip()})
            for table in doc.tables:
                for row in table.rows:
                    row_data = [cell.text for cell in row.cells]
                    if len(row_data) >= 2:
                        text_data.append({'标题': row_data[0], '内容': row_data[1]})
            return pd.DataFrame(text_data)
        return None
    except Exception:
        # Deliberate best-effort: unreadable uploads become None, not a crash.
        return None


def read_data_file(file):
    """Read an uploaded file once, rewind it, and delegate to the cached parser."""
    file_bytes = file.read()
    file.seek(0)  # leave the buffer reusable for any later reader
    return read_data_file_cached(file_bytes, file.name)


# ---------- Academic-analysis helpers ----------
def analyze_text_sentiment(texts, keyword):
    """Rule-based sentiment stub standing in for an LLM call.

    Scores the FIRST text only by counting positive/negative keyword hits.
    `keyword` is unused but kept for call-site compatibility.
    Returns {'sentiment', 'score', 'example'}; neutral scores get random
    jitter, so neutral results are nondeterministic by design.
    """
    import random
    if not texts:
        return {'sentiment': '中性', 'score': 0.5, 'example': ''}
    sample = str(texts[0])
    positive_keywords = ['铸牢', '认同', '团结', '融合', '共同体', '凝聚', '进步', '发展']
    negative_keywords = ['破坏', '分裂', '歧视', '冲突', '对立', '矛盾']
    text_lower = sample.lower()  # lower() is a no-op for the Chinese keywords
    pos_count = sum(1 for p in positive_keywords if p in text_lower)
    neg_count = sum(1 for n in negative_keywords if n in text_lower)
    if pos_count > neg_count:
        score = 0.5 + min(0.5, pos_count * 0.1)
        sentiment = '积极'
    elif neg_count > pos_count:
        score = max(0.0, 0.5 - neg_count * 0.1)
        sentiment = '消极'
    else:
        score = 0.5 + random.uniform(-0.1, 0.1)
        sentiment = '中性'
    return {'sentiment': sentiment, 'score': round(score, 2), 'example': sample[:100]}
def generate_academic_insights(analysis_results, target_words):
    """Turn per-keyword/region analysis rows into headline insight cards.

    Args:
        analysis_results: list of dicts; rows may carry '地区', '语义倾向',
            '关键词' keys (all optional — missing keys are tolerated).
        target_words: kept for call-site compatibility (currently unused).

    Returns:
        list of {'title', 'content'} dicts; always ends with a coverage card.
    """
    insights = []

    # Regional spread: only flag when more than one distinct region appears.
    regions = {r['地区'] for r in analysis_results if '地区' in r}
    if len(regions) > 1:
        insights.append({'title': '🌍 地域表述差异', 'content': f'发现{len(regions)}个不同地区的表述差异,建议针对性地调整传播策略。'})

    sentiments = [r.get('语义倾向', '中性') for r in analysis_results]
    pos_ratio = sentiments.count('积极') / len(sentiments) if sentiments else 0
    if pos_ratio > 0.7:
        insights.append({'title': '📈 积极语义主导', 'content': f'{pos_ratio*100:.0f}%的表述呈现积极语义倾向,整体舆论环境良好。'})
    elif sentiments and pos_ratio < 0.3:
        # FIX: require non-empty input here — the original warned about "100%
        # negative" even for an empty result list.
        insights.append({'title': '⚠️ 消极语义警示', 'content': f'消极语义占比{(1-pos_ratio)*100:.0f}%,需关注潜在风险点。'})

    # FIX: tolerate rows without '关键词' instead of raising KeyError.
    word_coverage = len({r['关键词'] for r in analysis_results if '关键词' in r})
    insights.append({'title': '🎯 关键词覆盖', 'content': f'成功识别{word_coverage}个核心词汇的语境使用模式。'})
    return insights
# ---------- end helpers ----------
# ============================================
# 5. Sidebar: data upload & publish (admin only)
# ============================================
if st.session_state.auth:
    with st.sidebar:
        # NOTE(review): original card markup was stripped; minimal header kept.
        st.markdown('<div class="sidebar-title">📥 数据管理中心</div>', unsafe_allow_html=True)

        with st.container():
            # FIX(review): the uploader widgets were missing from the extracted
            # source although f_u/f_s/f_p/f_academic are consumed below
            # (NameError as written). Restored with multi-file uploads; labels
            # reconstructed from the publish logic — confirm original wording.
            f_u = st.file_uploader("🏫 高校官方数据", accept_multiple_files=True, key="up_u")
            f_s = st.file_uploader("💬 社媒反馈语料", accept_multiple_files=True, key="up_s")
            f_p = st.file_uploader("💡 优化策略数据(可选)", accept_multiple_files=True, key="up_p")
            f_academic = st.file_uploader("📚 学术语境分析数据(可选)", accept_multiple_files=True, key="up_academic")

        def _merge_and_save(files, out_path, tag_school=False):
            # Concatenate every parseable upload and persist it as a
            # UTF-8-sig CSV; returns True if anything was written.
            frames = []
            for f in files:
                df = read_data_file(f)
                if df is not None:
                    if tag_school:
                        # School id = upload file name without extension.
                        df['school'] = f.name.split('.')[0]
                    frames.append(df)
            if frames:
                pd.concat(frames, ignore_index=True).to_csv(
                    out_path, index=False, encoding='utf-8-sig')
                return True
            return False

        if st.button("🚀 固化并发布", use_container_width=True, key="publish_btn"):
            if f_u and f_s:
                with st.spinner("🔄 数据处理中..."):
                    _merge_and_save(f_u, "data_u.csv", tag_school=True)
                    _merge_and_save(f_s, "data_s.csv")
                    if f_p:
                        _merge_and_save(f_p, "data_p.csv")
                    if f_academic:
                        _merge_and_save(f_academic, "data_academic.csv")
                st.success("✅ 发布成功!")
                st.rerun()
            else:
                st.error("❌ 请上传高校和社媒数据")
# NOTE(review): Section 6 "main display", first half. The extraction collapsed
# newlines and stripped the HTML inside the st.markdown triple-quoted strings,
# so the lines below are preserved VERBATIM (not runnable as rendered).
# Visible logic: a sentiment→color map, a cached default-strategy table, cached
# CSV loaders for data_u/data_s, and the CPI排行 / 话语提取 / 情感对比 /
# 热度地图 / 优化策略 tab bodies plus the start of 学术语境分析.
# Recover indentation and markup from the pre-extraction source before editing.
主展示区 # ============================================ color_map = { '积极': '#DE2910', '正面': '#DE2910', '中性': '#FFB3B3', '一般': '#FFB3B3', '消极': '#999999', '负面': '#999999', '好评': '#DE2910', '差评': '#999999' } @st.cache_data def generate_default_strategies(): strategies = [ {"策略类型": "内容优化", "具体建议": '增加"五个认同"相关内容的发布频率,每周至少3次专题推送', "优先级": "高"}, {"策略类型": "内容优化", "具体建议": '使用"石榴籽"等生动比喻,增强内容感染力', "优先级": "高"}, {"策略类型": "传播渠道", "具体建议": "加强短视频平台运营,制作15-30秒精华内容", "优先级": "中"}, {"策略类型": "互动提升", "具体建议": "设置话题讨论区,鼓励师生分享民族团结故事", "优先级": "中"}, {"策略类型": "情感引导", "具体建议": "针对负面反馈,及时发布正面案例进行引导", "优先级": "高"}, {"策略类型": "形式创新", "具体建议": "开展线上线下结合的主题活动,增强参与感", "优先级": "中"}, {"策略类型": "数据监测", "具体建议": "建立每周舆情监测机制,及时掌握传播效果", "优先级": "低"}, {"策略类型": "队伍建设", "具体建议": "培养校园网络评论员队伍,提升正面声音", "优先级": "中"}, ] return pd.DataFrame(strategies) data_exists = os.path.exists("data_u.csv") and os.path.exists("data_s.csv") if data_exists: try: @st.cache_data def load_data(): du = pd.read_csv("data_u.csv", encoding='utf-8-sig') ds = pd.read_csv("data_s.csv", encoding='utf-8-sig') return du, ds du, ds = load_data() current = st.session_state.current_tab with st.container(): st.markdown('
', unsafe_allow_html=True) if current == "CPI排行": st.markdown("### 📊 高校传播效能排行") if 'school' in du.columns: c_data = du['school'].value_counts().reset_index() c_data.columns = ['学校', '指数'] fig = px.bar(c_data, x='学校', y='指数', color_discrete_sequence=['#DE2910'], text_auto=True) fig.update_layout( height=480, showlegend=False, xaxis_title="", yaxis_title="传播指数", plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=20, b=40), font=dict(family="Microsoft YaHei") ) fig.update_traces(marker_line_color='#FFD700', marker_line_width=1.5) st.plotly_chart(fig, use_container_width=True, key="cpi_chart") else: st.info("数据中未找到学校标识列") elif current == "话语提取": st.markdown("### 🗣️ 核心话语体系监测") txt_col = None for c in ds.columns: if any(x in str(c) for x in ['文本', '内容', 'text', '评论', '留言']): txt_col = c break if txt_col is None: txt_col = ds.columns[0] full_txt = "".join(ds[txt_col].astype(str)) kws = ["铸牢共同体意识", "中华民族", "统一", "共同体", "团结"] k_df = pd.DataFrame([{'词汇': k, '频数': len(re.findall(k, full_txt))} for k in kws]) fig = px.bar(k_df, x='词汇', y='频数', color='频数', color_continuous_scale=['#FFB3B3', '#DE2910'], text_auto=True) fig.update_layout( height=480, showlegend=False, xaxis_title="", yaxis_title="出现频次", coloraxis_showscale=False, plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=20, b=40), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig, use_container_width=True, key="word_chart") elif current == "情感对比": st.markdown("### 🧠 情感极性对比分析") col1, col2 = st.columns(2) with col1: st.markdown("**官方宣教情感分布**") official_data = pd.DataFrame({ 'sentiment': ['积极', '中性', '消极'], 'count': [92, 6, 2] }) fig1 = px.pie(official_data, values='count', names='sentiment', color='sentiment', color_discrete_map=color_map, hole=0.4) fig1.update_layout( height=400, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.15), plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=10, b=60), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig1,
use_container_width=True, key="official_sentiment") with col2: st.markdown("**社媒公众反馈分布**") label_col = None for c in ds.columns: if any(x in str(c).lower() for x in ['标签', '情感', 'sentiment', '情绪', '评价']): label_col = c break if label_col is None: label_col = ds.columns[-1] s_v = ds[label_col].value_counts().reset_index() s_v.columns = ['sentiment', 'count'] sentiment_mapping = {} for val in s_v['sentiment'].unique(): val_str = str(val).lower() if any(x in val_str for x in ['积极', '正面', '好评', 'pos', '好']): sentiment_mapping[val] = '积极' elif any(x in val_str for x in ['消极', '负面', '差评', 'neg', '差']): sentiment_mapping[val] = '消极' else: sentiment_mapping[val] = '中性' s_v['sentiment'] = s_v['sentiment'].map(sentiment_mapping) s_v = s_v.groupby('sentiment')['count'].sum().reset_index() fig2 = px.pie(s_v, values='count', names='sentiment', color='sentiment', color_discrete_map=color_map, hole=0.4) fig2.update_layout( height=400, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.15), plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=10, b=60), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig2, use_container_width=True, key="social_sentiment") elif current == "热度地图": st.markdown("### 🗺️ 全国传播热度映射") if os.path.exists("ditu.jpg"): with open("ditu.jpg", "rb") as f: map_data = f.read() map_b64 = base64.b64encode(map_data).decode() st.markdown(f"""
热度地图
""", unsafe_allow_html=True) st.caption("📍 全国高校中华民族共同体意识传播热度分布") else: st.markdown("""
🗺️

热度地图预留位置

请在左侧上传地图图片

""", unsafe_allow_html=True) elif current == "优化策略": st.markdown("### 💡 传播优化策略建议") if os.path.exists("data_p.csv"): dp = pd.read_csv("data_p.csv", encoding='utf-8-sig') display_cols = [c for c in dp.columns if any(x in c for x in ['策略', '建议', '内容', '类型', '优先级'])] if display_cols: st.dataframe(dp[display_cols].head(10), use_container_width=True, hide_index=True) else: st.dataframe(dp.head(10), use_container_width=True, hide_index=True) else: # 不显示提示信息,直接显示默认策略 dp = generate_default_strategies() st.dataframe(dp, use_container_width=True, hide_index=True) elif current == "学术语境分析": st.markdown("### 📚 学术语境深度分析") # 检查是否有学术分析数据 academic_data_exists = os.path.exists("data_academic.csv") if academic_data_exists: try: @st.cache_data def load_academic_data(): return pd.read_csv("data_academic.csv", encoding='utf-8-sig') df_academic = load_academic_data() # 数据列识别 text_col = None region_col = None media_col = None keyword_col = None for c in df_academic.columns: c_str = str(c).lower() if any(x in c_str for x in ['文本', '内容', 'text', '表述', '原文']): text_col = c if any(x in c_str for x in ['地区', '地域', '省份', 'region', 'location', 'ip', '属地']): region_col = c if any(x in c_str for x in ['媒体', '平台', '来源', 'media', 'platform', 'source']): media_col = c if any(x in c_str for x in ['关键词', '主题', '词汇', 'keyword', 'topic', '事件']): keyword_col = c # 如果未识别到列,使用默认列 if text_col is None: text_col = df_academic.columns[0] if region_col is None and len(df_academic.columns) > 1: region_col = df_academic.columns[1] if media_col is None and len(df_academic.columns) > 2: media_col = df_academic.columns[2] # 侧边栏分析配置 with st.sidebar: st.markdown('', unsafe_allow_html=True) # 主分析区域 col1, col2 = st.columns([2, 1]) with col1: st.markdown("#### 🎯 语境语义分析") # 模拟Hugging Face模型分析结果(实际使用时需要接入真实API) # 这里使用基于规则的分析来模拟大模型效果 analysis_results = [] for word in target_words: if text_col in df_academic.columns: # 查找包含该词的文本 mask = df_academic[text_col].astype(str).str.contains(word, na=False) related_texts =
# NOTE(review): second half of the main display, preserved VERBATIM — the
# extraction collapsed newlines and stripped st.markdown HTML, so this span is
# not runnable as rendered. Visible logic: per-region sentiment scoring via
# analyze_text_sentiment, a keyword/region heat map, insight cards from
# generate_academic_insights, media-difference charts, the no-data help page,
# the welcome page, and the footer. `target_words` is consumed here but its
# defining widget (presumably a sidebar multiselect) was lost in extraction —
# restore it from the pre-extraction source. Recover indentation before editing.
df_academic[mask] if len(related_texts) > 0: # 分析不同地区的表述 if region_col and region_col in df_academic.columns: region_stats = related_texts[region_col].value_counts().head(5) for region, count in region_stats.items(): # 模拟语义倾向分析 sample_texts = related_texts[related_texts[region_col] == region][text_col].head(3).tolist() sentiment_score = analyze_text_sentiment(sample_texts, word) analysis_results.append({ '关键词': word, '地区': region, '出现频次': count, '语义倾向': sentiment_score['sentiment'], '情感得分': sentiment_score['score'], '典型表述': sentiment_score['example'][:50] + "..." }) if analysis_results: result_df = pd.DataFrame(analysis_results) # 热力图展示 pivot_df = result_df.pivot_table( values='情感得分', index='关键词', columns='地区', aggfunc='mean' ).fillna(0) fig_heatmap = px.imshow( pivot_df, color_continuous_scale=['#DE2910', '#FFB3B3', '#FFD700'], aspect="auto", title="地域-关键词情感倾向热力图" ) fig_heatmap.update_layout( height=400, plot_bgcolor='white', paper_bgcolor='white', font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig_heatmap, use_container_width=True, key="academic_heatmap") # 详细数据表 st.markdown("#### 📊 详细分析数据") st.dataframe( result_df.sort_values('情感得分', ascending=False), use_container_width=True, hide_index=True, column_config={ '情感得分': st.column_config.ProgressColumn( '情感得分', help='语义情感强度', format='%.2f', min_value=0, max_value=1, ) } ) else: st.info("未找到相关分析数据,请确保上传的数据包含关键词匹配的内容") with col2: st.markdown("#### 🧠 大模型洞察") # 模拟大模型分析摘要 if analysis_results: st.markdown("""

📝 表述差异发现

""", unsafe_allow_html=True) # 生成洞察文本 insights = generate_academic_insights(analysis_results, target_words) for insight in insights: st.markdown(f"""
{insight['title']}
{insight['content']}
""", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) # 学术建议 st.markdown("""

💡 学术建议

""", unsafe_allow_html=True) # 媒体差异分析 if media_col and media_col in df_academic.columns: st.markdown("#### 📺 媒体表述差异对比") media_col1, media_col2 = st.columns(2) with media_col1: # 媒体分布 media_dist = df_academic[media_col].value_counts().head(8).reset_index() media_dist.columns = ['媒体类型', '数量'] fig_media = px.pie( media_dist, values='数量', names='媒体类型', color_discrete_sequence=['#DE2910', '#C41E0B', '#FFD700', '#FFA500', '#FFB3B3', '#999999'] ) fig_media.update_layout( height=350, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.2), plot_bgcolor='white', paper_bgcolor='white' ) st.plotly_chart(fig_media, use_container_width=True, key="media_pie") with media_col2: # 关键词在不同媒体中的使用 if target_words: media_keyword_data = [] for word in target_words[:2]: # 只取前两个避免数据过于稀疏 for media in df_academic[media_col].unique()[:5]: mask = (df_academic[text_col].astype(str).str.contains(word, na=False)) & \ (df_academic[media_col] == media) count = mask.sum() media_keyword_data.append({ '关键词': word, '媒体': media, '提及次数': count }) if media_keyword_data: mk_df = pd.DataFrame(media_keyword_data) fig_mk = px.bar( mk_df, x='媒体', y='提及次数', color='关键词', barmode='group', color_discrete_map={'筑牢': '#DE2910', '五胡入华': '#FFD700'} ) fig_mk.update_layout( height=350, plot_bgcolor='white', paper_bgcolor='white', xaxis_title="", yaxis_title="提及频次" ) st.plotly_chart(fig_mk, use_container_width=True, key="media_keyword_bar") except Exception as e: st.error(f"学术分析数据处理出错: {str(e)}") st.info("请检查数据格式是否正确,或尝试重新上传数据") else: # 使用示例数据展示功能 st.markdown("""
📚

学术语境分析模块

利用Hugging Face大模型分析核心词汇和历史事件的地域/媒体表述差异

支持分析内容:
• 核心词汇:"筑牢"、"中华民族共同体"、"五个认同"等
• 历史事件:"五胡入华"、"民族融合"、"华夷之辨"等
• 地域差异:不同省份/地区的表述倾向对比
• 媒体差异:知识社区、新闻平台、社交媒体的表述差异
数据格式要求:
请上传包含以下列的CSV/Excel文件:
• 文本内容/表述原文
• 地区/IP属地
• 媒体类型/平台
• 关键词/主题(可选)
""", unsafe_allow_html=True) # 所有分支结束后关闭 content-card st.markdown('
', unsafe_allow_html=True) except Exception as e: st.error(f"数据处理出错: {str(e)}") st.info("请尝试重新上传数据") else: # 欢迎页面 st.markdown("""
📊

欢迎使用分析平台

请在左侧管理面板上传数据并点击「固化并发布」以开始分析

步骤 1
上传高校官方数据
步骤 2
上传社媒反馈语料
步骤 3
点击固化并发布
""", unsafe_allow_html=True) # ============================================ # ============================================ # ============================================ # 7. 底部信息 # ============================================ st.markdown('', unsafe_allow_html=True)