import streamlit as st
import pandas as pd
import plotly.express as px
import os
import re
import io
import base64

# ============================================
# 0. Page configuration & CSS
# ============================================
st.set_page_config(
    page_title="高校中华民族共同体分析平台",
    layout="wide",
    initial_sidebar_state="expanded"
)

# NOTE(review): the original custom <style> block was lost in extraction; the
# empty string keeps the call a harmless no-op until the CSS is restored.
st.markdown("""
""", unsafe_allow_html=True)


# ============================================
# 1. Top banner
# ============================================
def render_header():
    """Render the page banner: school badge | title block | team badge."""

    def _badge_html(path, alt):
        # Inline the image as a base64 data URI so no static hosting is needed;
        # fall back to an empty placeholder with the same footprint.
        if os.path.exists(path):
            with open(path, "rb") as f:
                b64 = base64.b64encode(f.read()).decode()
            return f'<img src="data:image/jpeg;base64,{b64}" alt="{alt}" style="height:70px;">'
        return '<div style="width:70px;"></div>'

    # NOTE(review): double extension ".jpg.jpg" kept as-is — it matches the
    # shipped file name in the original source; confirm against the repo.
    badge_left_html = _badge_html("xiaohui.jpg.jpg", "校徽")
    badge_right_html = _badge_html("duihui.jpg", "队徽")

    # NOTE(review): the original banner markup was stripped by extraction;
    # this is a minimal reconstruction preserving the visible text content.
    st.markdown(f"""
<div style="display:flex;align-items:center;justify-content:space-between;">
  {badge_left_html}
  <div style="text-align:center;">
    <h1 style="margin-bottom:0;">高校中华民族共同体意识传播分析平台</h1>
    <p>基于大数据与大模型的情感分析及传播效果优化系统</p>
  </div>
  {badge_right_html}
</div>
""", unsafe_allow_html=True)


render_header()

# ============================================
# 2. Top navigation bar
# ============================================
if 'current_tab' not in st.session_state:
    st.session_state.current_tab = "CPI排行"

tabs = ["CPI排行", "话语提取", "情感对比", "热度地图", "优化策略", "学术语境分析"]
tab_icons = ["📊", "🗣️", "🧠", "🗺️", "💡", "📚"]

# FIX(review): `tabs`/`tab_icons` were defined but the nav rendering itself was
# lost in extraction (only an empty st.markdown survived). Rebuild it with one
# button per tab; a click updates session state and Streamlit reruns by itself.
nav_cols = st.columns(len(tabs))
for _col, _icon, _tab in zip(nav_cols, tab_icons, tabs):
    with _col:
        if st.button(f"{_icon} {_tab}", key=f"nav_{_tab}", use_container_width=True):
            st.session_state.current_tab = _tab

# ============================================
# 3. Login & session state
# ============================================
if 'auth' not in st.session_state:
    st.session_state.auth = False
if 'ask' not in st.session_state:
    st.session_state.ask = False

login_col = st.columns([12, 1])[1]
with login_col:
    if st.session_state.auth:
        if st.button("🚪 退出", key="logout_btn"):
            st.session_state.auth = False
            st.rerun()
    else:
        if st.button("🔐 管理", key="login_btn"):
            st.session_state.ask = True

if st.session_state.get('ask') and not st.session_state.auth:
    with st.form("login_form"):
        pwd = st.text_input("请输入管理口令", type="password")
        if st.form_submit_button("进入系统"):
            # SECURITY(review): hard-coded admin password — move to st.secrets
            # or an environment variable before deploying.
            if pwd == "0166":
                st.session_state.auth = True
                st.session_state.ask = False
                st.rerun()
# ============================================
# 4. Helper functions: file parsing & mock sentiment
# ============================================
@st.cache_data
def read_data_file_cached(file_bytes, file_name):
    """Parse uploaded bytes into a DataFrame by extension.

    Supports CSV (several Chinese encodings), Excel, and .docx (paragraphs and
    2+-column tables). Returns None for anything unreadable — callers treat
    None as "skip this file".
    """
    try:
        fname = file_name.lower()
        if fname.endswith('.csv'):
            # Try common encodings for Chinese data before giving up.
            for encoding in ['utf-8-sig', 'utf-8', 'gbk', 'gb2312']:
                try:
                    return pd.read_csv(io.BytesIO(file_bytes), encoding=encoding)
                except (UnicodeDecodeError, ValueError):
                    # ValueError also covers pandas parser errors; keep trying.
                    continue
            # FIX: pd.read_csv has no `errors=` kwarg (the original raised
            # TypeError here and fell through to the except below, returning
            # None). `encoding_errors` is the supported parameter.
            return pd.read_csv(io.BytesIO(file_bytes), encoding='utf-8',
                               encoding_errors='ignore')
        elif fname.endswith(('.xlsx', '.xls')):
            # FIX: openpyxl cannot read legacy .xls files; only force it for
            # .xlsx and let pandas choose the engine otherwise.
            engine = 'openpyxl' if fname.endswith('.xlsx') else None
            return pd.read_excel(io.BytesIO(file_bytes), engine=engine)
        elif fname.endswith('.docx'):
            try:
                from docx import Document  # optional dependency
            except ImportError:
                return None
            doc = Document(io.BytesIO(file_bytes))
            text_data = []
            for para in doc.paragraphs:
                if para.text.strip():
                    text_data.append({'内容': para.text.strip()})
            for table in doc.tables:
                for row in table.rows:
                    row_data = [cell.text for cell in row.cells]
                    if len(row_data) >= 2:
                        text_data.append({'标题': row_data[0], '内容': row_data[1]})
            return pd.DataFrame(text_data)
        return None
    except Exception:
        # Deliberate best-effort: unreadable uploads become None, not a crash.
        return None


def read_data_file(file):
    """Read an uploaded file once, rewind it, and delegate to the cached parser."""
    file_bytes = file.read()
    file.seek(0)  # leave the buffer reusable for any later reader
    return read_data_file_cached(file_bytes, file.name)


# ---------- Academic-analysis helpers ----------
def analyze_text_sentiment(texts, keyword):
    """Rule-based sentiment stub standing in for an LLM call.

    Scores the FIRST text only by counting positive/negative keyword hits.
    `keyword` is unused but kept for call-site compatibility.
    Returns {'sentiment', 'score', 'example'}; neutral scores get random
    jitter, so neutral results are nondeterministic by design.
    """
    import random
    if not texts:
        return {'sentiment': '中性', 'score': 0.5, 'example': ''}
    sample = str(texts[0])
    positive_keywords = ['铸牢', '认同', '团结', '融合', '共同体', '凝聚', '进步', '发展']
    negative_keywords = ['破坏', '分裂', '歧视', '冲突', '对立', '矛盾']
    text_lower = sample.lower()  # lower() is a no-op for the Chinese keywords
    pos_count = sum(1 for p in positive_keywords if p in text_lower)
    neg_count = sum(1 for n in negative_keywords if n in text_lower)
    if pos_count > neg_count:
        score = 0.5 + min(0.5, pos_count * 0.1)
        sentiment = '积极'
    elif neg_count > pos_count:
        score = max(0.0, 0.5 - neg_count * 0.1)
        sentiment = '消极'
    else:
        score = 0.5 + random.uniform(-0.1, 0.1)
        sentiment = '中性'
    return {'sentiment': sentiment, 'score': round(score, 2), 'example': sample[:100]}
def generate_academic_insights(analysis_results, target_words):
    """Turn per-keyword/region analysis rows into headline insight cards.

    Args:
        analysis_results: list of dicts; rows may carry '地区', '语义倾向',
            '关键词' keys (all optional — missing keys are tolerated).
        target_words: kept for call-site compatibility (currently unused).

    Returns:
        list of {'title', 'content'} dicts; always ends with a coverage card.
    """
    insights = []

    # Regional spread: only flag when more than one distinct region appears.
    regions = {r['地区'] for r in analysis_results if '地区' in r}
    if len(regions) > 1:
        insights.append({'title': '🌍 地域表述差异', 'content': f'发现{len(regions)}个不同地区的表述差异,建议针对性地调整传播策略。'})

    sentiments = [r.get('语义倾向', '中性') for r in analysis_results]
    pos_ratio = sentiments.count('积极') / len(sentiments) if sentiments else 0
    if pos_ratio > 0.7:
        insights.append({'title': '📈 积极语义主导', 'content': f'{pos_ratio*100:.0f}%的表述呈现积极语义倾向,整体舆论环境良好。'})
    elif sentiments and pos_ratio < 0.3:
        # FIX: require non-empty input here — the original warned about "100%
        # negative" even for an empty result list.
        insights.append({'title': '⚠️ 消极语义警示', 'content': f'消极语义占比{(1-pos_ratio)*100:.0f}%,需关注潜在风险点。'})

    # FIX: tolerate rows without '关键词' instead of raising KeyError.
    word_coverage = len({r['关键词'] for r in analysis_results if '关键词' in r})
    insights.append({'title': '🎯 关键词覆盖', 'content': f'成功识别{word_coverage}个核心词汇的语境使用模式。'})
    return insights
# ---------- end helpers ----------
# ============================================
# 5. Sidebar: data upload & publish (admin only)
# ============================================
if st.session_state.auth:
    with st.sidebar:
        # NOTE(review): original card markup was stripped; minimal header kept.
        st.markdown('<div class="sidebar-title">📥 数据管理中心</div>', unsafe_allow_html=True)

        with st.container():
            # FIX(review): the uploader widgets were missing from the extracted
            # source although f_u/f_s/f_p/f_academic are consumed below
            # (NameError as written). Restored with multi-file uploads; labels
            # reconstructed from the publish logic — confirm original wording.
            f_u = st.file_uploader("🏫 高校官方数据", accept_multiple_files=True, key="up_u")
            f_s = st.file_uploader("💬 社媒反馈语料", accept_multiple_files=True, key="up_s")
            f_p = st.file_uploader("💡 优化策略数据(可选)", accept_multiple_files=True, key="up_p")
            f_academic = st.file_uploader("📚 学术语境分析数据(可选)", accept_multiple_files=True, key="up_academic")

        def _merge_and_save(files, out_path, tag_school=False):
            # Concatenate every parseable upload and persist it as a
            # UTF-8-sig CSV; returns True if anything was written.
            frames = []
            for f in files:
                df = read_data_file(f)
                if df is not None:
                    if tag_school:
                        # School id = upload file name without extension.
                        df['school'] = f.name.split('.')[0]
                    frames.append(df)
            if frames:
                pd.concat(frames, ignore_index=True).to_csv(
                    out_path, index=False, encoding='utf-8-sig')
                return True
            return False

        if st.button("🚀 固化并发布", use_container_width=True, key="publish_btn"):
            if f_u and f_s:
                with st.spinner("🔄 数据处理中..."):
                    _merge_and_save(f_u, "data_u.csv", tag_school=True)
                    _merge_and_save(f_s, "data_s.csv")
                    if f_p:
                        _merge_and_save(f_p, "data_p.csv")
                    if f_academic:
                        _merge_and_save(f_academic, "data_academic.csv")
                st.success("✅ 发布成功!")
                st.rerun()
            else:
                st.error("❌ 请上传高校和社媒数据")
# NOTE(review): Section 6 "main display", first half. The extraction collapsed
# newlines and stripped the HTML inside the st.markdown triple-quoted strings,
# so the lines below are preserved VERBATIM (not runnable as rendered).
# Visible logic: a sentiment→color map, a cached default-strategy table, cached
# CSV loaders for data_u/data_s, and the CPI排行 / 话语提取 / 情感对比 /
# 热度地图 / 优化策略 tab bodies plus the start of 学术语境分析.
# Recover indentation and markup from the pre-extraction source before editing.
主展示区 # ============================================ color_map = { '积极': '#DE2910', '正面': '#DE2910', '中性': '#FFB3B3', '一般': '#FFB3B3', '消极': '#999999', '负面': '#999999', '好评': '#DE2910', '差评': '#999999' } @st.cache_data def generate_default_strategies(): strategies = [ {"策略类型": "内容优化", "具体建议": '增加"五个认同"相关内容的发布频率,每周至少3次专题推送', "优先级": "高"}, {"策略类型": "内容优化", "具体建议": '使用"石榴籽"等生动比喻,增强内容感染力', "优先级": "高"}, {"策略类型": "传播渠道", "具体建议": "加强短视频平台运营,制作15-30秒精华内容", "优先级": "中"}, {"策略类型": "互动提升", "具体建议": "设置话题讨论区,鼓励师生分享民族团结故事", "优先级": "中"}, {"策略类型": "情感引导", "具体建议": "针对负面反馈,及时发布正面案例进行引导", "优先级": "高"}, {"策略类型": "形式创新", "具体建议": "开展线上线下结合的主题活动,增强参与感", "优先级": "中"}, {"策略类型": "数据监测", "具体建议": "建立每周舆情监测机制,及时掌握传播效果", "优先级": "低"}, {"策略类型": "队伍建设", "具体建议": "培养校园网络评论员队伍,提升正面声音", "优先级": "中"}, ] return pd.DataFrame(strategies) data_exists = os.path.exists("data_u.csv") and os.path.exists("data_s.csv") if data_exists: try: @st.cache_data def load_data(): du = pd.read_csv("data_u.csv", encoding='utf-8-sig') ds = pd.read_csv("data_s.csv", encoding='utf-8-sig') return du, ds du, ds = load_data() current = st.session_state.current_tab with st.container(): st.markdown('
', unsafe_allow_html=True) if current == "CPI排行": st.markdown("### 📊 高校传播效能排行") if 'school' in du.columns: c_data = du['school'].value_counts().reset_index() c_data.columns = ['学校', '指数'] fig = px.bar(c_data, x='学校', y='指数', color_discrete_sequence=['#DE2910'], text_auto=True) fig.update_layout( height=480, showlegend=False, xaxis_title="", yaxis_title="传播指数", plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=20, b=40), font=dict(family="Microsoft YaHei") ) fig.update_traces(marker_line_color='#FFD700', marker_line_width=1.5) st.plotly_chart(fig, use_container_width=True, key="cpi_chart") else: st.info("数据中未找到学校标识列") elif current == "话语提取": st.markdown("### 🗣️ 核心话语体系监测") txt_col = None for c in ds.columns: if any(x in str(c) for x in ['文本', '内容', 'text', '评论', '留言']): txt_col = c break if txt_col is None: txt_col = ds.columns[0] full_txt = "".join(ds[txt_col].astype(str)) kws = ["铸牢共同体意识", "中华民族", "统一", "共同体", "团结"] k_df = pd.DataFrame([{'词汇': k, '频数': len(re.findall(k, full_txt))} for k in kws]) fig = px.bar(k_df, x='词汇', y='频数', color='频数', color_continuous_scale=['#FFB3B3', '#DE2910'], text_auto=True) fig.update_layout( height=480, showlegend=False, xaxis_title="", yaxis_title="出现频次", coloraxis_showscale=False, plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=20, b=40), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig, use_container_width=True, key="word_chart") elif current == "情感对比": st.markdown("### 🧠 情感极性对比分析") col1, col2 = st.columns(2) with col1: st.markdown("**官方宣教情感分布**") official_data = pd.DataFrame({ 'sentiment': ['积极', '中性', '消极'], 'count': [92, 6, 2] }) fig1 = px.pie(official_data, values='count', names='sentiment', color='sentiment', color_discrete_map=color_map, hole=0.4) fig1.update_layout( height=400, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.15), plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=10, b=60), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig1,
use_container_width=True, key="official_sentiment") with col2: st.markdown("**社媒公众反馈分布**") label_col = None for c in ds.columns: if any(x in str(c).lower() for x in ['标签', '情感', 'sentiment', '情绪', '评价']): label_col = c break if label_col is None: label_col = ds.columns[-1] s_v = ds[label_col].value_counts().reset_index() s_v.columns = ['sentiment', 'count'] sentiment_mapping = {} for val in s_v['sentiment'].unique(): val_str = str(val).lower() if any(x in val_str for x in ['积极', '正面', '好评', 'pos', '好']): sentiment_mapping[val] = '积极' elif any(x in val_str for x in ['消极', '负面', '差评', 'neg', '差']): sentiment_mapping[val] = '消极' else: sentiment_mapping[val] = '中性' s_v['sentiment'] = s_v['sentiment'].map(sentiment_mapping) s_v = s_v.groupby('sentiment')['count'].sum().reset_index() fig2 = px.pie(s_v, values='count', names='sentiment', color='sentiment', color_discrete_map=color_map, hole=0.4) fig2.update_layout( height=400, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.15), plot_bgcolor='white', paper_bgcolor='white', margin=dict(t=10, b=60), font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig2, use_container_width=True, key="social_sentiment") elif current == "热度地图": st.markdown("### 🗺️ 全国传播热度映射") if os.path.exists("ditu.jpg"): with open("ditu.jpg", "rb") as f: map_data = f.read() map_b64 = base64.b64encode(map_data).decode() st.markdown(f"""
热度地图
""", unsafe_allow_html=True) st.caption("📍 全国高校中华民族共同体意识传播热度分布") else: st.markdown("""
🗺️

热度地图预留位置

请在左侧上传地图图片

""", unsafe_allow_html=True) elif current == "优化策略": st.markdown("### 💡 传播优化策略建议") if os.path.exists("data_p.csv"): dp = pd.read_csv("data_p.csv", encoding='utf-8-sig') display_cols = [c for c in dp.columns if any(x in c for x in ['策略', '建议', '内容', '类型', '优先级'])] if display_cols: st.dataframe(dp[display_cols].head(10), use_container_width=True, hide_index=True) else: st.dataframe(dp.head(10), use_container_width=True, hide_index=True) else: # 不显示提示信息,直接显示默认策略 dp = generate_default_strategies() st.dataframe(dp, use_container_width=True, hide_index=True) elif current == "学术语境分析": st.markdown("### 📚 学术语境深度分析") # 检查是否有学术分析数据 academic_data_exists = os.path.exists("data_academic.csv") if academic_data_exists: try: @st.cache_data def load_academic_data(): return pd.read_csv("data_academic.csv", encoding='utf-8-sig') df_academic = load_academic_data() # 数据列识别 text_col = None region_col = None media_col = None keyword_col = None for c in df_academic.columns: c_str = str(c).lower() if any(x in c_str for x in ['文本', '内容', 'text', '表述', '原文']): text_col = c if any(x in c_str for x in ['地区', '地域', '省份', 'region', 'location', 'ip', '属地']): region_col = c if any(x in c_str for x in ['媒体', '平台', '来源', 'media', 'platform', 'source']): media_col = c if any(x in c_str for x in ['关键词', '主题', '词汇', 'keyword', 'topic', '事件']): keyword_col = c # 如果未识别到列,使用默认列 if text_col is None: text_col = df_academic.columns[0] if region_col is None and len(df_academic.columns) > 1: region_col = df_academic.columns[1] if media_col is None and len(df_academic.columns) > 2: media_col = df_academic.columns[2] # 侧边栏分析配置 with st.sidebar: st.markdown('', unsafe_allow_html=True) # 主分析区域 col1, col2 = st.columns([2, 1]) with col1: st.markdown("#### 🎯 语境语义分析") # 模拟Hugging Face模型分析结果(实际使用时需要接入真实API) # 这里使用基于规则的分析来模拟大模型效果 analysis_results = [] for word in target_words: if text_col in df_academic.columns: # 查找包含该词的文本 mask = df_academic[text_col].astype(str).str.contains(word, na=False) related_texts =
# NOTE(review): second half of the main display, preserved VERBATIM — the
# extraction collapsed newlines and stripped st.markdown HTML, so this span is
# not runnable as rendered. Visible logic: per-region sentiment scoring via
# analyze_text_sentiment, a keyword/region heat map, insight cards from
# generate_academic_insights, media-difference charts, the no-data help page,
# the welcome page, and the footer. `target_words` is consumed here but its
# defining widget (presumably a sidebar multiselect) was lost in extraction —
# restore it from the pre-extraction source. Recover indentation before editing.
df_academic[mask] if len(related_texts) > 0: # 分析不同地区的表述 if region_col and region_col in df_academic.columns: region_stats = related_texts[region_col].value_counts().head(5) for region, count in region_stats.items(): # 模拟语义倾向分析 sample_texts = related_texts[related_texts[region_col] == region][text_col].head(3).tolist() sentiment_score = analyze_text_sentiment(sample_texts, word) analysis_results.append({ '关键词': word, '地区': region, '出现频次': count, '语义倾向': sentiment_score['sentiment'], '情感得分': sentiment_score['score'], '典型表述': sentiment_score['example'][:50] + "..." }) if analysis_results: result_df = pd.DataFrame(analysis_results) # 热力图展示 pivot_df = result_df.pivot_table( values='情感得分', index='关键词', columns='地区', aggfunc='mean' ).fillna(0) fig_heatmap = px.imshow( pivot_df, color_continuous_scale=['#DE2910', '#FFB3B3', '#FFD700'], aspect="auto", title="地域-关键词情感倾向热力图" ) fig_heatmap.update_layout( height=400, plot_bgcolor='white', paper_bgcolor='white', font=dict(family="Microsoft YaHei") ) st.plotly_chart(fig_heatmap, use_container_width=True, key="academic_heatmap") # 详细数据表 st.markdown("#### 📊 详细分析数据") st.dataframe( result_df.sort_values('情感得分', ascending=False), use_container_width=True, hide_index=True, column_config={ '情感得分': st.column_config.ProgressColumn( '情感得分', help='语义情感强度', format='%.2f', min_value=0, max_value=1, ) } ) else: st.info("未找到相关分析数据,请确保上传的数据包含关键词匹配的内容") with col2: st.markdown("#### 🧠 大模型洞察") # 模拟大模型分析摘要 if analysis_results: st.markdown("""

📝 表述差异发现

""", unsafe_allow_html=True) # 生成洞察文本 insights = generate_academic_insights(analysis_results, target_words) for insight in insights: st.markdown(f"""
{insight['title']}
{insight['content']}
""", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) # 学术建议 st.markdown("""

💡 学术建议

""", unsafe_allow_html=True) # 媒体差异分析 if media_col and media_col in df_academic.columns: st.markdown("#### 📺 媒体表述差异对比") media_col1, media_col2 = st.columns(2) with media_col1: # 媒体分布 media_dist = df_academic[media_col].value_counts().head(8).reset_index() media_dist.columns = ['媒体类型', '数量'] fig_media = px.pie( media_dist, values='数量', names='媒体类型', color_discrete_sequence=['#DE2910', '#C41E0B', '#FFD700', '#FFA500', '#FFB3B3', '#999999'] ) fig_media.update_layout( height=350, showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=-0.2), plot_bgcolor='white', paper_bgcolor='white' ) st.plotly_chart(fig_media, use_container_width=True, key="media_pie") with media_col2: # 关键词在不同媒体中的使用 if target_words: media_keyword_data = [] for word in target_words[:2]: # 只取前两个避免数据过于稀疏 for media in df_academic[media_col].unique()[:5]: mask = (df_academic[text_col].astype(str).str.contains(word, na=False)) & \ (df_academic[media_col] == media) count = mask.sum() media_keyword_data.append({ '关键词': word, '媒体': media, '提及次数': count }) if media_keyword_data: mk_df = pd.DataFrame(media_keyword_data) fig_mk = px.bar( mk_df, x='媒体', y='提及次数', color='关键词', barmode='group', color_discrete_map={'筑牢': '#DE2910', '五胡入华': '#FFD700'} ) fig_mk.update_layout( height=350, plot_bgcolor='white', paper_bgcolor='white', xaxis_title="", yaxis_title="提及频次" ) st.plotly_chart(fig_mk, use_container_width=True, key="media_keyword_bar") except Exception as e: st.error(f"学术分析数据处理出错: {str(e)}") st.info("请检查数据格式是否正确,或尝试重新上传数据") else: # 使用示例数据展示功能 st.markdown("""
📚

学术语境分析模块

利用Hugging Face大模型分析核心词汇和历史事件的地域/媒体表述差异

支持分析内容:
• 核心词汇:"筑牢"、"中华民族共同体"、"五个认同"等
• 历史事件:"五胡入华"、"民族融合"、"华夷之辨"等
• 地域差异:不同省份/地区的表述倾向对比
• 媒体差异:知识社区、新闻平台、社交媒体的表述差异
数据格式要求:
请上传包含以下列的CSV/Excel文件:
• 文本内容/表述原文
• 地区/IP属地
• 媒体类型/平台
• 关键词/主题(可选)
""", unsafe_allow_html=True) # 所有分支结束后关闭 content-card st.markdown('
', unsafe_allow_html=True) except Exception as e: st.error(f"数据处理出错: {str(e)}") st.info("请尝试重新上传数据") else: # 欢迎页面 st.markdown("""
📊

欢迎使用分析平台

请在左侧管理面板上传数据并点击「固化并发布」以开始分析

步骤 1
上传高校官方数据
步骤 2
上传社媒反馈语料
步骤 3
点击固化并发布
""", unsafe_allow_html=True) # ============================================ # ============================================ # ============================================ # 7. 底部信息 # ============================================ st.markdown('', unsafe_allow_html=True)