import streamlit as st
import pandas as pd
import plotly.express as px
import os
import re
import io
import base64
# ============================================
# 0. Page configuration & CSS
# ============================================
st.set_page_config(
    page_title="高校中华民族共同体分析平台",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): the CSS payload of this markdown call appears to have been
# stripped during extraction — confirm against the original file.
st.markdown("""
""", unsafe_allow_html=True)
# ============================================
# 1. 顶部标题栏
# ============================================
def render_header():
badge_left_html = ""
badge_right_html = ""
if os.path.exists("xiaohui.jpg.jpg"):
with open("xiaohui.jpg.jpg", "rb") as f:
badge_left_b64 = base64.b64encode(f.read()).decode()
badge_left_html = f'
', unsafe_allow_html=True)
nav_cols = st.columns(len(tabs)) # 动态适应标签数量
for i, (tab, icon) in enumerate(zip(tabs, tab_icons)):
with nav_cols[i]:
is_active = st.session_state.current_tab == tab
btn_type = "primary" if is_active else "secondary"
if st.button(f"{icon} {tab}", key=f"nav_{tab}", use_container_width=True, type=btn_type):
st.session_state.current_tab = tab
st.rerun()
st.markdown('
', unsafe_allow_html=True)
# ============================================
# 3. Login & session-state management
# ============================================
# Initialise both auth flags exactly once per session.
for _flag in ('auth', 'ask'):
    if _flag not in st.session_state:
        st.session_state[_flag] = False

# Narrow right-hand column holds the login/logout toggle button.
corner = st.columns([12, 1])[1]
with corner:
    if st.session_state.auth:
        if st.button("🚪 退出", key="logout_btn"):
            st.session_state.auth = False
            st.rerun()
    elif st.button("🔐 管理", key="login_btn"):
        st.session_state.ask = True

# Password form is rendered only after the admin button was pressed
# and while the user is still unauthenticated.
if st.session_state.get('ask') and not st.session_state.auth:
    with st.form("login_form"):
        entered = st.text_input("请输入管理口令", type="password")
        # NOTE(review): hard-coded credential "0166" — consider st.secrets.
        if st.form_submit_button("进入系统"):
            if entered == "0166":
                st.session_state.auth = True
                st.session_state.ask = False
                st.rerun()
# ============================================
# 4. 辅助函数
# ============================================
@st.cache_data
def read_data_file_cached(file_bytes, file_name):
    """Parse an uploaded file's raw bytes into a DataFrame.

    Supported formats (chosen by the file-name extension):
      * .csv  — tries several common encodings (BOM-aware UTF-8 first,
        then plain UTF-8, then the Simplified-Chinese GBK/GB2312).
      * .xlsx / .xls — via pandas' Excel reader.
      * .docx — paragraphs become a '内容' column; 2+-cell table rows
        become '标题'/'内容' pairs (requires python-docx).

    Returns None when the file cannot be parsed (including when
    python-docx is not installed). Best-effort by design: any parsing
    error yields None rather than an exception.
    """
    try:
        fname = file_name.lower()
        if fname.endswith('.csv'):
            for encoding in ('utf-8-sig', 'utf-8', 'gbk', 'gb2312'):
                try:
                    return pd.read_csv(io.BytesIO(file_bytes), encoding=encoding)
                except Exception:
                    continue
            # Last resort: decode as UTF-8 and drop undecodable bytes.
            # BUGFIX: read_csv has no `errors` kwarg — the correct name is
            # `encoding_errors` (pandas >= 1.3); the old call raised TypeError.
            return pd.read_csv(io.BytesIO(file_bytes), encoding='utf-8',
                               encoding_errors='ignore')
        elif fname.endswith(('.xlsx', '.xls')):
            # BUGFIX: openpyxl can only read .xlsx; for legacy .xls let
            # pandas choose the engine (xlrd) instead of forcing openpyxl.
            engine = 'openpyxl' if fname.endswith('.xlsx') else None
            return pd.read_excel(io.BytesIO(file_bytes), engine=engine)
        elif fname.endswith('.docx'):
            try:
                from docx import Document
                doc = Document(io.BytesIO(file_bytes))
                text_data = []
                for para in doc.paragraphs:
                    if para.text.strip():
                        text_data.append({'内容': para.text.strip()})
                for table in doc.tables:
                    for row in table.rows:
                        row_data = [cell.text for cell in row.cells]
                        if len(row_data) >= 2:
                            text_data.append({'标题': row_data[0], '内容': row_data[1]})
                return pd.DataFrame(text_data)
            except ImportError:
                # python-docx not installed — treat as unparseable.
                return None
        return None
    except Exception:
        return None
def read_data_file(file):
    """Read an uploaded-file object and delegate parsing to the cached helper.

    The stream position is rewound afterwards so callers can re-read the file.
    """
    raw = file.read()
    file.seek(0)
    return read_data_file_cached(raw, file.name)
# ---------- 新增:学术分析辅助函数 ----------
def analyze_text_sentiment(texts, keyword):
    """Rule-based sentiment probe over a list of text snippets.

    Only the first snippet is inspected; ``keyword`` is accepted for
    interface compatibility but does not influence the scoring rules.

    Returns a dict with keys:
      * 'sentiment' — '积极' / '消极' / '中性'
      * 'score'     — 0..1, rounded to 2 decimals (0.5 is neutral)
      * 'example'   — first 100 characters of the inspected snippet
    """
    import random

    if not texts:
        return {'sentiment': '中性', 'score': 0.5, 'example': ''}

    sample = str(texts[0])
    POSITIVE = ('铸牢', '认同', '团结', '融合', '共同体', '凝聚', '进步', '发展')
    NEGATIVE = ('破坏', '分裂', '歧视', '冲突', '对立', '矛盾')

    lowered = sample.lower()
    hits_pos = sum(term in lowered for term in POSITIVE)
    hits_neg = sum(term in lowered for term in NEGATIVE)

    if hits_pos > hits_neg:
        mood, score = '积极', 0.5 + min(0.5, hits_pos * 0.1)
    elif hits_neg > hits_pos:
        mood, score = '消极', max(0.0, 0.5 - hits_neg * 0.1)
    else:
        # Tie (or no hits at all): jitter slightly around neutral.
        mood, score = '中性', 0.5 + random.uniform(-0.1, 0.1)

    return {'sentiment': mood, 'score': round(score, 2), 'example': sample[:100]}
def generate_academic_insights(analysis_results, target_words):
    """Derive headline insights from per-keyword/region analysis rows.

    Parameters
    ----------
    analysis_results : list[dict]
        Rows that may carry '地区' (region), '语义倾向' (sentiment label)
        and '关键词' (keyword) keys; missing keys are tolerated.
    target_words : list[str]
        Accepted for interface compatibility; currently unused.

    Returns
    -------
    list[dict]
        Insight cards, each with 'title' and 'content' keys. The keyword-
        coverage card is always present; the others are conditional.
    """
    insights = []

    regions = {r['地区'] for r in analysis_results if '地区' in r}
    if len(regions) > 1:
        insights.append({'title': '🌍 地域表述差异', 'content': f'发现{len(regions)}个不同地区的表述差异,建议针对性地调整传播策略。'})

    sentiments = [r.get('语义倾向', '中性') for r in analysis_results]
    # Only compute ratio insights when there is data; previously an empty
    # input produced a spurious "100% negative" warning.
    if sentiments:
        pos_ratio = sentiments.count('积极') / len(sentiments)
        neg_ratio = sentiments.count('消极') / len(sentiments)
        if pos_ratio > 0.7:
            insights.append({'title': '📈 积极语义主导', 'content': f'{pos_ratio*100:.0f}%的表述呈现积极语义倾向,整体舆论环境良好。'})
        elif pos_ratio < 0.3:
            # BUGFIX: report the actual negative share — the old
            # (1 - pos_ratio) figure wrongly counted neutral texts.
            insights.append({'title': '⚠️ 消极语义警示', 'content': f'消极语义占比{neg_ratio*100:.0f}%,需关注潜在风险点。'})

    # Guard the key like '地区' above; previously a missing '关键词'
    # raised KeyError.
    word_coverage = len({r['关键词'] for r in analysis_results if '关键词' in r})
    insights.append({'title': '🎯 关键词覆盖', 'content': f'成功识别{word_coverage}个核心词汇的语境使用模式。'})
    return insights
# ---------- 结束 ----------
# ============================================
# 5. 侧边栏
# ============================================
if st.session_state.auth:
with st.sidebar:
st.markdown('', unsafe_allow_html=True)
if current == "CPI排行":
st.markdown("### 📊 高校传播效能排行")
if 'school' in du.columns:
c_data = du['school'].value_counts().reset_index()
c_data.columns = ['学校', '指数']
fig = px.bar(c_data, x='学校', y='指数',
color_discrete_sequence=['#DE2910'],
text_auto=True)
fig.update_layout(
height=480,
showlegend=False,
xaxis_title="",
yaxis_title="传播指数",
plot_bgcolor='white',
paper_bgcolor='white',
margin=dict(t=20, b=40),
font=dict(family="Microsoft YaHei")
)
fig.update_traces(marker_line_color='#FFD700', marker_line_width=1.5)
st.plotly_chart(fig, use_container_width=True, key="cpi_chart")
else:
st.info("数据中未找到学校标识列")
elif current == "话语提取":
st.markdown("### 🗣️ 核心话语体系监测")
txt_col = None
for c in ds.columns:
if any(x in str(c) for x in ['文本', '内容', 'text', '评论', '留言']):
txt_col = c
break
if txt_col is None:
txt_col = ds.columns[0]
full_txt = "".join(ds[txt_col].astype(str))
kws = ["铸牢共同体意识", "中华民族", "统一", "共同体", "团结"]
k_df = pd.DataFrame([{'词汇': k, '频数': len(re.findall(k, full_txt))} for k in kws])
fig = px.bar(k_df, x='词汇', y='频数',
color='频数',
color_continuous_scale=['#FFB3B3', '#DE2910'],
text_auto=True)
fig.update_layout(
height=480,
showlegend=False,
xaxis_title="",
yaxis_title="出现频次",
coloraxis_showscale=False,
plot_bgcolor='white',
paper_bgcolor='white',
margin=dict(t=20, b=40),
font=dict(family="Microsoft YaHei")
)
st.plotly_chart(fig, use_container_width=True, key="word_chart")
elif current == "情感对比":
st.markdown("### 🧠 情感极性对比分析")
col1, col2 = st.columns(2)
with col1:
st.markdown("**官方宣教情感分布**")
official_data = pd.DataFrame({
'sentiment': ['积极', '中性', '消极'],
'count': [92, 6, 2]
})
fig1 = px.pie(official_data, values='count', names='sentiment',
color='sentiment',
color_discrete_map=color_map,
hole=0.4)
fig1.update_layout(
height=400,
showlegend=True,
legend=dict(orientation="h", yanchor="bottom", y=-0.15),
plot_bgcolor='white',
paper_bgcolor='white',
margin=dict(t=10, b=60),
font=dict(family="Microsoft YaHei")
)
st.plotly_chart(fig1, use_container_width=True, key="official_sentiment")
with col2:
st.markdown("**社媒公众反馈分布**")
label_col = None
for c in ds.columns:
if any(x in str(c).lower() for x in ['标签', '情感', 'sentiment', '情绪', '评价']):
label_col = c
break
if label_col is None:
label_col = ds.columns[-1]
s_v = ds[label_col].value_counts().reset_index()
s_v.columns = ['sentiment', 'count']
sentiment_mapping = {}
for val in s_v['sentiment'].unique():
val_str = str(val).lower()
if any(x in val_str for x in ['积极', '正面', '好评', 'pos', '好']):
sentiment_mapping[val] = '积极'
elif any(x in val_str for x in ['消极', '负面', '差评', 'neg', '差']):
sentiment_mapping[val] = '消极'
else:
sentiment_mapping[val] = '中性'
s_v['sentiment'] = s_v['sentiment'].map(sentiment_mapping)
s_v = s_v.groupby('sentiment')['count'].sum().reset_index()
fig2 = px.pie(s_v, values='count', names='sentiment',
color='sentiment',
color_discrete_map=color_map,
hole=0.4)
fig2.update_layout(
height=400,
showlegend=True,
legend=dict(orientation="h", yanchor="bottom", y=-0.15),
plot_bgcolor='white',
paper_bgcolor='white',
margin=dict(t=10, b=60),
font=dict(family="Microsoft YaHei")
)
st.plotly_chart(fig2, use_container_width=True, key="social_sentiment")
elif current == "热度地图":
st.markdown("### 🗺️ 全国传播热度映射")
if os.path.exists("ditu.jpg"):
with open("ditu.jpg", "rb") as f:
map_data = f.read()
map_b64 = base64.b64encode(map_data).decode()
st.markdown(f"""
""", unsafe_allow_html=True)
st.caption("📍 全国高校中华民族共同体意识传播热度分布")
else:
st.markdown("""
""", unsafe_allow_html=True)
elif current == "优化策略":
st.markdown("### 💡 传播优化策略建议")
if os.path.exists("data_p.csv"):
dp = pd.read_csv("data_p.csv", encoding='utf-8-sig')
display_cols = [c for c in dp.columns if any(x in c for x in ['策略', '建议', '内容', '类型', '优先级'])]
if display_cols:
st.dataframe(dp[display_cols].head(10), use_container_width=True, hide_index=True)
else:
st.dataframe(dp.head(10), use_container_width=True, hide_index=True)
else:
# 不显示提示信息,直接显示默认策略
dp = generate_default_strategies()
st.dataframe(dp, use_container_width=True, hide_index=True)
elif current == "学术语境分析":
st.markdown("### 📚 学术语境深度分析")
# 检查是否有学术分析数据
academic_data_exists = os.path.exists("data_academic.csv")
if academic_data_exists:
try:
@st.cache_data
def load_academic_data():
return pd.read_csv("data_academic.csv", encoding='utf-8-sig')
df_academic = load_academic_data()
# 数据列识别
text_col = None
region_col = None
media_col = None
keyword_col = None
for c in df_academic.columns:
c_str = str(c).lower()
if any(x in c_str for x in ['文本', '内容', 'text', '表述', '原文']):
text_col = c
if any(x in c_str for x in ['地区', '地域', '省份', 'region', 'location', 'ip', '属地']):
region_col = c
if any(x in c_str for x in ['媒体', '平台', '来源', 'media', 'platform', 'source']):
media_col = c
if any(x in c_str for x in ['关键词', '主题', '词汇', 'keyword', 'topic', '事件']):
keyword_col = c
# 如果未识别到列,使用默认列
if text_col is None:
text_col = df_academic.columns[0]
if region_col is None and len(df_academic.columns) > 1:
region_col = df_academic.columns[1]
if media_col is None and len(df_academic.columns) > 2:
media_col = df_academic.columns[2]
# 侧边栏分析配置
with st.sidebar:
st.markdown('', unsafe_allow_html=True)
# 主分析区域
col1, col2 = st.columns([2, 1])
with col1:
st.markdown("#### 🎯 语境语义分析")
# 模拟Hugging Face模型分析结果(实际使用时需要接入真实API)
# 这里使用基于规则的分析来模拟大模型效果
analysis_results = []
for word in target_words:
if text_col in df_academic.columns:
# 查找包含该词的文本
mask = df_academic[text_col].astype(str).str.contains(word, na=False)
related_texts = df_academic[mask]
if len(related_texts) > 0:
# 分析不同地区的表述
if region_col and region_col in df_academic.columns:
region_stats = related_texts[region_col].value_counts().head(5)
for region, count in region_stats.items():
# 模拟语义倾向分析
sample_texts = related_texts[related_texts[region_col] == region][text_col].head(3).tolist()
sentiment_score = analyze_text_sentiment(sample_texts, word)
analysis_results.append({
'关键词': word,
'地区': region,
'出现频次': count,
'语义倾向': sentiment_score['sentiment'],
'情感得分': sentiment_score['score'],
'典型表述': sentiment_score['example'][:50] + "..."
})
if analysis_results:
result_df = pd.DataFrame(analysis_results)
# 热力图展示
pivot_df = result_df.pivot_table(
values='情感得分',
index='关键词',
columns='地区',
aggfunc='mean'
).fillna(0)
fig_heatmap = px.imshow(
pivot_df,
color_continuous_scale=['#DE2910', '#FFB3B3', '#FFD700'],
aspect="auto",
title="地域-关键词情感倾向热力图"
)
fig_heatmap.update_layout(
height=400,
plot_bgcolor='white',
paper_bgcolor='white',
font=dict(family="Microsoft YaHei")
)
st.plotly_chart(fig_heatmap, use_container_width=True, key="academic_heatmap")
# 详细数据表
st.markdown("#### 📊 详细分析数据")
st.dataframe(
result_df.sort_values('情感得分', ascending=False),
use_container_width=True,
hide_index=True,
column_config={
'情感得分': st.column_config.ProgressColumn(
'情感得分',
help='语义情感强度',
format='%.2f',
min_value=0,
max_value=1,
)
}
)
else:
st.info("未找到相关分析数据,请确保上传的数据包含关键词匹配的内容")
with col2:
st.markdown("#### 🧠 大模型洞察")
# 模拟大模型分析摘要
if analysis_results:
st.markdown("""
📝 表述差异发现
""", unsafe_allow_html=True)
# 生成洞察文本
insights = generate_academic_insights(analysis_results, target_words)
for insight in insights:
st.markdown(f"""
{insight['title']}
{insight['content']}
""", unsafe_allow_html=True)
st.markdown("
", unsafe_allow_html=True)
# 学术建议
st.markdown("""
💡 学术建议
- 关注地域表述差异,调整传播策略
- 监测历史事件表述的情感倾向变化
- 建立核心词汇的语境使用规范
""", unsafe_allow_html=True)
# 媒体差异分析
if media_col and media_col in df_academic.columns:
st.markdown("#### 📺 媒体表述差异对比")
media_col1, media_col2 = st.columns(2)
with media_col1:
# 媒体分布
media_dist = df_academic[media_col].value_counts().head(8).reset_index()
media_dist.columns = ['媒体类型', '数量']
fig_media = px.pie(
media_dist,
values='数量',
names='媒体类型',
color_discrete_sequence=['#DE2910', '#C41E0B', '#FFD700', '#FFA500', '#FFB3B3', '#999999']
)
fig_media.update_layout(
height=350,
showlegend=True,
legend=dict(orientation="h", yanchor="bottom", y=-0.2),
plot_bgcolor='white',
paper_bgcolor='white'
)
st.plotly_chart(fig_media, use_container_width=True, key="media_pie")
with media_col2:
# 关键词在不同媒体中的使用
if target_words:
media_keyword_data = []
for word in target_words[:2]: # 只取前两个避免数据过于稀疏
for media in df_academic[media_col].unique()[:5]:
mask = (df_academic[text_col].astype(str).str.contains(word, na=False)) & \
(df_academic[media_col] == media)
count = mask.sum()
media_keyword_data.append({
'关键词': word,
'媒体': media,
'提及次数': count
})
if media_keyword_data:
mk_df = pd.DataFrame(media_keyword_data)
fig_mk = px.bar(
mk_df,
x='媒体',
y='提及次数',
color='关键词',
barmode='group',
color_discrete_map={'筑牢': '#DE2910', '五胡入华': '#FFD700'}
)
fig_mk.update_layout(
height=350,
plot_bgcolor='white',
paper_bgcolor='white',
xaxis_title="",
yaxis_title="提及频次"
)
st.plotly_chart(fig_mk, use_container_width=True, key="media_keyword_bar")
except Exception as e:
st.error(f"学术分析数据处理出错: {str(e)}")
st.info("请检查数据格式是否正确,或尝试重新上传数据")
else:
# 使用示例数据展示功能
st.markdown("""
📚
学术语境分析模块
利用Hugging Face大模型分析核心词汇和历史事件的地域/媒体表述差异
支持分析内容:
• 核心词汇:"筑牢"、"中华民族共同体"、"五个认同"等
• 历史事件:"五胡入华"、"民族融合"、"华夷之辨"等
• 地域差异:不同省份/地区的表述倾向对比
• 媒体差异:知识社区、新闻平台、社交媒体的表述差异
数据格式要求:
请上传包含以下列的CSV/Excel文件:
• 文本内容/表述原文
• 地区/IP属地
• 媒体类型/平台
• 关键词/主题(可选)
""", unsafe_allow_html=True)
# 所有分支结束后关闭 content-card
st.markdown('
', unsafe_allow_html=True)
except Exception as e:
st.error(f"数据处理出错: {str(e)}")
st.info("请尝试重新上传数据")
else:
# 欢迎页面
st.markdown("""