# meta_analysis / app_meta_analysis.py
# Donlagon007's picture
# Update app_meta_analysis.py
# 8c115e6 verified
import streamlit as st
import pandas as pd
import numpy as np
import uuid
from datetime import datetime
import os
import io
# Page configuration: browser-tab title and icon, wide layout, sidebar open on load.
_PAGE_SETTINGS = {
    "page_title": "Bayesian Meta-Analysis - Pokémon Type Comparison",
    "page_icon": "🎲",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
# Custom CSS: restyles expander headers, metric cards and buttons, and defines
# the .success-box / .info-box helper classes. Kept in a named constant so the
# injection call below stays a one-liner.
_CUSTOM_CSS = """
<style>
.streamlit-expanderHeader {
background-color: #e8f1f8;
border: 1px solid #b0cfe8;
border-radius: 5px;
font-weight: 600;
color: #1b4f72;
}
.streamlit-expanderHeader:hover {
background-color: #d0e7f8;
}
.stMetric {
background-color: #f8fbff;
padding: 10px;
border-radius: 5px;
border: 1px solid #d0e4f5;
}
.stButton > button {
width: 100%;
border-radius: 20px;
font-weight: 600;
transition: all 0.3s ease;
}
.stButton > button:hover {
transform: translateY(-2px);
box-shadow: 0 4px 8px rgba(0,0,0,0.2);
}
.success-box {
background-color: #d4edda;
border: 1px solid #c3e6cb;
border-radius: 5px;
padding: 10px;
margin: 10px 0;
}
.info-box {
background-color: #d1ecf1;
border: 1px solid #bee5eb;
border-radius: 5px;
padding: 10px;
margin: 10px 0;
}
</style>
"""
# unsafe_allow_html is required for the raw <style> tag to reach the page.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# 導入自定義模組
from meta_analysis_core import BayesianMetaAnalyzer
from meta_analysis_llm_assistant import MetaAnalysisLLMAssistant
from meta_analysis_utils import (
plot_trace_combined,
plot_posterior,
plot_forest,
plot_dag,
create_dag_legend_table,
format_summary_stats
)
# Initialise session state: each key is created only if it is missing, so
# values survive Streamlit reruns. Defaults are factories so that the UUID is
# generated (and each list is created) only when the key is actually absent.
_SESSION_DEFAULT_FACTORIES = {
    'session_id': lambda: str(uuid.uuid4()),
    'analysis_results': lambda: None,
    'analyzer': lambda: None,
    'available_types': list,
    'df_full': lambda: None,
    'chat_history': list,
    'llm_assistant': lambda: None,
}
for _state_key, _make_default in _SESSION_DEFAULT_FACTORIES.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# Page title, followed by the subtitle and a horizontal rule.
st.title("🎲 Bayesian Meta-Analysis System")
for _header_markdown in ("### 貝氏統合分析模型", "---"):
    st.markdown(_header_markdown)
# Sidebar: LLM provider / API key, automatic dataset loading, MCMC sampler
# settings and an "about" panel.
# NOTE(review): the source had lost its indentation; nesting below is
# reconstructed from the statement order.
with st.sidebar:
    st.header("⚙️ 配置設定")

    # --- LLM provider selection ---
    st.subheader("🤖 AI 助手設定")
    api_choice = st.radio(
        "選擇 LLM API",
        options=["OpenAI GPT-4o", "Google Gemini", "Anthropic Claude"],
        index=0,
        help="選擇要使用的 AI 助手"
    )

    # One password field per provider: (label, help text).
    api_key_fields = {
        "Google Gemini": (
            "Google Gemini API Key",
            "輸入您的 Google Gemini API Key",
        ),
        "Anthropic Claude": (
            "Anthropic Claude API Key",
            "輸入您的 Anthropic API Key (https://console.anthropic.com)",
        ),
        "OpenAI GPT-4o": (
            "OpenAI API Key",
            "輸入您的 OpenAI API Key (https://platform.openai.com)",
        ),
    }
    # Any unexpected choice falls back to the OpenAI field, as before.
    key_label, key_help = api_key_fields.get(
        api_choice, api_key_fields["OpenAI GPT-4o"]
    )
    api_key = st.text_input(key_label, type="password", help=key_help)

    # Remember the key and the provider it belongs to for the AI-assistant tab.
    if api_key:
        st.session_state.api_key = api_key
        st.session_state.api_choice = api_choice
        st.success(f"✅ {api_choice} API Key 已載入")
    st.markdown("---")

    # --- Data source (loaded automatically, once per session) ---
    st.subheader("📊 資料來源")
    DATA_PATH = "all_types_wide_30_completed_sas.csv"
    if st.session_state.df_full is None:
        try:
            df_full = pd.read_csv(DATA_PATH)
        except FileNotFoundError:
            st.error(f"❌ 找不到資料檔案:{DATA_PATH}")
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # An empty or malformed CSV is reported instead of crashing the app.
            st.error(f"❌ 無法讀取資料檔案:{DATA_PATH}({exc})")
        else:
            # A type is usable only when BOTH of its columns exist, because the
            # analysis tab reads `<type>_win_count` and `<type>_total_battles`.
            # Matching on the suffix (not a substring) keeps names intact.
            win_suffix = '_win_count'
            total_suffix = '_total_battles'
            column_names = set(df_full.columns)
            available_types = sorted({
                col[:-len(win_suffix)]
                for col in column_names
                if col.endswith(win_suffix)
                and col[:-len(win_suffix)] + total_suffix in column_names
            })
            st.session_state.df_full = df_full
            st.session_state.available_types = available_types

    if st.session_state.df_full is not None:
        st.success(f"✅ 資料已載入!共 {len(st.session_state.df_full)} 筆記錄")
        st.info(f"📋 可用屬性數量: {len(st.session_state.available_types)}")

    # --- MCMC sampler settings (read later by the analysis tab) ---
    st.subheader("🔬 蒙地卡羅馬可夫鏈 參數")
    n_warmup = st.number_input(
        "Burn-in 樣本數",
        min_value=500,
        max_value=5000,
        value=1000,
        step=100,
        help="Burn-in 樣本數"
    )
    n_samples = st.number_input(
        "Sampling 樣本數",
        min_value=1000,
        max_value=10000,
        value=2000,
        step=500,
        help="Sampling 樣本數"
    )
    n_chains = st.selectbox(
        "鏈數 (Chains)",
        options=[1, 2, 4],
        index=1,
        help="蒙地卡羅馬可夫鏈 數量"
    )
    target_accept = st.slider(
        "目標接受率 (Target Accept)",
        min_value=0.80,
        max_value=0.99,
        value=0.95,
        step=0.01,
        help="NUTS 採樣器的目標接受率,越高越謹慎但越慢"
    )
    st.info(f"💡 總迭代數 = {n_warmup} (Warmup) + {n_samples} (Sample) = {n_warmup + n_samples}")
    st.markdown("---")

    # --- About panel ---
    # The markdown text is kept flush-left: leading spaces inside the string
    # would be rendered by Markdown as a code block.
    with st.expander("ℹ️ 關於此系統"):
        st.markdown("""
**貝氏後設分析系統**
本系統使用貝氏階層模型進行後設分析,
比較不同寶可夢屬性在多個道館中的對戰表現。
**主要功能:**
- 🎲 貝氏推論與後驗分佈
- 📊 階層模型(借用資訊)
- 📈 5 種視覺化圖表
- 📋 完整統計報告
- 🔍 收斂診斷
**分析特色:**
- 完整的 MCMC trace(warmup + posterior)
- 中文 DAG 節點對照表
- 預測新研究效果
- 異質性評估
""")
# Main content area: three top-level tabs (analysis, visualisations, AI assistant).
_MAIN_TAB_LABELS = ["📊 貝氏分析", "📈 視覺化結果", "💬 AI 助手"]
tab1, tab2, tab3 = st.tabs(_MAIN_TAB_LABELS)
# Tab 1: pick the treatment/control types, run the Bayesian meta-analysis and
# show a summary of the stored results with a downloadable report.
# NOTE(review): the source had lost its indentation; nesting below is
# reconstructed from the statement order.
with tab1:
    st.header("📊 貝氏統合分析")
    if st.session_state.df_full is None:
        st.warning("⚠️ 資料載入失敗,請確認資料檔案存在")
    else:
        df_full = st.session_state.df_full
        placeholder = "請選擇..."

        # Data preview
        with st.expander("👀 資料預覽"):
            st.dataframe(df_full, use_container_width=True)
        st.markdown("---")

        # Type selection
        st.subheader("🎯 選擇比較屬性")
        type_options = [placeholder] + st.session_state.available_types
        col1, col2 = st.columns(2)
        with col1:
            treatment_type = st.selectbox(
                "實驗組屬性(Treatment)",
                options=type_options,
                index=0,
                help="選擇要分析的實驗組屬性"
            )
        with col2:
            control_type = st.selectbox(
                "對照組屬性(Control)",
                options=type_options,
                index=0,
                help="選擇要分析的對照組屬性"
            )

        # Validate the selection once; the run button below reuses the result
        # instead of repeating the same three comparisons.
        selection_valid = False
        if placeholder in (treatment_type, control_type):
            st.warning("⚠️ 請選擇實驗組和對照組屬性")
        elif treatment_type == control_type:
            st.error("❌ 實驗組和對照組不能相同")
        else:
            selection_valid = True
            st.success(f"✅ 已選擇: {treatment_type} (實驗組) vs {control_type} (對照組)")

            # Pooled win rates over all rows of the dataset.
            st.subheader("📋 資料摘要")
            treatment_wins = df_full[f'{treatment_type}_win_count'].sum()
            treatment_total = df_full[f'{treatment_type}_total_battles'].sum()
            control_wins = df_full[f'{control_type}_win_count'].sum()
            control_total = df_full[f'{control_type}_total_battles'].sum()
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric(
                    "資料筆數",
                    f"{len(df_full)} 筆",
                    help="道館(研究單位)的數量"
                )
            with col2:
                # Guard against division by zero when no battles were recorded.
                treatment_rate = treatment_wins / treatment_total if treatment_total > 0 else 0
                st.metric(
                    f"{treatment_type} 平均勝率",
                    f"{treatment_rate:.3f}",
                    help="實驗組的整體勝率"
                )
            with col3:
                control_rate = control_wins / control_total if control_total > 0 else 0
                st.metric(
                    f"{control_type} 平均勝率",
                    f"{control_rate:.3f}",
                    help="對照組的整體勝率"
                )
        st.markdown("---")

        # Run button: only rendered for a valid selection (short-circuit `and`).
        if selection_valid and st.button("🔬 開始貝氏分析", type="primary", use_container_width=True):
            with st.spinner("🔄 正在執行 蒙地卡羅馬可夫鏈 抽樣,請稍候..."):
                try:
                    analyzer = BayesianMetaAnalyzer(
                        session_id=st.session_state.session_id,
                        treatment_type=treatment_type,
                        control_type=control_type
                    )
                    analyzer.load_data(
                        df_full=df_full,
                        treatment_type=treatment_type,
                        control_type=control_type
                    )
                    # Sampler settings come from the sidebar widgets.
                    results = analyzer.run_analysis(
                        n_warmup=n_warmup,
                        n_samples=n_samples,
                        n_chains=n_chains,
                        target_accept=target_accept
                    )
                    st.session_state.analysis_results = results
                    st.session_state.analyzer = analyzer
                    # Remember which pair was analysed so the report file name
                    # stays correct after the selectboxes are changed.
                    st.session_state.analyzed_types = (treatment_type, control_type)
                    st.success("✅ 分析完成!請切換到「視覺化結果」頁籤查看圖表")
                    st.balloons()
                except Exception as e:
                    # UI boundary: report any failure instead of crashing the page.
                    st.error(f"❌ 分析失敗: {str(e)}")
                    st.exception(e)

        # Result summary (shown whenever a previous analysis is stored).
        results = st.session_state.analysis_results
        if results is not None:
            st.markdown("---")
            st.subheader("📊 分析結果摘要")
            overall = results['overall']

            # One card per parameter: (label, key prefix in `overall`, mean format).
            summary_cards = (
                ("d (整體效應)", "d", ".4f"),
                ("勝算比 (OR)", "or", ".3f"),
                ("sigma (異質性)", "sigma", ".4f"),
            )
            for card_col, (card_label, key_prefix, mean_fmt) in zip(st.columns(3), summary_cards):
                hdi_low = overall[f'{key_prefix}_hdi_low']
                hdi_high = overall[f'{key_prefix}_hdi_high']
                with card_col:
                    st.metric(
                        label=card_label,
                        value=format(overall[f'{key_prefix}_mean'], mean_fmt),
                    )
                    st.markdown(
                        f"<span style='color: green; font-size: 14px;'>↑ HDI: [{hdi_low:.3f}, {hdi_high:.3f}]</span>",
                        unsafe_allow_html=True,
                    )

            # Heterogeneity verdict from the posterior mean of sigma
            # (> 0.5 high, > 0.3 moderate, otherwise low).
            sigma_mean = overall['sigma_mean']
            if sigma_mean > 0.5:
                st.info("🔴 **高異質性**: 不同道館的結果差異很大")
            elif sigma_mean > 0.3:
                st.info("🟡 **中等異質性**: 不同道館的結果有一定差異")
            else:
                st.info("🟢 **低異質性**: 不同道館的結果相對一致")
            st.markdown("---")

            # Predicted effect for a new study
            st.markdown("#### 🔮 預測新研究效果")
            pred = results['predictive']
            col1, col2 = st.columns(2)
            with col1:
                st.metric(
                    "預測 delta_new",
                    f"{pred['delta_new_mean']:.4f}",
                    delta=f"± {pred['delta_new_sd']:.4f}",
                    help="預測第 31 間道館的效應值"
                )
            with col2:
                st.metric(
                    "預測 OR",
                    f"{pred['or_new_mean']:.4f}",
                    help="預測的勝算比"
                )
            st.markdown("---")

            # Convergence diagnostics
            st.markdown("#### 🔍 模型診斷")
            diag = results['diagnostics']
            col1, col2, col3 = st.columns(3)
            with col1:
                rhat_status = "✅ 已收斂" if diag['converged'] else "❌ 未收斂"
                st.metric("收斂狀態", rhat_status)
            with col2:
                if diag['rhat_d'] is not None:
                    st.metric("R-hat (d)", f"{diag['rhat_d']:.4f}")
            with col3:
                if diag['ess_d'] is not None:
                    st.metric("ESS (d)", f"{int(diag['ess_d'])}")

            # Report download. The file name uses the pair that was actually
            # analysed; the live selection is only a fallback for results
            # stored before that pair was being recorded.
            st.markdown("---")
            st.markdown("#### 📥 下載報告")
            report_treatment, report_control = st.session_state.get(
                'analyzed_types', (treatment_type, control_type)
            )
            report_text = format_summary_stats(results)
            st.download_button(
                label="📄 下載完整分析報告 (.txt)",
                data=report_text,
                file_name=f"meta_analysis_{report_treatment}_vs_{report_control}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
                mime="text/plain",
                use_container_width=True
            )
# Tab 2: visualisations of the stored analysis (DAG, DAG legend, trace plot,
# forest plot).
# NOTE(review): the source had lost its indentation; nesting below is
# reconstructed from the statement order.
with tab2:
    st.header("📈 視覺化結果")
    if st.session_state.analysis_results is None:
        st.info("ℹ️ 請先在「貝氏分析」頁面執行分析")
    else:
        results = st.session_state.analysis_results
        analyzer = st.session_state.analyzer

        # Four sub-tabs; the posterior-plot tab is currently disabled, so the
        # tabs are unpacked by name rather than addressed by index.
        dag_tab, legend_tab, trace_tab, forest_tab = st.tabs([
            "🔍 模型結構 (DAG)",
            "📋 DAG 對照表",
            "📉 Trace Plot",
            "🌲 Forest Plot"
        ])

        # NOTE(review): `use_column_width` on st.image is deprecated in recent
        # Streamlit releases; kept as-is because the deployed version is not
        # visible from this file.

        # Model structure (DAG)
        with dag_tab:
            st.markdown("### 🔍 模型結構圖 (DAG)")
            st.markdown("""
**DAG(Directed Acyclic Graph)** 顯示貝氏階層模型的結構:
- **橢圓節點**:隨機變數(prior 和 likelihood)
- **矩形節點**:觀測資料
- **菱形節點**:確定性變數(由其他變數推導)
- **箭頭**:依賴關係
""")
            with st.spinner("正在生成 DAG..."):
                try:
                    dag_img = plot_dag(analyzer.model, results)
                    if dag_img:
                        st.image(dag_img, use_column_width=True)
                    else:
                        st.warning("⚠️ 無法生成 DAG 圖(可能需要安裝 Graphviz)")
                except Exception as e:
                    st.error(f"生成 DAG 失敗: {str(e)}")

        # DAG legend table
        with legend_tab:
            st.markdown("### 📋 DAG 中文對照表")
            st.markdown("節點說明表格:解釋 DAG 中每個節點的統計意義和實際情境應用")
            from meta_analysis_utils import create_dag_legend_html
            dag_html = create_dag_legend_html(results)
            st.markdown(dag_html, unsafe_allow_html=True)

        # Trace plot
        with trace_tab:
            st.markdown("### 📉 Trace Plot(抽樣軌跡)")
            st.markdown("""
**Trace Plot 用途**:
- **左圖**:後驗分佈的密度圖
- **右圖**:完整的 MCMC 軌跡(warmup + posterior)
- **紅色虛線**:標記 burn-in 結束點
**判斷收斂**:
- 軌跡圖應該要穩定震盪
- 不同鏈應該混合良好
- R-hat < 1.1 表示已收斂
""")
            with st.spinner("正在生成 Trace Plot..."):
                try:
                    trace_img = plot_trace_combined(results)
                    if trace_img:
                        st.image(trace_img, use_column_width=True)
                    else:
                        # Mirror the DAG tab: tell the user instead of leaving
                        # the tab silently empty.
                        st.warning("⚠️ 無法生成 Trace Plot")
                except Exception as e:
                    st.error(f"生成 Trace Plot 失敗: {str(e)}")

        # Forest plot
        with forest_tab:
            st.markdown("### 🌲 Forest Plot(各道館效應)")
            st.markdown("""
**Forest Plot 用途**:
- 顯示每個道館的特定效應(delta)
- **點**:平均效應值
- **橫線**:95% HDI
- **垂直虛線**:零效應參考線
**解讀**:
- HDI 不包含 0 → 該道館有顯著效應
- 點在右側(> 0)→ 實驗組優勢
- 點在左側(< 0)→ 對照組優勢
""")
            with st.spinner("正在生成 Forest Plot..."):
                try:
                    forest_img = plot_forest(results)
                    if forest_img:
                        st.image(forest_img, use_column_width=True)
                    else:
                        st.warning("⚠️ 無法生成 Forest Plot")
                except Exception as e:
                    st.error(f"生成 Forest Plot 失敗: {str(e)}")
# Tab 3: chat with an LLM assistant about the stored analysis results.
# NOTE(review): the source had lost its indentation; nesting below is
# reconstructed from the statement order.
with tab3:
    st.header("💬 AI 分析助手")
    if not st.session_state.get('api_key'):
        st.warning("⚠️ 請在左側輸入您的 API Key 以使用 AI 助手")
        # Lists every provider offered by the sidebar radio.
        st.info("💡 支援 OpenAI GPT-4o、Google Gemini 和 Anthropic Claude")
    elif st.session_state.analysis_results is None:
        st.info("ℹ️ 請先在「貝氏分析」頁面執行分析")
    else:
        # Fallback matches the sidebar radio's default option.
        api_choice = st.session_state.get('api_choice', 'OpenAI GPT-4o')

        # (Re)build the assistant when none exists yet or when the key or
        # provider changed; otherwise the first key entered would be used for
        # the rest of the session.
        assistant_config = (st.session_state.api_key, api_choice)
        if (st.session_state.llm_assistant is None
                or st.session_state.get('llm_assistant_config') != assistant_config):
            st.session_state.llm_assistant = MetaAnalysisLLMAssistant(
                api_key=st.session_state.api_key,
                session_id=st.session_state.session_id,
                api_provider=api_choice
            )
            st.session_state.llm_assistant_config = assistant_config

        assistant = st.session_state.llm_assistant
        analysis = st.session_state.analysis_results

        # Replay the stored conversation.
        chat_container = st.container()
        with chat_container:
            for message in st.session_state.chat_history:
                with st.chat_message(message["role"]):
                    st.markdown(message["content"])

        # Free-form user question
        if prompt := st.chat_input("詢問關於分析結果的任何問題..."):
            st.session_state.chat_history.append({
                "role": "user",
                "content": prompt
            })
            with st.chat_message("user"):
                st.markdown(prompt)
            with st.chat_message("assistant"):
                with st.spinner("思考中..."):
                    try:
                        response = assistant.get_response(
                            user_message=prompt,
                            analysis_results=analysis
                        )
                        st.markdown(response)
                    except Exception as e:
                        # UI boundary: show the failure and keep it in the history.
                        error_msg = f"❌ 錯誤: {str(e)}\n\n請檢查 API key 或重新表達問題。"
                        st.error(error_msg)
                        response = error_msg
            st.session_state.chat_history.append({
                "role": "assistant",
                "content": response
            })
        st.markdown("---")

        # Quick-question buttons. Each label is paired directly with its
        # handler, so renaming a label can no longer break the routing (the
        # previous substring match looked for "貝氏後設分析" while the button
        # reads "貝氏統合分析").
        st.subheader("💡 快速問題")
        quick_questions = [
            ("📊 給我這次分析的總結",
             lambda: assistant.generate_summary(analysis)),
            ("🎯 解釋 d 和勝算比",
             lambda: assistant.explain_metric('d', analysis)),
            ("🔍 解釋 sigma(異質性)",
             lambda: assistant.explain_metric('sigma', analysis)),
            ("❓ 什麼是貝氏統合分析?",
             lambda: assistant.explain_bayesian_meta_analysis()),
            ("🔮 解釋預測新道館",
             lambda: assistant.explain_predictive_inference(analysis)),
            # NOTE(review): the Bayesian-vs-frequentist question reuses the
            # general explainer, as before; confirm whether the assistant
            # offers a dedicated comparison method.
            ("🆚 貝氏 vs 頻率論",
             lambda: assistant.explain_bayesian_meta_analysis()),
            ("⚔️ 對戰策略建議",
             lambda: assistant.battle_strategy_advice(analysis)),
            ("🏛️ 比較不同道館",
             lambda: assistant.compare_gyms(analysis)),
        ]
        quick_cols = st.columns(4)
        for idx, (question, answer_question) in enumerate(quick_questions):
            if quick_cols[idx % 4].button(question, key=f"quick_{idx}"):
                with st.spinner("思考中..."):
                    try:
                        response = answer_question()
                    except Exception as e:
                        # Same handling as the chat input: record the error as
                        # the assistant's reply rather than crashing the page.
                        response = f"❌ 錯誤: {str(e)}\n\n請檢查 API key 或重新表達問題。"
                st.session_state.chat_history.append({
                    "role": "user",
                    "content": question
                })
                st.session_state.chat_history.append({
                    "role": "assistant",
                    "content": response
                })
                # Rerun so the new exchange is drawn by the replay loop above.
                st.rerun()

        # Reset conversation
        st.markdown("---")
        if st.button("🔄 重置對話"):
            assistant.reset_conversation()
            st.session_state.chat_history = []
            st.success("✅ 對話已重置")
            st.rerun()
# Footer: horizontal rule, then a centred credit line and the first eight
# characters of the session id.
st.markdown("---")
_session_prefix = st.session_state.session_id[:8]
_footer_html = f"""
<div style='text-align: center'>
<p>🎲 Bayesian Meta-Analysis System for Pokémon Type Comparison | Built with Streamlit & PyMC</p>
<p>Session ID: {_session_prefix}</p>
</div>
"""
st.markdown(_footer_html, unsafe_allow_html=True)