Spaces:

Donlagon007
/

meta_analysis

Sleeping

App Files Files Community

meta_analysis / app_meta_analysis.py

Donlagon007

Update app_meta_analysis.py

8c115e6 verified 2 months ago

raw

history blame contribute delete

26.1 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import uuid
	from datetime import datetime
	import os
	import io

	# Page configuration: browser-tab title and icon, wide layout, sidebar
	# expanded on first load.
	page_settings = {
	    "page_title": "Bayesian Meta-Analysis - Pokémon Type Comparison",
	    "page_icon": "🎲",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**page_settings)

	# Custom CSS: restyles expander headers, metric cards and buttons, and
	# defines the .success-box / .info-box helper classes.
	custom_css = """
	<style>
	.streamlit-expanderHeader {
	background-color: #e8f1f8;
	border: 1px solid #b0cfe8;
	border-radius: 5px;
	font-weight: 600;
	color: #1b4f72;
	}
	.streamlit-expanderHeader:hover {
	background-color: #d0e7f8;
	}
	.stMetric {
	background-color: #f8fbff;
	padding: 10px;
	border-radius: 5px;
	border: 1px solid #d0e4f5;
	}
	.stButton > button {
	width: 100%;
	border-radius: 20px;
	font-weight: 600;
	transition: all 0.3s ease;
	}
	.stButton > button:hover {
	transform: translateY(-2px);
	box-shadow: 0 4px 8px rgba(0,0,0,0.2);
	}
	.success-box {
	background-color: #d4edda;
	border: 1px solid #c3e6cb;
	border-radius: 5px;
	padding: 10px;
	margin: 10px 0;
	}
	.info-box {
	background-color: #d1ecf1;
	border: 1px solid #bee5eb;
	border-radius: 5px;
	padding: 10px;
	margin: 10px 0;
	}
	</style>
	"""
	# unsafe_allow_html is required so the <style> tag is emitted as HTML.
	st.markdown(custom_css, unsafe_allow_html=True)

	# 導入自定義模組
	from meta_analysis_core import BayesianMetaAnalyzer
	from meta_analysis_llm_assistant import MetaAnalysisLLMAssistant
	from meta_analysis_utils import (
	plot_trace_combined,
	plot_posterior,
	plot_forest,
	plot_dag,
	create_dag_legend_table,
	format_summary_stats
	)

	# Initialise session state. Each entry maps a key to a zero-argument
	# factory so the default is only built when the key is actually missing
	# (a new UUID / a fresh list per session).
	session_defaults = {
	    'session_id': lambda: str(uuid.uuid4()),
	    'analysis_results': lambda: None,
	    'analyzer': lambda: None,
	    'available_types': list,
	    'df_full': lambda: None,
	    'chat_history': list,
	    'llm_assistant': lambda: None,
	}
	for state_key, make_default in session_defaults.items():
	    if state_key not in st.session_state:
	        st.session_state[state_key] = make_default()

	# Page heading: title, subtitle and a horizontal rule.
	app_title = "🎲 Bayesian Meta-Analysis System"
	app_subtitle = "### 貝氏統合分析模型"
	st.title(app_title)
	st.markdown(app_subtitle)
	st.markdown("---")

	# Sidebar: LLM provider / API key, automatic data loading, MCMC sampler
	# settings and an "about" panel.
	#
	# Names defined here and used further down the script:
	#   n_warmup, n_samples, n_chains, target_accept  -> passed to run_analysis
	#   st.session_state.api_key / api_choice         -> read by the AI tab
	#   st.session_state.df_full / available_types    -> read by the analysis tab
	with st.sidebar:
	    st.header("⚙️ 配置設定")

	    # LLM provider selection
	    st.subheader("🤖 AI 助手設定")
	    api_choice = st.radio(
	        "選擇 LLM API",
	        options=["OpenAI GPT-4o", "Google Gemini", "Anthropic Claude"],
	        index=0,
	        help="選擇要使用的 AI 助手"
	    )

	    # API key input: (label, help text) per provider. Anything that is not
	    # Gemini or Claude falls back to the OpenAI field, as before.
	    api_key_fields = {
	        "Google Gemini": (
	            "Google Gemini API Key",
	            "輸入您的 Google Gemini API Key",
	        ),
	        "Anthropic Claude": (
	            "Anthropic Claude API Key",
	            "輸入您的 Anthropic API Key (https://console.anthropic.com)",
	        ),
	        "OpenAI GPT-4o": (
	            "OpenAI API Key",
	            "輸入您的 OpenAI API Key (https://platform.openai.com)",
	        ),
	    }
	    key_label, key_help = api_key_fields.get(
	        api_choice, api_key_fields["OpenAI GPT-4o"]
	    )
	    api_key = st.text_input(key_label, type="password", help=key_help)

	    # Only a non-empty key is stored; key and provider are stored together.
	    if api_key:
	        st.session_state.api_key = api_key
	        st.session_state.api_choice = api_choice
	        st.success(f"✅ {api_choice} API Key 已載入")

	    st.markdown("---")

	    # Data source (loaded automatically, once per session)
	    st.subheader("📊 資料來源")

	    DATA_PATH = "all_types_wide_30_completed_sas.csv"

	    if st.session_state.df_full is None:
	        try:
	            df_full = pd.read_csv(DATA_PATH)
	        except FileNotFoundError:
	            st.error(f"❌ 找不到資料檔案：{DATA_PATH}")
	        except (pd.errors.EmptyDataError, pd.errors.ParserError) as e:
	            # An empty or malformed CSV is reported in the sidebar instead
	            # of aborting the whole script.
	            st.error(f"❌ 資料檔案無法讀取：{DATA_PATH} ({e})")
	        else:
	            st.session_state.df_full = df_full

	            # Derive the selectable type names from the "<type>_win_count"
	            # columns.
	            column_names = set(df_full.columns)
	            type_names = {
	                col.replace('_win_count', '').replace('_total_battles', '')
	                for col in df_full.columns if '_win_count' in col
	            }
	            # Keep only types that have BOTH columns the analysis tab
	            # reads; otherwise selecting the type raises KeyError there.
	            available_types = sorted(
	                type_name for type_name in type_names
	                if f'{type_name}_win_count' in column_names
	                and f'{type_name}_total_battles' in column_names
	            )
	            st.session_state.available_types = available_types

	    if st.session_state.df_full is not None:
	        st.success(f"✅ 資料已載入！共 {len(st.session_state.df_full)} 筆記錄")
	        st.info(f"📋 可用屬性數量: {len(st.session_state.available_types)}")

	    # MCMC sampler parameters
	    st.subheader("🔬 蒙地卡羅馬可夫鏈參數")

	    n_warmup = st.number_input(
	        "Burn-in 樣本數",
	        min_value=500,
	        max_value=5000,
	        value=1000,
	        step=100,
	        help="Burn-in 樣本數"
	    )

	    n_samples = st.number_input(
	        "Sampling 樣本數",
	        min_value=1000,
	        max_value=10000,
	        value=2000,
	        step=500,
	        help="Sampling 樣本數"
	    )

	    n_chains = st.selectbox(
	        "鏈數 (Chains)",
	        options=[1, 2, 4],
	        index=1,
	        help="蒙地卡羅馬可夫鏈數量"
	    )

	    target_accept = st.slider(
	        "目標接受率 (Target Accept)",
	        min_value=0.80,
	        max_value=0.99,
	        value=0.95,
	        step=0.01,
	        help="NUTS 採樣器的目標接受率，越高越謹慎但越慢"
	    )

	    st.info(f"💡 總迭代數 = {n_warmup} (Warmup) + {n_samples} (Sample) = {n_warmup + n_samples}")
	    st.markdown("---")

	    # About this system
	    with st.expander("ℹ️ 關於此系統"):
	        st.markdown("""
	貝氏後設分析系統

	本系統使用貝氏階層模型進行後設分析，
	比較不同寶可夢屬性在多個道館中的對戰表現。

	主要功能：
	- 🎲 貝氏推論與後驗分佈
	- 📊 階層模型（借用資訊）
	- 📈 5 種視覺化圖表
	- 📋 完整統計報告
	- 🔍 收斂診斷

	分析特色：
	- 完整的 MCMC trace（warmup + posterior）
	- 中文 DAG 節點對照表
	- 預測新研究效果
	- 異質性評估
	""")

	# Main content area: analysis, visualizations and the AI assistant.
	main_tab_labels = ["📊 貝氏分析", "📈 視覺化結果", "💬 AI 助手"]
	tab1, tab2, tab3 = st.tabs(main_tab_labels)

	# Tab 1: Bayesian analysis.
	# Flow: preview data -> pick treatment/control types -> show pooled win
	# rates -> run the sampler on demand -> summarise the stored results and
	# offer a text report for download.
	with tab1:
	    st.header("📊 貝氏統合分析")

	    if st.session_state.df_full is None:
	        st.warning("⚠️ 資料載入失敗，請確認資料檔案存在")
	    else:
	        df_full = st.session_state.df_full

	        # Data preview
	        with st.expander("👀 資料預覽"):
	            st.dataframe(df_full, use_container_width=True)

	        st.markdown("---")

	        # Type selection
	        st.subheader("🎯 選擇比較屬性")

	        placeholder_option = "請選擇..."
	        type_options = [placeholder_option] + st.session_state.available_types

	        col1, col2 = st.columns(2)

	        with col1:
	            treatment_type = st.selectbox(
	                "實驗組屬性（Treatment）",
	                options=type_options,
	                index=0,
	                help="選擇要分析的實驗組屬性"
	            )

	        with col2:
	            control_type = st.selectbox(
	                "對照組屬性（Control）",
	                options=type_options,
	                index=0,
	                help="選擇要分析的對照組屬性"
	            )

	        # Validate the selection once; the flag gates both the data summary
	        # and the run button below.
	        selection_valid = False
	        if treatment_type == placeholder_option or control_type == placeholder_option:
	            st.warning("⚠️ 請選擇實驗組和對照組屬性")
	        elif treatment_type == control_type:
	            st.error("❌ 實驗組和對照組不能相同")
	        else:
	            selection_valid = True
	            st.success(f"✅ 已選擇: {treatment_type} (實驗組) vs {control_type} (對照組)")

	            # Data summary: wins and battles pooled over all rows.
	            st.subheader("📋 資料摘要")

	            treatment_wins = df_full[f'{treatment_type}_win_count'].sum()
	            treatment_total = df_full[f'{treatment_type}_total_battles'].sum()
	            control_wins = df_full[f'{control_type}_win_count'].sum()
	            control_total = df_full[f'{control_type}_total_battles'].sum()

	            col1, col2, col3 = st.columns(3)

	            with col1:
	                st.metric(
	                    "資料筆數",
	                    f"{len(df_full)} 筆",
	                    help="道館（研究單位）的數量"
	                )

	            with col2:
	                # Guard against division by zero when a type has no battles.
	                treatment_rate = treatment_wins / treatment_total if treatment_total > 0 else 0
	                st.metric(
	                    f"{treatment_type} 平均勝率",
	                    f"{treatment_rate:.3f}",
	                    help="實驗組的整體勝率"
	                )

	            with col3:
	                control_rate = control_wins / control_total if control_total > 0 else 0
	                st.metric(
	                    f"{control_type} 平均勝率",
	                    f"{control_rate:.3f}",
	                    help="對照組的整體勝率"
	                )

	            st.markdown("---")

	        # Run button: only shown for a valid treatment/control pair.
	        if selection_valid:
	            if st.button("🔬 開始貝氏分析", type="primary", use_container_width=True):
	                with st.spinner("🔄 正在執行蒙地卡羅馬可夫鏈抽樣，請稍候..."):
	                    try:
	                        # Build the analyzer for this session and type pair
	                        analyzer = BayesianMetaAnalyzer(
	                            session_id=st.session_state.session_id,
	                            treatment_type=treatment_type,
	                            control_type=control_type
	                        )

	                        # Load data
	                        analyzer.load_data(
	                            df_full=df_full,
	                            treatment_type=treatment_type,
	                            control_type=control_type
	                        )

	                        # Run the sampler with the sidebar settings
	                        results = analyzer.run_analysis(
	                            n_warmup=n_warmup,
	                            n_samples=n_samples,
	                            n_chains=n_chains,
	                            target_accept=target_accept
	                        )

	                        # Persist results across reruns, together with the
	                        # pair they belong to (used for the report name).
	                        st.session_state.analysis_results = results
	                        st.session_state.analyzer = analyzer
	                        st.session_state.analysis_labels = (treatment_type, control_type)

	                        st.success("✅ 分析完成！請切換到「視覺化結果」頁籤查看圖表")
	                        st.balloons()

	                    except Exception as e:
	                        # Show both a short message and the full traceback.
	                        st.error(f"❌ 分析失敗: {str(e)}")
	                        st.exception(e)

	        # Result summary (shown whenever results are stored in the session)
	        if st.session_state.analysis_results is not None:
	            results = st.session_state.analysis_results

	            st.markdown("---")
	            st.subheader("📊 分析結果摘要")

	            overall = results['overall']

	            # Headline metrics, each followed by its HDI bounds
	            col1, col2, col3 = st.columns(3)

	            with col1:
	                st.metric(
	                    label="d (整體效應)",
	                    value=f"{overall['d_mean']:.4f}",
	                )
	                st.markdown(f"<span style='color: green; font-size: 14px;'>↑ HDI: [{overall['d_hdi_low']:.3f}, {overall['d_hdi_high']:.3f}]</span>", unsafe_allow_html=True)

	            with col2:
	                st.metric(
	                    label="勝算比 (OR)",
	                    value=f"{overall['or_mean']:.3f}",
	                )
	                st.markdown(f"<span style='color: green; font-size: 14px;'>↑ HDI: [{overall['or_hdi_low']:.3f}, {overall['or_hdi_high']:.3f}]</span>", unsafe_allow_html=True)

	            with col3:
	                st.metric(
	                    label="sigma (異質性)",
	                    value=f"{overall['sigma_mean']:.4f}",
	                )
	                st.markdown(f"<span style='color: green; font-size: 14px;'>↑ HDI: [{overall['sigma_hdi_low']:.3f}, {overall['sigma_hdi_high']:.3f}]</span>", unsafe_allow_html=True)

	            # Heterogeneity verdict: > 0.5 high, > 0.3 moderate, else low.
	            sigma_mean = overall['sigma_mean']
	            if sigma_mean > 0.5:
	                st.info("🔴 高異質性: 不同道館的結果差異很大")
	            elif sigma_mean > 0.3:
	                st.info("🟡 中等異質性: 不同道館的結果有一定差異")
	            else:
	                st.info("🟢 低異質性: 不同道館的結果相對一致")

	            st.markdown("---")

	            # Predicted effect for a new study
	            st.markdown("#### 🔮 預測新研究效果")
	            pred = results['predictive']

	            col1, col2 = st.columns(2)

	            with col1:
	                st.metric(
	                    "預測 delta_new",
	                    f"{pred['delta_new_mean']:.4f}",
	                    delta=f"± {pred['delta_new_sd']:.4f}",
	                    help="預測第 31 間道館的效應值"
	                )

	            with col2:
	                st.metric(
	                    "預測 OR",
	                    f"{pred['or_new_mean']:.4f}",
	                    help="預測的勝算比"
	                )

	            st.markdown("---")

	            # Convergence diagnostics
	            st.markdown("#### 🔍 模型診斷")
	            diag = results['diagnostics']

	            col1, col2, col3 = st.columns(3)

	            with col1:
	                rhat_status = "✅ 已收斂" if diag['converged'] else "❌ 未收斂"
	                st.metric("收斂狀態", rhat_status)

	            with col2:
	                # R-hat / ESS may be None; the card is skipped in that case.
	                if diag['rhat_d'] is not None:
	                    st.metric("R-hat (d)", f"{diag['rhat_d']:.4f}")

	            with col3:
	                if diag['ess_d'] is not None:
	                    st.metric("ESS (d)", f"{int(diag['ess_d'])}")

	            # Report download
	            st.markdown("---")
	            st.markdown("#### 📥 下載報告")

	            report_text = format_summary_stats(results)

	            # Name the file after the pair that was actually analysed. The
	            # selectboxes may have changed (or been reset to the
	            # placeholder) since the run. Falls back to the current
	            # selection if no pair was recorded.
	            report_treatment, report_control = st.session_state.get(
	                'analysis_labels', (treatment_type, control_type)
	            )

	            st.download_button(
	                label="📄 下載完整分析報告 (.txt)",
	                data=report_text,
	                file_name=f"meta_analysis_{report_treatment}_vs_{report_control}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt",
	                mime="text/plain",
	                use_container_width=True
	            )

	# Tab 2: visualizations of the stored analysis results.
	def _render_plot(spinner_text, build_image, error_label, missing_warning=None):
	    """Build one plot image under a spinner and display it.

	    Args:
	        spinner_text: Text shown while ``build_image`` runs.
	        build_image: Zero-argument callable returning the image to show;
	            a falsy return value means nothing could be drawn.
	        error_label: Prefix of the error message shown if ``build_image``
	            (or displaying the image) raises.
	        missing_warning: Optional warning shown when ``build_image``
	            returns a falsy value; when omitted nothing is shown.
	    """
	    with st.spinner(spinner_text):
	        try:
	            image = build_image()
	            if image:
	                # NOTE(review): newer Streamlit releases deprecate
	                # use_column_width in favour of use_container_width;
	                # confirm the pinned version before switching.
	                st.image(image, use_column_width=True)
	            elif missing_warning:
	                st.warning(missing_warning)
	        except Exception as e:
	            st.error(f"{error_label}: {str(e)}")

	with tab2:
	    st.header("📈 視覺化結果")

	    if st.session_state.analysis_results is None:
	        st.info("ℹ️ 請先在「貝氏分析」頁面執行分析")
	    else:
	        results = st.session_state.analysis_results
	        analyzer = st.session_state.analyzer

	        # Four sub-tabs (the posterior-plot tab is currently disabled).
	        dag_tab, legend_tab, trace_tab, forest_tab = st.tabs([
	            "🔍 模型結構 (DAG)",
	            "📋 DAG 對照表",
	            "📉 Trace Plot",
	            "🌲 Forest Plot"
	        ])

	        # Sub-tab 1: model structure (DAG)
	        with dag_tab:
	            st.markdown("### 🔍 模型結構圖 (DAG)")
	            st.markdown("""
	DAG（Directed Acyclic Graph）顯示貝氏階層模型的結構：
	- 橢圓節點：隨機變數（prior 和 likelihood）
	- 矩形節點：觀測資料
	- 菱形節點：確定性變數（由其他變數推導）
	- 箭頭：依賴關係
	""")

	            _render_plot(
	                "正在生成 DAG...",
	                lambda: plot_dag(analyzer.model, results),
	                "生成 DAG 失敗",
	                missing_warning="⚠️ 無法生成 DAG 圖（可能需要安裝 Graphviz）",
	            )

	        # Sub-tab 2: DAG node legend
	        with legend_tab:
	            st.markdown("### 📋 DAG 中文對照表")
	            st.markdown("節點說明表格：解釋 DAG 中每個節點的統計意義和實際情境應用")

	            # Guarded like the plots: an uncaught exception here would stop
	            # the script before the remaining tabs are rendered.
	            try:
	                from meta_analysis_utils import create_dag_legend_html
	                dag_html = create_dag_legend_html(results)
	                st.markdown(dag_html, unsafe_allow_html=True)
	            except Exception as e:
	                st.error(f"生成 DAG 對照表失敗: {str(e)}")

	        # Sub-tab 3: trace plot
	        with trace_tab:
	            st.markdown("### 📉 Trace Plot（抽樣軌跡）")
	            st.markdown("""
	Trace Plot 用途：
	- 左圖：後驗分佈的密度圖
	- 右圖：完整的 MCMC 軌跡（warmup + posterior）
	- 紅色虛線：標記 burn-in 結束點

	判斷收斂：
	- 軌跡圖應該要穩定震盪
	- 不同鏈應該混合良好
	- R-hat < 1.1 表示已收斂
	""")

	            _render_plot(
	                "正在生成 Trace Plot...",
	                lambda: plot_trace_combined(results),
	                "生成 Trace Plot 失敗",
	            )

	        # Sub-tab 4: forest plot
	        with forest_tab:
	            st.markdown("### 🌲 Forest Plot（各道館效應）")
	            st.markdown("""
	Forest Plot 用途：
	- 顯示每個道館的特定效應（delta）
	- 點：平均效應值
	- 橫線：95% HDI
	- 垂直虛線：零效應參考線

	解讀：
	- HDI 不包含 0 → 該道館有顯著效應
	- 點在右側（> 0）→ 實驗組優勢
	- 點在左側（< 0）→ 對照組優勢
	""")

	            _render_plot(
	                "正在生成 Forest Plot...",
	                lambda: plot_forest(results),
	                "生成 Forest Plot 失敗",
	            )

	# Tab 3: AI assistant.
	# Requires an API key (from the sidebar) and stored analysis results.
	# Offers a free-form chat plus preset "quick question" buttons; both
	# append to st.session_state.chat_history.
	with tab3:
	    st.header("💬 AI 分析助手")

	    if not st.session_state.get('api_key'):
	        st.warning("⚠️ 請在左側輸入您的 API Key 以使用 AI 助手")
	        # Lists every provider offered in the sidebar.
	        st.info("💡 支援 OpenAI GPT-4o、Google Gemini 和 Anthropic Claude")
	    elif st.session_state.analysis_results is None:
	        st.info("ℹ️ 請先在「貝氏分析」頁面執行分析")
	    else:
	        # Create the assistant on first use, and rebuild it whenever the
	        # stored key or provider differs from the one it was built with;
	        # otherwise a changed key/provider would never take effect.
	        api_choice = st.session_state.get('api_choice', 'Google Gemini')
	        assistant_config = (st.session_state.api_key, api_choice)
	        if (
	            st.session_state.llm_assistant is None
	            or st.session_state.get('llm_assistant_config') != assistant_config
	        ):
	            st.session_state.llm_assistant = MetaAnalysisLLMAssistant(
	                api_key=st.session_state.api_key,
	                session_id=st.session_state.session_id,
	                api_provider=api_choice
	            )
	            st.session_state.llm_assistant_config = assistant_config

	        # Replay the conversation so far
	        chat_container = st.container()

	        with chat_container:
	            for message in st.session_state.chat_history:
	                with st.chat_message(message["role"]):
	                    st.markdown(message["content"])

	        # Free-form user input
	        if prompt := st.chat_input("詢問關於分析結果的任何問題..."):
	            st.session_state.chat_history.append({
	                "role": "user",
	                "content": prompt
	            })

	            with st.chat_message("user"):
	                st.markdown(prompt)

	            # Assistant reply; a failure is shown and also recorded as the
	            # assistant's message.
	            with st.chat_message("assistant"):
	                with st.spinner("思考中..."):
	                    try:
	                        response = st.session_state.llm_assistant.get_response(
	                            user_message=prompt,
	                            analysis_results=st.session_state.analysis_results
	                        )
	                        st.markdown(response)

	                    except Exception as e:
	                        error_msg = f"❌ 錯誤: {str(e)}\n\n請檢查 API key 或重新表達問題。"
	                        st.error(error_msg)
	                        response = error_msg

	            st.session_state.chat_history.append({
	                "role": "assistant",
	                "content": response
	            })

	        st.markdown("---")

	        # Quick-question buttons
	        st.subheader("💡 快速問題")

	        # Each label is paired with the assistant call that answers it.
	        # An explicit table replaces substring matching on the label, which
	        # had left the "what is Bayesian meta-analysis" button without its
	        # dedicated handler.
	        quick_questions = [
	            ("📊 給我這次分析的總結",
	             lambda assistant, res: assistant.generate_summary(res)),
	            ("🎯 解釋 d 和勝算比",
	             lambda assistant, res: assistant.explain_metric('d', res)),
	            ("🔍 解釋 sigma（異質性）",
	             lambda assistant, res: assistant.explain_metric('sigma', res)),
	            ("❓ 什麼是貝氏統合分析？",
	             lambda assistant, res: assistant.explain_bayesian_meta_analysis()),
	            ("🔮 解釋預測新道館",
	             lambda assistant, res: assistant.explain_predictive_inference(res)),
	            ("🆚 貝氏 vs 頻率論",
	             lambda assistant, res: assistant.explain_bayesian_meta_analysis()),
	            ("⚔️ 對戰策略建議",
	             lambda assistant, res: assistant.battle_strategy_advice(res)),
	            ("🏛️ 比較不同道館",
	             lambda assistant, res: assistant.compare_gyms(res)),
	        ]

	        cols = st.columns(4)
	        for idx, (question, answer_with) in enumerate(quick_questions):
	            if cols[idx % 4].button(question, key=f"quick_{idx}"):
	                # Same failure handling as the chat input above.
	                with st.spinner("思考中..."):
	                    try:
	                        response = answer_with(
	                            st.session_state.llm_assistant,
	                            st.session_state.analysis_results
	                        )
	                    except Exception as e:
	                        response = f"❌ 錯誤: {str(e)}\n\n請檢查 API key 或重新表達問題。"

	                # Record the exchange, then rerun so it is rendered by the
	                # history loop at the top of the tab.
	                st.session_state.chat_history.append({
	                    "role": "user",
	                    "content": question
	                })

	                st.session_state.chat_history.append({
	                    "role": "assistant",
	                    "content": response
	                })

	                st.rerun()

	        # Reset-conversation button
	        st.markdown("---")
	        if st.button("🔄 重置對話"):
	            st.session_state.llm_assistant.reset_conversation()
	            st.session_state.chat_history = []
	            st.success("✅ 對話已重置")
	            st.rerun()

	# Footer: centred credits line plus the first 8 characters of the
	# session id.
	st.markdown("---")
	# A plain "|" is used as the separator. The former "\|" was an invalid
	# escape sequence in a non-raw string, and this text is emitted as raw HTML.
	footer_html = f"""
	<div style='text-align: center'>
	<p>🎲 Bayesian Meta-Analysis System for Pokémon Type Comparison | Built with Streamlit & PyMC</p>
	<p>Session ID: {st.session_state.session_id[:8]}</p>
	</div>
	"""
	st.markdown(footer_html, unsafe_allow_html=True)