"""Plotting and reporting helpers for a Bayesian random-effects meta-analysis.

Every ``plot_*`` / ``create_*_table`` helper takes the analysis ``results``
dictionary, renders to an in-memory PNG and returns it as a PIL image (or
``None`` when rendering fails).  ``create_dag_legend_html`` and
``format_summary_stats`` return plain strings.
"""

import html
import io
import traceback

import matplotlib

# Select the non-interactive backend *before* pyplot is imported so that no
# GUI toolkit is ever initialised (required on headless servers).
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import arviz as az
from PIL import Image
from scipy import stats

# Fallback chain of fonts able to render the CJK text used in labels.
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Microsoft YaHei', 'SimHei', 'Arial Unicode MS', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False


def _report_failure(where, exc):
    """Print the error message and the active traceback for helper *where*.

    Must be called from inside an ``except`` block so that
    ``traceback.print_exc`` can find the exception being handled.
    """
    print(f"Error in {where}: {exc}")
    traceback.print_exc()


def _figure_to_image(fig, dpi=150, **savefig_kwargs):
    """Render *fig* to an in-memory PNG and return it as a PIL image.

    The caller remains responsible for closing *fig*.
    """
    buf = io.BytesIO()
    fig.savefig(buf, format='png', dpi=dpi, bbox_inches='tight', **savefig_kwargs)
    buf.seek(0)
    return Image.open(buf)


def _as_chain_matrix(values, n_chains):
    """Return *values* as an array, reshaping a 1-D input to ``(n_chains, -1)``."""
    arr = np.array(values)
    if arr.ndim == 1:
        arr = arr.reshape(n_chains, -1)
    return arr


def _hdi_bounds(samples, prob=0.95):
    """Return ``(low, high)`` of the narrowest interval holding *prob* of *samples*.

    For a unimodal posterior this is the highest-density interval.  Unlike the
    equal-tailed percentile interval it is not biased outward on the long tail
    of skewed parameters.

    Raises:
        ValueError: if *samples* is empty.
    """
    ordered = np.sort(np.asarray(samples, dtype=float).ravel())
    n = ordered.size
    if n == 0:
        raise ValueError("cannot compute an HDI from zero samples")
    span = int(np.floor(prob * n))
    if span < 1 or span >= n:
        # Too few samples (or prob == 1): the only interval is the full range.
        return ordered[0], ordered[-1]
    # Width of every window that spans `span` consecutive order statistics.
    widths = ordered[span:] - ordered[:n - span]
    start = int(np.argmin(widths))
    return ordered[start], ordered[start + span]


def plot_trace_combined(results):
    """Draw per-chain posterior densities and full traces (warmup + posterior).

    Produces a 3x2 grid for ``d``, ``sigma`` and ``delta_new``: the left column
    shows one KDE per chain built from the draws after ``warmup_end``; the
    right column shows every draw of every chain with the end of warmup marked.

    Args:
        results: analysis result dictionary; reads ``trace_data`` (keys ``d``,
            ``sigma``, ``delta_new``, ``warmup_end``) and
            ``model_params['n_chains']``.

    Returns:
        PIL Image, or None if anything fails (the error is printed).
    """
    fig = None
    try:
        trace_data = results['trace_data']
        warmup_end = trace_data['warmup_end']
        n_chains = results['model_params']['n_chains']

        params = [
            (_as_chain_matrix(trace_data['d'], n_chains), 'd (Log-OR)'),
            (_as_chain_matrix(trace_data['sigma'], n_chains), 'sigma (Between-study SD)'),
            (_as_chain_matrix(trace_data['delta_new'], n_chains), 'delta_new (Predictive)'),
        ]
        colors = plt.cm.tab10.colors

        fig, axes = plt.subplots(3, 2, figsize=(14, 10))

        for row, (samples, label) in enumerate(params):
            density_ax, trace_ax = axes[row]
            first_row = row == 0  # legend entries are only registered once
            # Use the real number of draws so the x axis always matches the data.
            iterations = np.arange(samples.shape[1])

            for chain_idx in range(n_chains):
                chain_color = colors[chain_idx % len(colors)]
                chain_label = f'Chain {chain_idx+1}' if first_row else ''

                # Left: KDE of this chain's post-warmup draws.
                posterior = samples[chain_idx, warmup_end:]
                kde = stats.gaussian_kde(posterior)
                xs = np.linspace(posterior.min(), posterior.max(), 200)
                ys = kde(xs)  # evaluate once, reuse for line and fill
                density_ax.plot(xs, ys, color=chain_color, linewidth=2,
                                alpha=0.8, label=chain_label)
                density_ax.fill_between(xs, ys, alpha=0.2, color=chain_color)

                # Right: the complete trace of this chain.
                trace_ax.plot(iterations, samples[chain_idx], color=chain_color,
                              alpha=0.7, linewidth=0.8, label=chain_label)

            density_ax.set_xlabel(label, fontsize=12)
            density_ax.set_ylabel('Density', fontsize=12)
            density_ax.set_title(f'{label} Posterior Distribution', fontsize=13, fontweight='bold')
            density_ax.grid(alpha=0.3)
            if first_row and n_chains > 1:
                density_ax.legend(loc='upper right', fontsize=9)

            # Mark where warmup ends; read the limits after the traces are
            # drawn so the annotation sits near the top of the data range.
            ylim = trace_ax.get_ylim()
            trace_ax.axvline(x=warmup_end, color='red', linestyle='--', linewidth=2.5,
                             alpha=0.8, label='Burn-in end' if first_row else '')
            trace_ax.text(warmup_end + 50, ylim[0] + (ylim[1] - ylim[0]) * 0.95,
                          'Burn-in', color='red', fontsize=10, va='top',
                          bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))
            trace_ax.set_xlabel('Iteration', fontsize=12)
            trace_ax.set_ylabel(label, fontsize=12)
            trace_ax.set_title(f'{label} Trace', fontsize=13, fontweight='bold')
            trace_ax.grid(alpha=0.3)
            if first_row:
                trace_ax.legend(loc='upper right', fontsize=9)

        fig.tight_layout()
        return _figure_to_image(fig)

    except Exception as e:
        _report_failure('plot_trace_combined', e)
        return None
    finally:
        # Always release the figure, including on the error path.
        if fig is not None:
            plt.close(fig)


def plot_posterior(results):
    """Draw pooled posterior densities for ``d``, ``sigma``, ``delta_new`` and OR.

    Post-warmup draws of all chains are pooled per parameter.  Each panel shows
    the KDE, the shaded 95% highest-density interval computed from the pooled
    draws, and the posterior mean taken from ``results``.

    Args:
        results: analysis result dictionary; reads ``trace_data`` (keys ``d``,
            ``sigma``, ``delta_new``, ``or``, ``warmup_end``),
            ``model_params['n_chains']``, ``overall`` and ``predictive``.

    Returns:
        PIL Image, or None if anything fails (the error is printed).
    """
    fig = None
    try:
        trace_data = results['trace_data']
        warmup_end = trace_data['warmup_end']
        n_chains = results['model_params']['n_chains']
        overall = results['overall']

        def pooled(key):
            # Drop warmup, then merge all chains into one flat sample vector.
            return _as_chain_matrix(trace_data[key], n_chains)[:, warmup_end:].flatten()

        params = [
            (pooled('d'), 'd (Log-OR)', overall['d_mean']),
            (pooled('sigma'), 'sigma (Between-study SD)', overall['sigma_mean']),
            (pooled('delta_new'), 'delta_new (Predictive)', results['predictive']['delta_new_mean']),
            (pooled('or'), 'OR (Odds Ratio)', overall['or_mean']),
        ]

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        for ax, (samples, label, mean_val) in zip(axes.flatten(), params):
            # A true HDI (narrowest interval), matching the "95% HDI" label.
            hdi_low, hdi_high = _hdi_bounds(samples, 0.95)

            kde = stats.gaussian_kde(samples)
            xs = np.linspace(samples.min(), samples.max(), 300)
            ys = kde(xs)
            ax.plot(xs, ys, color='steelblue', linewidth=2)

            inside = (xs >= hdi_low) & (xs <= hdi_high)
            ax.fill_between(xs[inside], ys[inside], alpha=0.3, color='steelblue', label='95% HDI')

            ax.axvline(mean_val, color='red', linestyle='--', linewidth=2,
                       label=f'Mean = {mean_val:.3f}')

            ax.set_xlabel(label, fontsize=12)
            ax.set_ylabel('Density', fontsize=12)
            ax.set_title(f'{label}', fontsize=13, fontweight='bold')
            ax.legend(loc='upper right', fontsize=9)
            ax.grid(alpha=0.3)

            ax.text(0.02, 0.98, f'95% HDI:\n[{hdi_low:.3f}, {hdi_high:.3f}]',
                    transform=ax.transAxes, fontsize=9, verticalalignment='top',
                    bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

        fig.tight_layout()
        return _figure_to_image(fig)

    except Exception as e:
        _report_failure('plot_posterior', e)
        return None
    finally:
        if fig is not None:
            plt.close(fig)


def plot_forest(results):
    """Draw a forest plot of the study-specific effects.

    Each study is drawn as its interval (horizontal line) and mean (dot).
    Studies whose interval lies entirely above 0 get a gold star, those
    entirely below 0 a red star.

    Args:
        results: analysis result dictionary; reads ``by_study`` (keys
            ``delta_mean``, ``delta_hdi_low``, ``delta_hdi_high``),
            ``n_studies``, ``treatment_type`` and ``control_type``.

    Returns:
        PIL Image, or None if anything fails (the error is printed).
    """
    fig = None
    try:
        by_study = results['by_study']
        n_studies = results['n_studies']
        treatment_type = results['treatment_type']
        control_type = results['control_type']

        delta_mean = np.array(by_study['delta_mean'])
        delta_hdi_low = np.array(by_study['delta_hdi_low'])
        delta_hdi_high = np.array(by_study['delta_hdi_high'])

        # Grow the figure with the number of studies, but never below 8 inches.
        fig, ax = plt.subplots(figsize=(12, max(8, n_studies * 0.3)))
        y_pos = np.arange(n_studies)

        # Intervals for all studies in one vectorised call.
        ax.hlines(y_pos, delta_hdi_low, delta_hdi_high,
                  color='steelblue', linewidth=2.5, alpha=0.8)

        ax.scatter(delta_mean, y_pos, color='darkblue', s=100, zorder=3,
                   edgecolors='white', linewidth=1.5, label='Mean Effect')

        # Star the studies whose interval excludes zero.
        highlights = (
            (delta_hdi_low > 0, 'gold'),   # clearly positive effect
            (delta_hdi_high < 0, 'red'),   # clearly negative effect
        )
        for mask, star_color in highlights:
            if mask.any():
                ax.scatter(delta_mean[mask], y_pos[mask], marker='*', s=300,
                           color=star_color, zorder=4, edgecolors='black', linewidth=1)

        ax.set_yticks(y_pos)
        ax.set_yticklabels([f'Gym {i+1}' for i in range(n_studies)], fontsize=10)
        ax.invert_yaxis()  # first study at the top
        ax.axvline(0, color='red', linestyle='--', linewidth=2, alpha=0.7, label='No Effect (δ=0)')
        ax.set_xlabel('Delta (Log Odds Ratio)', fontsize=13, fontweight='bold')
        ax.set_title(f'Study-specific Treatment Effects\n{treatment_type} vs {control_type}',
                     fontsize=15, fontweight='bold', pad=20)
        ax.legend(loc='lower right', fontsize=10)
        ax.grid(axis='x', alpha=0.3)

        fig.tight_layout()
        return _figure_to_image(fig)

    except Exception as e:
        _report_failure('plot_forest', e)
        return None
    finally:
        if fig is not None:
            plt.close(fig)


def plot_dag(model, results):
    """Draw a WinBUGS-style DAG of the model with graphviz.

    The binomial likelihood is split into r[i] (observed wins) and n[i] (total
    games, a constant).  Node labels embed the selected treatment/control
    names.  Edges that point to a deterministic node are dashed.

    Args:
        model: PyMC model object (kept for interface compatibility; unused).
        results: analysis result dictionary; reads ``treatment_type``,
            ``control_type`` and ``n_studies``.

    Returns:
        PIL Image, or None if anything fails (e.g. graphviz is missing).
    """
    try:
        import graphviz  # optional dependency, only needed for this plot

        t = results['treatment_type']
        c = results['control_type']
        n_studies = results['n_studies']

        dot = graphviz.Digraph(
            'BayesianMetaAnalysis',
            format='png',
            graph_attr={
                'rankdir': 'TB',
                'fontsize': '14',
                'fontname': 'Arial',
                'bgcolor': 'white',
                'pad': '0.6',
                'nodesep': '0.8',
                'ranksep': '1.0',
                'dpi': '150',
            },
            node_attr={
                'fontname': 'Arial',
                'fontsize': '12',
            },
            edge_attr={
                'arrowsize': '0.8',
                'color': '#333333',
            },
        )

        # Node styles: shape encodes stochastic (ellipse) vs deterministic /
        # constant (rect); fill encodes latent (white) vs data (grey).
        common = {'style': 'filled', 'color': '#333333', 'penwidth': '2'}
        stochastic = {**common, 'shape': 'ellipse', 'fillcolor': '#FFFFFF'}
        deterministic = {**common, 'shape': 'rect', 'fillcolor': '#FFFFFF'}
        observed = {**common, 'shape': 'ellipse', 'fillcolor': '#D0D0D0'}
        constant = {**common, 'shape': 'rect', 'fillcolor': '#D0D0D0'}

        # Layer 1: hyper-parameters.
        with dot.subgraph() as s:
            s.attr(rank='same')
            s.node('tau', 'τ\nGamma', **stochastic)
            s.node('d', 'd\nNormal', **stochastic)

        # Layer 2: derived hyper-parameters and the predictive effect.
        with dot.subgraph() as s:
            s.attr(rank='same')
            s.node('sigma', 'σ = 1/√τ', **deterministic)
            s.node('delta_new', 'δ_new\nNormal', **stochastic)
            s.node('or', 'OR = exp(d)', **deterministic)

        # Plate over studies i = 1..N.
        with dot.subgraph(name='cluster_studies') as plate:
            plate.attr(
                label=f' i = 1 ... {n_studies} ',
                labelloc='b',
                labeljust='r',
                style='rounded',
                color='#555555',
                penwidth='2',
                fontsize='14',
                fontname='Arial',
                margin='20',
            )

            # Layer 3: study-specific random effects.
            with plate.subgraph() as s:
                s.attr(rank='same')
                s.node('mu_i', 'μ[i]\nNormal', **stochastic)
                s.node('delta_i', 'δ[i]\nNormal', **stochastic)

            # Layer 4: win probabilities (deterministic).
            with plate.subgraph() as s:
                s.attr(rank='same')
                s.node('pc_i', f'p_{c}[i]', **deterministic)
                s.node('pt_i', f'p_{t}[i]', **deterministic)

            # Layer 5: observed wins r[i] and constant totals n[i].
            with plate.subgraph() as s:
                s.attr(rank='same')
                s.node('nc_i', f'n_{c}[i]', **constant)
                s.node('rc_i', f'r_{c}[i]', **observed)
                s.node('rt_i', f'r_{t}[i]', **observed)
                s.node('nt_i', f'n_{t}[i]', **constant)

        # Dashed edges: the target is a deterministic node.
        dashed_edges = [
            ('tau', 'sigma'),      # τ → σ
            ('d', 'or'),           # d → OR
            ('mu_i', 'pc_i'),      # μ[i] → p_c[i]
            ('mu_i', 'pt_i'),      # μ[i] → p_t[i]
            ('delta_i', 'pt_i'),   # δ[i] → p_t[i]
        ]
        for tail, head in dashed_edges:
            dot.edge(tail, head, style='dashed')

        # Solid edges: every remaining dependency.
        solid_edges = [
            ('d', 'delta_new'),
            ('tau', 'delta_new'),
            ('d', 'delta_i'),
            ('tau', 'delta_i'),
            ('pc_i', 'rc_i'),
            ('nc_i', 'rc_i'),
            ('pt_i', 'rt_i'),
            ('nt_i', 'rt_i'),
        ]
        for tail, head in solid_edges:
            dot.edge(tail, head)

        # Render to PNG bytes in memory and wrap them as a PIL image.
        png_bytes = dot.pipe(format='png')
        return Image.open(io.BytesIO(png_bytes))

    except Exception as e:
        _report_failure('plot_dag', e)
        return None


def create_dag_legend_table(results):
    """Render the Chinese legend for the DAG nodes as a table image.

    Args:
        results: analysis result dictionary; reads ``treatment_type`` and
            ``control_type``, plus ``n_studies`` (optional, defaults to 30)
            to number the hypothetical "next" gym.

    Returns:
        PIL Image, or None if anything fails (the error is printed).
    """
    fig = None
    try:
        treatment_type = results['treatment_type']
        control_type = results['control_type']
        # The predictive effect refers to the gym after the last observed one.
        next_gym = results.get('n_studies', 30) + 1

        # Pick the first installed CJK-capable font (for cloud deployments).
        import matplotlib.font_manager as fm

        available_fonts = {f.name for f in fm.fontManager.ttflist}
        chinese_fonts = ['Noto Sans CJK TC', 'Noto Sans CJK SC', 'Noto Sans TC', 'Noto Sans SC',
                         'Microsoft JhengHei', 'Microsoft YaHei', 'SimHei',
                         'WenQuanYi Micro Hei', 'AR PL UMing TW', 'DejaVu Sans']
        selected_font = next((font for font in chinese_fonts if font in available_fonts), None)
        if selected_font:
            # NOTE: this changes the global rcParams, so later figures in the
            # same process use this font family as well.
            plt.rcParams['font.family'] = selected_font
        plt.rcParams['axes.unicode_minus'] = False

        fig, ax = plt.subplots(figsize=(12, 7))
        ax.axis('off')

        table_data = [
            ['節點符號', '統計意義', '寶可夢道館情境'],
            ['d', '整體平均效果\n(log-odds ratio)', f'{treatment_type}相比{control_type}的平均勝率差異\n(對數尺度)'],
            ['tau', '精確度參數\n(precision)', '道館間變異的精確度\n(tau越大代表各道館越一致)'],
            ['sigma', '標準差\n(standard deviation)', '道館間效果的標準差\n(不同道館之間的結果波動幅度)'],
            ['delta[i]', '研究i的效果\n(study-specific effect)', f'第i間道館內,{treatment_type}相對{control_type}的勝率優勢'],
            ['delta_new', '預測新研究效果\n(predictive effect)', f'預測未來新開的第{next_gym}間道館的對抗結果'],
            ['mu[i]', '基線參數\n(baseline logit)', f'第i間道館內,{control_type}的基礎勝率(logit尺度)'],
            ['OR', '勝算比\n(odds ratio)', f'{treatment_type}相比{control_type}的勝算倍數 (OR=exp(d))'],
            [f'p_{treatment_type}', f'{treatment_type}勝率\n(win rate)', f'第i間道館內,{treatment_type}寶可夢的勝率'],
            [f'p_{control_type}', f'{control_type}勝率\n(win rate)', f'第i間道館內,{control_type}寶可夢的勝率'],
            [f'{treatment_type}_wins', '觀測資料\n(observed data)', f'第i間道館內,{treatment_type}的實際勝場數'],
            [f'{control_type}_wins', '觀測資料\n(observed data)', f'第i間道館內,{control_type}的實際勝場數'],
        ]

        table = ax.table(cellText=table_data, loc='center', cellLoc='left',
                         colWidths=[0.15, 0.28, 0.57])
        table.auto_set_font_size(False)
        table.set_fontsize(9.5)
        table.scale(1, 2.8)

        n_cols = 3

        # Header row.
        for col in range(n_cols):
            cell = table[(0, col)]
            cell.set_facecolor('#4472C4')
            cell.set_text_props(weight='bold', color='white', fontsize=11)
            cell.set_height(0.08)

        # Body rows: zebra striping, bold first column (the symbol).
        for row in range(1, len(table_data)):
            row_color = '#E7E6E6' if row % 2 == 0 else 'white'
            for col in range(n_cols):
                cell = table[(row, col)]
                cell.set_facecolor(row_color)
                cell.set_edgecolor('#CCCCCC')
                if col == 0:
                    cell.set_text_props(weight='bold', fontsize=10)

        fig.suptitle('貝葉斯後設分析模型節點說明', fontsize=16, weight='bold', y=0.98)
        ax.set_title(f'比較情境: {treatment_type} (實驗組) vs {control_type} (對照組)',
                     fontsize=12, pad=20)

        fig.text(0.5, 0.02,
                 f'註: 灰色底的節點 (如 {treatment_type}_wins, {control_type}_wins) 為觀測資料;白色圓圈節點為隨機變數;方框節點為確定性變數',
                 ha='center', fontsize=9, style='italic', color='gray')

        fig.tight_layout()
        return _figure_to_image(fig, dpi=200, facecolor='white')

    except Exception as e:
        _report_failure('create_dag_legend_table', e)
        return None
    finally:
        if fig is not None:
            plt.close(fig)


def create_dag_legend_html(results):
    """Build the Chinese legend for the DAG nodes as an HTML table.

    The treatment/control names are HTML-escaped before being interpolated so
    that unusual characters cannot break the markup.

    Args:
        results: analysis result dictionary; reads ``treatment_type`` and
            ``control_type``, plus ``n_studies`` (optional, defaults to 30)
            to number the hypothetical "next" gym.

    Returns:
        str: HTML fragment containing a title, a subtitle, the table and a note.

    Raises:
        KeyError: if ``treatment_type`` or ``control_type`` is missing.
    """
    t = html.escape(str(results['treatment_type']))
    c = html.escape(str(results['control_type']))
    next_gym = results.get('n_studies', 30) + 1

    # (symbol, meaning, English gloss, meaning in the gym scenario)
    rows = [
        ('d', '整體平均效果', '(log-odds ratio)', f'{t}相比{c}的平均勝率差異(對數尺度)'),
        ('tau', '精確度參數', '(precision)', '道館間變異的精確度(tau越大代表各道館越一致)'),
        ('sigma', '標準差', '(standard deviation)', '道館間效果的標準差(不同道館之間的結果波動幅度)'),
        ('delta[i]', '研究i的效果', '(study-specific effect)', f'第i間道館內,{t}相對{c}的勝率優勢'),
        ('delta_new', '預測新研究效果', '(predictive effect)', f'預測未來新開的第{next_gym}間道館的對抗結果'),
        ('mu[i]', '基線參數', '(baseline logit)', f'第i間道館內,{c}的基礎勝率(logit尺度)'),
        ('OR', '勝算比', '(odds ratio)', f'{t}相比{c}的勝算倍數 (OR=exp(d))'),
        (f'p_{t}', f'{t}勝率', '(win rate)', f'第i間道館內,{t}寶可夢的勝率'),
        (f'p_{c}', f'{c}勝率', '(win rate)', f'第i間道館內,{c}寶可夢的勝率'),
        (f'{t}_wins', '觀測資料', '(observed data)', f'第i間道館內,{t}的實際勝場數'),
        (f'{c}_wins', '觀測資料', '(observed data)', f'第i間道館內,{c}的實際勝場數'),
    ]

    cell_style = 'border: 1px solid #CCCCCC; padding: 6px; text-align: left;'
    head_style = cell_style + ' background-color: #4472C4; color: white;'

    header_cells = ''.join(
        f'<th style="{head_style}">{title}</th>'
        for title in ('節點符號', '統計意義', '寶可夢道館情境')
    )
    body_rows = '\n'.join(
        '<tr>'
        f'<td style="{cell_style}"><b>{symbol}</b></td>'
        f'<td style="{cell_style}">{meaning}<br>{gloss}</td>'
        f'<td style="{cell_style}">{context}</td>'
        '</tr>'
        for symbol, meaning, gloss, context in rows
    )

    markup = f"""
<div>
<h3>貝葉斯後設分析模型節點說明</h3>
<p>比較情境: {t} (實驗組) vs {c} (對照組)</p>
<table style="border-collapse: collapse; width: 100%;">
<thead>
<tr>{header_cells}</tr>
</thead>
<tbody>
{body_rows}
</tbody>
</table>
<p><em>註: 灰色底的節點 (如 {t}_wins, {c}_wins) 為觀測資料;白色圓圈節點為隨機變數;方框節點為確定性變數</em></p>
</div>
"""
    return markup


def _format_optional(value, spec):
    """Format *value* with format-spec *spec*, or return ``'N/A'`` for None."""
    return 'N/A' if value is None else format(value, spec)


def format_summary_stats(results):
    """Format the analysis results as a plain-text report.

    Args:
        results: analysis result dictionary; reads ``timestamp``,
            ``treatment_type``, ``control_type``, ``n_studies``, ``overall``,
            ``predictive``, ``diagnostics`` and ``model_params``.

    Returns:
        str: multi-line report ending with an interpretation section.

    Raises:
        KeyError: if a required key is missing from ``results``.
    """
    overall = results['overall']
    pred = results['predictive']
    diag = results['diagnostics']
    mcmc = results['model_params']
    treatment = results['treatment_type']
    control = results['control_type']

    heavy = "=============================================="
    light = "----------------------------------------------"

    # Diagnostics may be unavailable (None); show 'N/A' in that case.
    rhat_d = _format_optional(diag['rhat_d'], '.4f')
    rhat_sigma = _format_optional(diag['rhat_sigma'], '.4f')
    ess_d = 'N/A' if diag['ess_d'] is None else int(diag['ess_d'])
    ess_sigma = 'N/A' if diag['ess_sigma'] is None else int(diag['ess_sigma'])

    # Interpretation of the overall effect: does the OR interval exclude 1?
    or_mean = overall['or_mean']
    or_low = overall['or_hdi_low']
    or_high = overall['or_hdi_high']
    if or_low > 1:
        effect_interp = f"{treatment} 相對於 {control} 有顯著優勢"
    elif or_high < 1:
        effect_interp = f"{control} 相對於 {treatment} 有顯著優勢"
    else:
        effect_interp = f"{treatment} 與 {control} 無顯著差異"

    # Interpretation of the between-study heterogeneity.
    sigma_mean = overall['sigma_mean']
    if sigma_mean > 0.5:
        het_interp = "高異質性 - 不同道館的結果差異很大"
    elif sigma_mean > 0.3:
        het_interp = "中等異質性 - 不同道館的結果有一定差異"
    else:
        het_interp = "低異質性 - 不同道館的結果相對一致"

    lines = [
        "",
        heavy,
        "貝氏後設分析報告",
        heavy,
        "",
        f"分析時間: {results['timestamp']}",
        f"實驗組: {treatment}",
        f"對照組: {control}",
        f"研究數量: {results['n_studies']} 個道館",
        "",
        light,
        "1. 整體效應摘要 (Overall Effect)",
        light,
        "",
        "d (整體對數勝算比):",
        f"  - 平均值: {overall['d_mean']:.4f}",
        f"  - 標準差: {overall['d_sd']:.4f}",
        f"  - 95% HDI: [{overall['d_hdi_low']:.4f}, {overall['d_hdi_high']:.4f}]",
        "",
        "勝算比 (Odds Ratio):",
        f"  - 平均值: {overall['or_mean']:.4f}",
        f"  - 標準差: {overall['or_sd']:.4f}",
        f"  - 95% HDI: [{overall['or_hdi_low']:.4f}, {overall['or_hdi_high']:.4f}]",
        "",
        "sigma (道館間變異):",
        f"  - 平均值: {overall['sigma_mean']:.4f}",
        f"  - 標準差: {overall['sigma_sd']:.4f}",
        f"  - 95% HDI: [{overall['sigma_hdi_low']:.4f}, {overall['sigma_hdi_high']:.4f}]",
        "",
        light,
        "2. 預測新研究效果 (Predictive Effect)",
        light,
        "",
        "delta_new (預測效應):",
        f"  - 平均值: {pred['delta_new_mean']:.4f}",
        f"  - 標準差: {pred['delta_new_sd']:.4f}",
        f"  - 95% HDI: [{pred['delta_new_hdi_low']:.4f}, {pred['delta_new_hdi_high']:.4f}]",
        "",
        "預測勝算比:",
        f"  - 平均值: {pred['or_new_mean']:.4f}",
        f"  - 95% HDI: [{pred['or_new_hdi_low']:.4f}, {pred['or_new_hdi_high']:.4f}]",
        "",
        f"不確定性增加倍數: {pred['uncertainty_ratio']:.2f}x",
        "",
        light,
        "3. 模型收斂診斷 (Diagnostics)",
        light,
        "",
        f"R-hat (d): {rhat_d}",
        f"R-hat (sigma): {rhat_sigma}",
        f"ESS (d): {ess_d}",
        f"ESS (sigma): {ess_sigma}",
        "",
        f"收斂狀態: {'✓ 已收斂' if diag['converged'] else '✗ 未收斂'}",
        "",
        light,
        "4. MCMC 參數設定",
        light,
        "",
        f"Warmup 樣本數: {mcmc['n_warmup']}",
        f"Posterior 樣本數: {mcmc['n_samples']}",
        f"鏈數: {mcmc['n_chains']}",
        f"總樣本數: {mcmc['total_draws']}",
        "",
        light,
        "5. 結果解釋",
        light,
        "",
        f"整體效應: {effect_interp}",
        f"異質性: {het_interp}",
        "",
        f"平均而言,{treatment} 獲勝的勝算是 {control} 的 {or_mean:.3f} 倍",
        f"(95% 可信區間: [{or_low:.3f}, {or_high:.3f}])",
        "",
        heavy,
    ]
    return "\n".join(lines) + "\n"