mcnemar / mcnemar_utils.py
Wen1201's picture
Upload mcnemar_utils.py
d09e48e verified
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
def plot_contingency_table_heatmap(ct_labeled, feature_label, title="列聯表熱力圖"):
    """Draw a heatmap of a labeled contingency table.

    Args:
        ct_labeled: Labeled contingency-table DataFrame. Its last row and last
            column are dropped before plotting (they are expected to hold the
            totals).
        feature_label: Feature label, rendered as the subtitle.
        title: Main chart title.

    Returns:
        plotly figure
    """
    # Drop the trailing totals row and column.
    ct_display = ct_labeled.iloc[:-1, :-1].copy()

    # The text-colour cutoff is the same for every cell, so compute it once
    # instead of rescanning the whole table per cell. The size guard keeps an
    # empty table from raising on max(), as no cell would have needed it.
    cell_values = ct_display.values
    half_max = cell_values.max() / 2 if cell_values.size else 0

    # One text annotation per cell, positioned by row/column index; white text
    # on the darker (above half of the maximum) cells, black otherwise.
    annotations = [
        dict(
            x=col_idx,
            y=row_idx,
            text=str(ct_display.iloc[row_idx, col_idx]),
            font=dict(
                size=16,
                color='white' if ct_display.iloc[row_idx, col_idx] > half_max else 'black',
            ),
            showarrow=False,
        )
        for row_idx in range(len(ct_display.index))
        for col_idx in range(len(ct_display.columns))
    ]

    fig = go.Figure(data=go.Heatmap(
        z=cell_values,
        x=ct_display.columns,
        y=ct_display.index,
        colorscale='Blues',
        showscale=True,
        hoverongaps=False,
        hovertemplate='%{y}<br>%{x}<br>配對數: %{z}<extra></extra>'
    ))
    fig.update_layout(
        title=f'{title}<br><sub>{feature_label}</sub>',
        xaxis_title='敗方 (Loser)',
        yaxis_title='勝方 (Winner)',
        width=600,
        height=500,
        template='plotly_white',
        annotations=annotations
    )
    return fig
def plot_odds_ratio_forest(or_value, ci_low, ci_high, feature_label):
    """Draw a single-row odds-ratio forest plot on a linear x axis.

    Args:
        or_value: Odds ratio (point estimate).
        ci_low: Lower bound of the 95% confidence interval.
        ci_high: Upper bound of the 95% confidence interval.
        feature_label: Feature label, rendered as the subtitle.

    Returns:
        plotly figure
    """
    import math  # local import so this block does not rely on file-level imports

    fig = go.Figure()

    # Derive the x-axis extent from finite values only: an infinite or NaN
    # bound would otherwise leak into the axis range and the annotation
    # position. The axis always reaches at least 3.5.
    finite_values = [v for v in (or_value, ci_low, ci_high) if math.isfinite(v)]
    x_min = 0
    x_max = max(max(finite_values, default=0) * 1.2, 3.5)

    # Right end of the drawn interval; a non-finite upper bound is clipped to
    # the axis edge so the interval line is still visible.
    ci_right = ci_high if math.isfinite(ci_high) else x_max

    # Reference line at OR = 1.
    fig.add_shape(
        type="line",
        x0=1, x1=1,
        y0=-0.3, y1=0.3,
        line=dict(color="red", width=2, dash="dash"),
    )

    # Confidence interval (horizontal line).
    fig.add_trace(go.Scatter(
        x=[ci_low, ci_right],
        y=[0, 0],
        mode='lines',
        line=dict(color='#2d6ca2', width=2),
        showlegend=False,
        hoverinfo='skip'
    ))

    # Interval end caps (short vertical ticks); skipped for non-finite bounds,
    # which have no real end point to mark.
    for x_pos in (ci_low, ci_high):
        if not math.isfinite(x_pos):
            continue
        fig.add_shape(
            type="line",
            x0=x_pos, x1=x_pos,
            y0=-0.05, y1=0.05,
            line=dict(color='#2d6ca2', width=2)
        )

    # Point estimate marker.
    fig.add_trace(go.Scatter(
        x=[or_value],
        y=[0],
        mode='markers',
        marker=dict(
            size=12,
            color='#1f77b4',
            line=dict(color='white', width=1.5)
        ),
        showlegend=False,
        hovertemplate=f'<b>OR: {or_value:.3f}</b><br>95% CI: [{ci_low:.3f}, {ci_high:.3f}]<extra></extra>'
    ))

    # Boxed value label, placed to the right of the interval's upper end.
    fig.add_annotation(
        x=ci_right + (x_max - ci_right) * 0.3,
        y=0,
        text=f"OR = {or_value:.3f}<br>95% CI [{ci_low:.3f}, {ci_high:.3f}]",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=1.5,
        arrowcolor='#2d6ca2',
        ax=50,
        ay=0,
        font=dict(size=11, color='#1b4f72'),
        bgcolor='rgba(255,255,255,0.95)',
        bordercolor='#2d6ca2',
        borderwidth=1.5,
        borderpad=6
    )

    fig.update_layout(
        title=dict(
            text=f'勝算比 (Odds Ratio)<br><sub style="font-size:11px;">{feature_label}</sub>',
            x=0.5,
            xanchor='center'
        ),
        xaxis_title='Odds Ratio',
        yaxis=dict(
            showticklabels=False,
            showgrid=False,
            zeroline=False,
            range=[-0.35, 0.35]
        ),
        width=800,
        height=280,
        template='plotly_white',
        xaxis=dict(
            type='linear',  # linear (not log) scale
            showgrid=True,
            gridcolor='rgba(200,200,200,0.3)',
            tickmode='linear',
            tick0=0,
            dtick=0.5,  # one tick every 0.5
            range=[x_min, x_max],
            showline=True,
            linewidth=1,
            linecolor='black',
            mirror=True
        ),
        hovermode='closest',
        plot_bgcolor='white',
        margin=dict(l=60, r=60, t=80, b=60)
    )
    return fig
def plot_discordant_pairs(b, c, label_pos, label_neg):
    """Draw a bar chart comparing the two kinds of discordant pairs.

    Args:
        b: Number of pairs with cs=1 & cn=0.
        c: Number of pairs with cs=0 & cn=1.
        label_pos: Positive label; its second whitespace-separated token is
            used in the bar captions.
        label_neg: Negative label; its second whitespace-separated token is
            used in the bar captions.

    Returns:
        plotly figure
    """
    def short_name(label):
        # Use the second token of the label; fall back to the whole label
        # when it has fewer than two tokens instead of raising IndexError.
        tokens = label.split()
        return tokens[1] if len(tokens) > 1 else label

    pos_name = short_name(label_pos)
    neg_name = short_name(label_neg)

    fig = go.Figure()
    categories = [
        f'勝方 {pos_name}<br>敗方 {neg_name}',
        f'勝方 {neg_name}<br>敗方 {pos_name}'
    ]
    values = [b, c]
    colors = ['#2d6ca2', '#d62728']

    fig.add_trace(go.Bar(
        x=categories,
        y=values,
        marker=dict(
            color=colors,
            line=dict(color='white', width=2)
        ),
        text=values,
        textposition='outside',
        textfont=dict(size=16, color='black'),
        hovertemplate='%{x}<br>配對數: %{y}<extra></extra>'
    ))
    fig.update_layout(
        title='不一致配對分布',
        xaxis_title='配對類型',
        yaxis_title='配對數量',
        width=600,
        height=400,
        template='plotly_white',
        showlegend=False
    )
    return fig
def plot_p_value_significance(p_value):
    """Draw a significance-level indicator that highlights the p-value's band.

    Four bars are drawn (p < 0.001, p < 0.01, p < 0.05, p >= 0.05); the bar
    whose band contains ``p_value`` is fully opaque and carries an annotation
    with the p-value, the others are dimmed.

    Args:
        p_value: The p-value to classify.

    Returns:
        plotly figure
    """
    fig = go.Figure()

    # Exclusive upper bounds of the three "significant" bands; anything that
    # is not below one of them belongs to the final "not significant" band.
    upper_bounds = [0.001, 0.01, 0.05]
    labels = ['p < 0.001<br>(極顯著)', 'p < 0.01<br>(非常顯著)',
              'p < 0.05<br>(顯著)', 'p ≥ 0.05<br>(不顯著)']
    colors = ['#1a5f1a', '#2d8b2d', '#5cb85c', '#d9534f']

    # Default to the last band so that p == 1.0 (as well as NaN or p > 1),
    # which is below no bound, is shown as not significant rather than
    # falling through to index 0 ("p < 0.001").
    current_idx = len(labels) - 1
    for i, bound in enumerate(upper_bounds):
        if p_value < bound:
            current_idx = i
            break

    # One bar per band; only the active band is fully opaque.
    for i, (label, color) in enumerate(zip(labels, colors)):
        opacity = 1.0 if i == current_idx else 0.3
        fig.add_trace(go.Bar(
            x=[label],
            y=[1],
            marker=dict(color=color, opacity=opacity),
            showlegend=False,
            hovertemplate=f'{label}<extra></extra>'
        ))

    # Annotate the active band with the p-value.
    fig.add_annotation(
        x=labels[current_idx],
        y=1.1,
        text=f"<b>p = {p_value:.4f}</b>",
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor='black',
        font=dict(size=14, color='black'),
        bgcolor='yellow',
        bordercolor='black',
        borderwidth=2,
        borderpad=4
    )
    fig.update_layout(
        title='顯著性水準',
        xaxis_title='',
        yaxis_title='',
        yaxis=dict(showticklabels=False, showgrid=False),
        width=700,
        height=300,
        template='plotly_white',
        showlegend=False
    )
    return fig
def create_results_summary_table(results):
    """Build a two-column summary table from an analysis-results dict.

    Args:
        results: Analysis results dictionary. Reads 'feature_label',
            'mcnemar_statistic', 'p_value', 'odds_ratio', 'ci_low',
            'ci_high', 'discordant_n', 'discordant_b', 'discordant_c' and the
            nested 'interpretation' entries 'significance' and 'effect_size'.

    Returns:
        pandas DataFrame with one row per summary item: the item name in
        column '項目' and its formatted value in column '數值'.
    """
    # (item name, formatted value) pairs, in display order.
    rows = [
        ('特徵', results['feature_label']),
        ('McNemar 統計量', f"{results['mcnemar_statistic']:.4f}"),
        ('p 值', f"{results['p_value']:.4f}"),
        ('顯著性', results['interpretation']['significance']),
        ('勝算比 (OR)', f"{results['odds_ratio']:.3f}"),
        ('95% 信賴區間', f"[{results['ci_low']:.3f}, {results['ci_high']:.3f}]"),
        (
            '不一致配對數',
            f"{results['discordant_n']} (b={results['discordant_b']}, c={results['discordant_c']})",
        ),
        ('效果大小', results['interpretation']['effect_size']),
    ]
    item_names = [name for name, _ in rows]
    item_values = [value for _, value in rows]
    return pd.DataFrame({'項目': item_names, '數值': item_values})
def export_results_to_text(results):
    """Render the analysis results as a plain-text report.

    Args:
        results: Analysis results dictionary. Reads 'feature_label',
            'feature_name', 'timestamp', 'contingency_table_labeled' (must
            provide ``to_string()``), 'mcnemar_statistic', 'p_value',
            'odds_ratio', 'ci_low', 'ci_high', 'label_pos', 'label_neg',
            'discordant_b', 'discordant_c', 'discordant_n' and the nested
            'interpretation' entries 'significance', 'effect_size' and
            'is_significant'.

    Returns:
        str: The formatted text report.
    """
    def short_name(label):
        # Use the second token of the label; fall back to the whole label
        # when it has fewer than two tokens instead of raising IndexError.
        tokens = label.split()
        return tokens[1] if len(tokens) > 1 else label

    pos_name = short_name(results['label_pos'])
    neg_name = short_name(results['label_neg'])
    odds_ratio = results['odds_ratio']

    if results['interpretation']['is_significant']:
        significance_sentence = '結果顯示勝方和敗方在此特徵上有顯著差異。'
    else:
        significance_sentence = '結果顯示勝方和敗方在此特徵上無顯著差異。'

    if odds_ratio > 1:
        ratio_sentence = (
            '勝方在此特徵上較高的機率是敗方的 ' + str(round(odds_ratio, 2)) + ' 倍。'
        )
    else:
        # An odds ratio of exactly 0 would raise ZeroDivisionError for a plain
        # float; report the inverse as infinity instead (which is what a
        # NumPy float division would yield).
        inverse_ratio = 1 / odds_ratio if odds_ratio else float('inf')
        ratio_sentence = (
            '敗方在此特徵上較高的機率是勝方的 ' + str(round(inverse_ratio, 2)) + ' 倍。'
        )

    report = f"""
==============================================
McNemar 檢定分析報告
==============================================
分析特徵: {results['feature_label']} ({results['feature_name']})
分析時間: {results['timestamp']}
----------------------------------------------
1. 列聯表
----------------------------------------------
{results['contingency_table_labeled'].to_string()}
----------------------------------------------
2. McNemar 檢定結果
----------------------------------------------
McNemar 統計量: {results['mcnemar_statistic']:.4f}
p 值: {results['p_value']:.4f}
顯著性: {results['interpretation']['significance']}
----------------------------------------------
3. 勝算比分析
----------------------------------------------
勝算比 (OR): {odds_ratio:.3f}
95% 信賴區間: [{results['ci_low']:.3f}, {results['ci_high']:.3f}]
效果大小: {results['interpretation']['effect_size']}
----------------------------------------------
4. 不一致配對
----------------------------------------------
勝方{pos_name}且敗方{neg_name} (b): {results['discordant_b']}
勝方{neg_name}且敗方{pos_name} (c): {results['discordant_c']}
總不一致配對數: {results['discordant_n']}
----------------------------------------------
5. 解釋
----------------------------------------------
{significance_sentence}
勝算比為 {odds_ratio:.3f},表示{ratio_sentence}
==============================================
"""
    return report