Spaces:

Wen1201
/

mcnemar

Sleeping

App Files Files Community

mcnemar / mcnemar_utils.py

Wen1201

Upload mcnemar_utils.py

d09e48e verified 4 months ago

raw

history blame contribute delete

10.7 kB

	import plotly.graph_objects as go
	import plotly.express as px
	import pandas as pd
	import numpy as np

	def plot_contingency_table_heatmap(ct_labeled, feature_label, title="列聯表熱力圖"):
	    """
	    Draw a heatmap of a labeled contingency table with a count label per cell.

	    Args:
	        ct_labeled: Labeled contingency-table DataFrame. Its last row and
	            last column are dropped before plotting (they are treated as
	            the totals row/column).
	        feature_label: Feature label, rendered as the subtitle.
	        title: Chart title.

	    Returns:
	        plotly figure
	    """
	    # Drop the totals row and column.
	    ct_display = ct_labeled.iloc[:-1, :-1].copy()
	    cell_values = ct_display.values
	    n_rows, n_cols = cell_values.shape

	    # Text turns white on cells above half of the largest count. The
	    # threshold is loop-invariant, so compute it once; an empty table has
	    # no cells to label and must not trigger a reduction over nothing.
	    half_max = cell_values.max() / 2 if cell_values.size else 0

	    # One annotation per cell, positioned by row/column index.
	    annotations = [
	        dict(
	            x=col_pos,
	            y=row_pos,
	            text=str(ct_display.iloc[row_pos, col_pos]),
	            font=dict(
	                size=16,
	                color='white' if ct_display.iloc[row_pos, col_pos] > half_max else 'black',
	            ),
	            showarrow=False,
	        )
	        for row_pos in range(n_rows)
	        for col_pos in range(n_cols)
	    ]

	    fig = go.Figure(data=go.Heatmap(
	        z=cell_values,
	        x=ct_display.columns,
	        y=ct_display.index,
	        colorscale='Blues',
	        showscale=True,
	        hoverongaps=False,
	        hovertemplate='%{y}<br>%{x}<br>配對數: %{z}<extra></extra>'
	    ))

	    fig.update_layout(
	        title=f'{title}<br><sub>{feature_label}</sub>',
	        xaxis_title='敗方 (Loser)',
	        yaxis_title='勝方 (Winner)',
	        width=600,
	        height=500,
	        template='plotly_white',
	        annotations=annotations
	    )

	    return fig

	def plot_odds_ratio_forest(or_value, ci_low, ci_high, feature_label):
	    """
	    Draw a single-row forest plot of an odds ratio on a linear x axis.

	    The figure shows a dashed reference line at OR = 1, the confidence
	    interval as a horizontal line with end caps, the point estimate as a
	    marker, and a boxed annotation with the numeric values.

	    Non-finite bounds (inf / NaN) no longer poison the axis range: the axis
	    is sized from the largest finite value available, a non-finite bound is
	    drawn out to the edge of the axis, and its end cap is omitted. For
	    finite inputs the figure is the same as before.

	    Args:
	        or_value: Odds ratio (point estimate).
	        ci_low: Lower bound of the 95% confidence interval.
	        ci_high: Upper bound of the 95% confidence interval.
	        feature_label: Feature label, rendered as the subtitle.

	    Returns:
	        plotly figure
	    """
	    fig = go.Figure()

	    # Compute the x-axis range dynamically, but only from finite numbers.
	    x_min = 0
	    if np.isfinite(ci_high):
	        axis_upper = ci_high
	    elif np.isfinite(or_value):
	        axis_upper = or_value
	    else:
	        axis_upper = 0
	    x_max = max(axis_upper * 1.2, 3.5)  # extend to at least 3.5

	    # Coordinates actually drawn: a non-finite bound runs to the axis edge.
	    ci_low_drawn = ci_low if np.isfinite(ci_low) else x_min
	    ci_high_drawn = ci_high if np.isfinite(ci_high) else x_max

	    # Reference line (OR = 1).
	    fig.add_shape(
	        type="line",
	        x0=1, x1=1,
	        y0=-0.3, y1=0.3,
	        line=dict(color="red", width=2, dash="dash"),
	    )

	    # Confidence interval (horizontal line).
	    fig.add_trace(go.Scatter(
	        x=[ci_low_drawn, ci_high_drawn],
	        y=[0, 0],
	        mode='lines',
	        line=dict(color='#2d6ca2', width=2),
	        showlegend=False,
	        hoverinfo='skip'
	    ))

	    # Confidence-interval end caps (short vertical ticks); a cap is only
	    # meaningful where the bound is an actual finite number.
	    for bound in (ci_low, ci_high):
	        if not np.isfinite(bound):
	            continue
	        fig.add_shape(
	            type="line",
	            x0=bound, x1=bound,
	            y0=-0.05, y1=0.05,
	            line=dict(color='#2d6ca2', width=2)
	        )

	    # Point estimate (blue marker).
	    fig.add_trace(go.Scatter(
	        x=[or_value],
	        y=[0],
	        mode='markers',
	        marker=dict(
	            size=12,
	            color='#1f77b4',
	            line=dict(color='white', width=1.5)
	        ),
	        showlegend=False,
	        hovertemplate=f'<b>OR: {or_value:.3f}</b><br>95% CI: [{ci_low:.3f}, {ci_high:.3f}]<extra></extra>'
	    ))

	    # Boxed value annotation, anchored to the right of the drawn interval.
	    fig.add_annotation(
	        x=ci_high_drawn + (x_max - ci_high_drawn) * 0.3,
	        y=0,
	        text=f"OR = {or_value:.3f}<br>95% CI [{ci_low:.3f}, {ci_high:.3f}]",
	        showarrow=True,
	        arrowhead=2,
	        arrowsize=1,
	        arrowwidth=1.5,
	        arrowcolor='#2d6ca2',
	        ax=50,
	        ay=0,
	        font=dict(size=11, color='#1b4f72'),
	        bgcolor='rgba(255,255,255,0.95)',
	        bordercolor='#2d6ca2',
	        borderwidth=1.5,
	        borderpad=6
	    )

	    fig.update_layout(
	        title=dict(
	            text=f'勝算比 (Odds Ratio)<br><sub style="font-size:11px;">{feature_label}</sub>',
	            x=0.5,
	            xanchor='center'
	        ),
	        xaxis_title='Odds Ratio',
	        yaxis=dict(
	            showticklabels=False,
	            showgrid=False,
	            zeroline=False,
	            range=[-0.35, 0.35]
	        ),
	        width=800,
	        height=280,
	        template='plotly_white',
	        xaxis=dict(
	            type='linear',  # linear scale
	            showgrid=True,
	            gridcolor='rgba(200,200,200,0.3)',
	            tickmode='linear',
	            tick0=0,
	            dtick=0.5,  # one tick every 0.5
	            range=[x_min, x_max],
	            showline=True,
	            linewidth=1,
	            linecolor='black',
	            mirror=True
	        ),
	        hovermode='closest',
	        plot_bgcolor='white',
	        margin=dict(l=60, r=60, t=80, b=60)
	    )

	    return fig


	def plot_discordant_pairs(b, c, label_pos, label_neg):
	    """
	    Draw a two-bar chart comparing the discordant pair counts.

	    Args:
	        b: Number of pairs with cs=1 & cn=0.
	        c: Number of pairs with cs=0 & cn=1.
	        label_pos: Positive label; its second whitespace-separated token is
	            shown on the axis.
	        label_neg: Negative label; its second whitespace-separated token is
	            shown on the axis.

	    Returns:
	        plotly figure
	    """
	    def short_name(label):
	        # Use the second token when there is one; a single-token label is
	        # shown as-is instead of raising IndexError.
	        tokens = label.split()
	        return tokens[1] if len(tokens) > 1 else label

	    pos_name = short_name(label_pos)
	    neg_name = short_name(label_neg)

	    fig = go.Figure()

	    categories = [
	        f'勝方 {pos_name}<br>敗方 {neg_name}',
	        f'勝方 {neg_name}<br>敗方 {pos_name}',
	    ]
	    values = [b, c]
	    colors = ['#2d6ca2', '#d62728']

	    fig.add_trace(go.Bar(
	        x=categories,
	        y=values,
	        marker=dict(
	            color=colors,
	            line=dict(color='white', width=2)
	        ),
	        text=values,
	        textposition='outside',
	        textfont=dict(size=16, color='black'),
	        hovertemplate='%{x}<br>配對數: %{y}<extra></extra>'
	    ))

	    fig.update_layout(
	        title='不一致配對分布',
	        xaxis_title='配對類型',
	        yaxis_title='配對數量',
	        width=600,
	        height=400,
	        template='plotly_white',
	        showlegend=False
	    )

	    return fig

	def plot_p_value_significance(p_value):
	    """
	    Draw a significance indicator: one bar per significance band, with the
	    band containing the p-value drawn opaque and annotated.

	    Args:
	        p_value: The p-value to locate among the bands.

	    Returns:
	        plotly figure
	    """
	    fig = go.Figure()

	    # Upper bounds of the significance bands, with matching labels/colors.
	    thresholds = [0.001, 0.01, 0.05, 1.0]
	    labels = ['p < 0.001<br>(極顯著)', 'p < 0.01<br>(非常顯著)',
	              'p < 0.05<br>(顯著)', 'p ≥ 0.05<br>(不顯著)']
	    colors = ['#1a5f1a', '#2d8b2d', '#5cb85c', '#d9534f']

	    # Find the first band whose upper bound exceeds the p-value. When no
	    # bound does (p == 1.0 exactly, or NaN, for which every comparison is
	    # False) the value belongs in the last, "not significant" band rather
	    # than in the first one.
	    current_idx = next(
	        (idx for idx, upper in enumerate(thresholds) if p_value < upper),
	        len(thresholds) - 1,
	    )

	    # Draw the band bars; only the active band is fully opaque.
	    for idx, (band_label, band_color) in enumerate(zip(labels, colors)):
	        fig.add_trace(go.Bar(
	            x=[band_label],
	            y=[1],
	            marker=dict(color=band_color, opacity=1.0 if idx == current_idx else 0.3),
	            showlegend=False,
	            hovertemplate=f'{band_label}<extra></extra>'
	        ))

	    # Annotate the active band with the p-value.
	    fig.add_annotation(
	        x=labels[current_idx],
	        y=1.1,
	        text=f"<b>p = {p_value:.4f}</b>",
	        showarrow=True,
	        arrowhead=2,
	        arrowsize=1,
	        arrowwidth=2,
	        arrowcolor='black',
	        font=dict(size=14, color='black'),
	        bgcolor='yellow',
	        bordercolor='black',
	        borderwidth=2,
	        borderpad=4
	    )

	    fig.update_layout(
	        title='顯著性水準',
	        xaxis_title='',
	        yaxis_title='',
	        yaxis=dict(showticklabels=False, showgrid=False),
	        width=700,
	        height=300,
	        template='plotly_white',
	        showlegend=False
	    )

	    return fig

	def create_results_summary_table(results):
	    """
	    Build a two-column summary table from an analysis-results dictionary.

	    Args:
	        results: Analysis results dict. The keys read here are
	            'feature_label', 'mcnemar_statistic', 'p_value', 'odds_ratio',
	            'ci_low', 'ci_high', 'discordant_n', 'discordant_b',
	            'discordant_c', and 'interpretation' (a dict providing
	            'significance' and 'effect_size').

	    Returns:
	        pandas DataFrame with an item column and a value column, one row
	        per reported quantity.
	    """
	    interpretation = results['interpretation']

	    # Pre-format the composite cells so the row list below stays readable.
	    ci_text = f"[{results['ci_low']:.3f}, {results['ci_high']:.3f}]"
	    discordant_text = (
	        f"{results['discordant_n']} "
	        f"(b={results['discordant_b']}, c={results['discordant_c']})"
	    )

	    # (item label, displayed value) pairs, in display order.
	    rows = [
	        ('特徵', results['feature_label']),
	        ('McNemar 統計量', f"{results['mcnemar_statistic']:.4f}"),
	        ('p 值', f"{results['p_value']:.4f}"),
	        ('顯著性', interpretation['significance']),
	        ('勝算比 (OR)', f"{results['odds_ratio']:.3f}"),
	        ('95% 信賴區間', ci_text),
	        ('不一致配對數', discordant_text),
	        ('效果大小', interpretation['effect_size']),
	    ]

	    item_column = [item for item, _ in rows]
	    value_column = [value for _, value in rows]
	    return pd.DataFrame({'項目': item_column, '數值': value_column})

	def export_results_to_text(results):
	    """
	    Export the analysis results as a plain-text report.

	    Args:
	        results: Analysis results dict. Besides the numeric fields, this
	            reads 'feature_label', 'feature_name', 'timestamp',
	            'contingency_table_labeled' (must provide ``to_string()``),
	            'label_pos' / 'label_neg', and 'interpretation' (with
	            'significance', 'effect_size' and 'is_significant').

	    Returns:
	        str: the formatted text report
	    """
	    def short_name(label):
	        # Use the second whitespace-separated token when there is one; a
	        # single-token label is used as-is instead of raising IndexError.
	        tokens = label.split()
	        return tokens[1] if len(tokens) > 1 else label

	    interpretation = results['interpretation']
	    odds_ratio = results['odds_ratio']
	    pos_name = short_name(results['label_pos'])
	    neg_name = short_name(results['label_neg'])

	    if interpretation['is_significant']:
	        significance_sentence = '結果顯示勝方和敗方在此特徵上有顯著差異。'
	    else:
	        significance_sentence = '結果顯示勝方和敗方在此特徵上無顯著差異。'

	    if odds_ratio > 1:
	        odds_sentence = '勝方在此特徵上較高的機率是敗方的 ' + str(round(odds_ratio, 2)) + ' 倍。'
	    elif odds_ratio > 0:
	        odds_sentence = '敗方在此特徵上較高的機率是勝方的 ' + str(round(1 / odds_ratio, 2)) + ' 倍。'
	    else:
	        # OR of 0 (or NaN / negative) has no reciprocal to report; the
	        # original expression raised ZeroDivisionError here.
	        odds_sentence = '勝算比為 0 或無法計算，無法以倍數表示。'

	    report = f"""
	==============================================
	McNemar 檢定分析報告
	==============================================

	分析特徵: {results['feature_label']} ({results['feature_name']})
	分析時間: {results['timestamp']}

	----------------------------------------------
	1. 列聯表
	----------------------------------------------
	{results['contingency_table_labeled'].to_string()}

	----------------------------------------------
	2. McNemar 檢定結果
	----------------------------------------------
	McNemar 統計量: {results['mcnemar_statistic']:.4f}
	p 值: {results['p_value']:.4f}
	顯著性: {interpretation['significance']}

	----------------------------------------------
	3. 勝算比分析
	----------------------------------------------
	勝算比 (OR): {odds_ratio:.3f}
	95% 信賴區間: [{results['ci_low']:.3f}, {results['ci_high']:.3f}]
	效果大小: {interpretation['effect_size']}

	----------------------------------------------
	4. 不一致配對
	----------------------------------------------
	勝方{pos_name}且敗方{neg_name} (b): {results['discordant_b']}
	勝方{neg_name}且敗方{pos_name} (c): {results['discordant_c']}
	總不一致配對數: {results['discordant_n']}

	----------------------------------------------
	5. 解釋
	----------------------------------------------
	{significance_sentence}
	勝算比為 {odds_ratio:.3f}，表示{odds_sentence}

	==============================================
	"""
	    return report