"""Plotting and reporting helpers for Bayesian-network classification results.

Provides ROC / confusion-matrix / probability-distribution figures (plotly),
a Graphviz rendering of the learned network, CPD tables, metric comparison
tables, JSON export, and a train/test performance-gap summary.
"""

import base64
import json
from itertools import product

import numpy as np
import pandas as pd
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from graphviz import Digraph


def plot_roc_curve(fpr, tpr, auc, title="ROC Curve"):
    """Plot an ROC curve with a random-classifier diagonal.

    Args:
        fpr: False positive rates (x-axis values).
        tpr: True positive rates (y-axis values).
        auc: Area under the curve; shown in the legend label.
        title: Chart title.

    Returns:
        plotly.graph_objects.Figure
    """
    fig = go.Figure()

    # ROC curve
    fig.add_trace(go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines',
        name=f'ROC Curve (AUC = {auc:.4f})',
        line=dict(color='#2d6ca2', width=2)
    ))

    # Diagonal line (random classifier baseline)
    fig.add_trace(go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Classifier',
        line=dict(color='gray', width=1, dash='dash')
    ))

    fig.update_layout(
        title=title,
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        width=600,
        height=500,
        template='plotly_white',
        legend=dict(x=0.6, y=0.1)
    )
    return fig


def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Plot a 2x2 confusion matrix as a heatmap with count + percent labels.

    Args:
        cm: Confusion matrix (2x2 nested list or array).
        title: Chart title.

    Returns:
        plotly.graph_objects.Figure
    """
    cm_array = np.array(cm)

    # Percentage of total samples per cell
    cm_percent = cm_array / cm_array.sum() * 100

    # Cell labels: raw count on one line, percentage on the next
    labels = [
        [f'{cm_array[i][j]}\n({cm_percent[i][j]:.1f}%)' for j in range(2)]
        for i in range(2)
    ]

    fig = go.Figure(data=go.Heatmap(
        z=cm_array,
        x=['Predicted: 0', 'Predicted: 1'],
        y=['Actual: 0', 'Actual: 1'],
        text=labels,
        texttemplate='%{text}',
        textfont={"size": 14},
        colorscale='Blues',
        showscale=True
    ))

    fig.update_layout(
        title=title,
        width=500,
        height=450,
        template='plotly_white'
    )
    return fig


def plot_probability_distribution(probs, title="Probability Distribution"):
    """Plot a histogram of predicted class-1 probabilities.

    Args:
        probs: Predicted probabilities (values expected in [0, 1]).
        title: Chart title.

    Returns:
        plotly.graph_objects.Figure
    """
    fig = go.Figure()

    fig.add_trace(go.Histogram(
        x=probs,
        nbinsx=20,
        name='Predicted Probabilities',
        marker=dict(
            color='#2d6ca2',
            line=dict(color='white', width=1)
        )
    ))

    fig.update_layout(
        title=title,
        xaxis_title='Predicted Probability for Class 1',
        yaxis_title='Frequency',
        width=700,
        height=400,
        template='plotly_white',
        showlegend=False
    )
    fig.update_xaxes(range=[0, 1])
    return fig


def generate_network_graph(model):
    """Render a BayesianNetwork as a Graphviz tree, returned base64-encoded.

    Args:
        model: BayesianNetwork model exposing ``nodes()`` and ``edges()``.

    Returns:
        str: Base64-encoded PNG image data.
    """
    dot = Digraph(format='png', engine='dot')
    dot.attr('node', style='filled', color='lightblue', shape='ellipse')
    dot.attr(dpi='300')

    # Add nodes and edges from the BayesianNetwork model
    for node in model.nodes():
        dot.node(node)
    for edge in model.edges():
        # NOTE(review): arrows are drawn edge[1] -> edge[0], i.e. reversed
        # relative to model.edges() order — confirm this orientation is
        # the intended display convention.
        dot.edge(edge[1], edge[0])

    # Render directly to bytes and encode as base64 (no temp file on disk)
    png_data = dot.pipe(format='png')
    return base64.b64encode(png_data).decode('utf-8')


def create_cpd_table(cpd):
    """Build a DataFrame view of a conditional probability distribution.

    Args:
        cpd: CPD object (pgmpy-style: ``variable``, ``variables``,
            ``cardinality``, ``values``). May be None.

    Returns:
        pandas.DataFrame: Empty frame when ``cpd`` is None; a single-column
        frame for a root node; otherwise rows indexed by the variable's
        states and a MultiIndex column per evidence-state combination.
    """
    if cpd is None:
        return pd.DataFrame()

    variable = cpd.variable
    # Remaining variables (after the first) are the evidence/parent variables
    evidence_vars = cpd.variables[1:] if len(cpd.variables) > 1 else []

    # Root node (no parents): a flat probability vector
    if not evidence_vars:
        values = np.round(cpd.values.flatten(), 4)
        return pd.DataFrame(
            {variable: values},
            index=[f"{variable}({i})" for i in range(len(values))]
        )

    # With parents: one column per combination of evidence states
    evidence_card = cpd.cardinality[1:]
    column_values = list(product(*[range(card) for card in evidence_card]))

    columns = pd.MultiIndex.from_tuples(
        [tuple(f"{var}({val})" for var, val in zip(evidence_vars, vals))
         for vals in column_values],
        names=evidence_vars
    )

    # Reshape so rows are the variable's states, columns the evidence combos
    reshaped_values = cpd.values.reshape(len(cpd.values), -1)
    reshaped_values = np.round(reshaped_values, 4)

    return pd.DataFrame(
        reshaped_values,
        index=[f"{variable}({i})" for i in range(len(cpd.values))],
        columns=columns
    )


def create_metrics_comparison_table(train_metrics, test_metrics):
    """Build a side-by-side metric table for the training and test sets.

    Args:
        train_metrics: Training-set metrics dict (keys: accuracy, precision,
            recall, f1, auc, g_mean, p_mean, specificity).
        test_metrics: Test-set metrics dict with the same keys.

    Returns:
        pandas.DataFrame with columns Metric / Training Set / Test Set.
    """
    def _format(metrics):
        # AUC is reported as a raw score; everything else as a percentage.
        return [
            f"{metrics['accuracy']:.2f}%",
            f"{metrics['precision']:.2f}%",
            f"{metrics['recall']:.2f}%",
            f"{metrics['f1']:.2f}%",
            f"{metrics['auc']:.4f}",
            f"{metrics['g_mean']:.2f}%",
            f"{metrics['p_mean']:.2f}%",
            f"{metrics['specificity']:.2f}%",
        ]

    metrics_data = {
        'Metric': [
            'Accuracy', 'Precision', 'Recall', 'F1-Score',
            'AUC', 'G-mean', 'P-mean', 'Specificity'
        ],
        'Training Set': _format(train_metrics),
        'Test Set': _format(test_metrics),
    }
    return pd.DataFrame(metrics_data)


def export_results_to_json(results, filename="analysis_results.json"):
    """Serialize an analysis-results dict to a JSON string.

    Non-serializable entries (roc arrays, per-sample probabilities, the
    model object itself) are dropped or reduced to their edge list.

    Args:
        results: Analysis results dict with keys ``parameters``,
            ``train_metrics``, ``test_metrics``, ``scores``, ``model``,
            ``timestamp``.
        filename: Unused here; kept for interface compatibility with callers
            that pass a target file name.

    Returns:
        str: JSON document.
    """
    # Keys whose values are large arrays and not JSON-friendly
    skip_keys = {'fpr', 'tpr', 'predicted_probs'}

    # NOTE(review): remaining metric values may still be numpy scalars,
    # which json.dumps rejects — verify upstream converts them to floats.
    exportable_results = {
        'parameters': results['parameters'],
        'train_metrics': {
            k: v for k, v in results['train_metrics'].items()
            if k not in skip_keys
        },
        'test_metrics': {
            k: v for k, v in results['test_metrics'].items()
            if k not in skip_keys
        },
        'scores': results['scores'],
        'network_edges': list(results['model'].edges()),
        'timestamp': results['timestamp']
    }
    return json.dumps(exportable_results, indent=2)


def calculate_performance_gap(train_metrics, test_metrics):
    """Compute train-minus-test gaps and a rough overfitting rating.

    Args:
        train_metrics: Training-set metrics dict.
        test_metrics: Test-set metrics dict.

    Returns:
        dict: Per-metric gaps plus ``average_gap`` (mean absolute gap) and
        ``overfitting_risk`` ("High" > 10, "Moderate" > 5, else "Low").
    """
    gaps = {
        'accuracy_gap': train_metrics['accuracy'] - test_metrics['accuracy'],
        'precision_gap': train_metrics['precision'] - test_metrics['precision'],
        'recall_gap': train_metrics['recall'] - test_metrics['recall'],
        'f1_gap': train_metrics['f1'] - test_metrics['f1'],
        'auc_gap': train_metrics['auc'] - test_metrics['auc']
    }

    # Rate overfitting risk from the mean absolute gap across all metrics
    avg_gap = np.mean([abs(v) for v in gaps.values()])
    overfitting_status = (
        "High" if avg_gap > 10 else "Moderate" if avg_gap > 5 else "Low"
    )

    gaps['average_gap'] = avg_gap
    gaps['overfitting_risk'] = overfitting_status
    return gaps