Spaces:
Sleeping
Sleeping
| import plotly.graph_objects as go | |
| import plotly.express as px | |
| import pandas as pd | |
| import numpy as np | |
| import networkx as nx | |
| from plotly.subplots import make_subplots | |
| from graphviz import Digraph | |
| import base64 | |
def plot_roc_curve(fpr, tpr, auc, title="ROC Curve"):
    """Plot an ROC curve together with the random-classifier diagonal.

    Args:
        fpr: False positive rates.
        tpr: True positive rates.
        auc: Area under the curve (shown in the trace legend).
        title: Chart title.

    Returns:
        plotly Figure.
    """
    roc_trace = go.Scatter(
        x=fpr,
        y=tpr,
        mode='lines',
        name=f'ROC Curve (AUC = {auc:.4f})',
        line=dict(color='#2d6ca2', width=2),
    )
    # Diagonal baseline: what a random classifier would achieve.
    baseline_trace = go.Scatter(
        x=[0, 1],
        y=[0, 1],
        mode='lines',
        name='Random Classifier',
        line=dict(color='gray', width=1, dash='dash'),
    )
    fig = go.Figure(data=[roc_trace, baseline_trace])
    fig.update_layout(
        title=title,
        xaxis_title='False Positive Rate',
        yaxis_title='True Positive Rate',
        width=600,
        height=500,
        template='plotly_white',
        legend=dict(x=0.6, y=0.1),
    )
    return fig
def plot_confusion_matrix(cm, title="Confusion Matrix"):
    """Render a 2x2 confusion matrix as an annotated heatmap.

    Args:
        cm: Confusion matrix (2x2 list).
        title: Chart title.

    Returns:
        plotly Figure.
    """
    counts = np.array(cm)
    # Each cell's share of the grand total, for the percentage annotation.
    percents = counts / counts.sum() * 100
    annotations = []
    for row in range(2):
        annotations.append(
            [f'{counts[row][col]}<br>({percents[row][col]:.1f}%)'
             for col in range(2)]
        )
    heatmap = go.Heatmap(
        z=counts,
        x=['Predicted: 0', 'Predicted: 1'],
        y=['Actual: 0', 'Actual: 1'],
        text=annotations,
        texttemplate='%{text}',
        textfont={"size": 14},
        colorscale='Blues',
        showscale=True,
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title=title,
        width=500,
        height=450,
        template='plotly_white',
    )
    return fig
def plot_probability_distribution(probs, title="Probability Distribution"):
    """Plot a histogram of the predicted class-1 probabilities.

    Args:
        probs: List of predicted probabilities.
        title: Chart title.

    Returns:
        plotly Figure.
    """
    histogram = go.Histogram(
        x=probs,
        nbinsx=20,
        name='Predicted Probabilities',
        marker=dict(
            color='#2d6ca2',
            line=dict(color='white', width=1),
        ),
    )
    fig = go.Figure(data=[histogram])
    fig.update_layout(
        title=title,
        xaxis_title='Predicted Probability for Class 1',
        yaxis_title='Frequency',
        width=700,
        height=400,
        template='plotly_white',
        showlegend=False,
    )
    # Probabilities live in [0, 1]; pin the x-axis to that range.
    fig.update_xaxes(range=[0, 1])
    return fig
def generate_network_graph(model):
    """Render a BayesianNetwork as a Graphviz digraph and return it Base64-encoded.

    Args:
        model: BayesianNetwork model (must expose ``nodes()`` and ``edges()``,
            where ``edges()`` yields (parent, child) pairs).

    Returns:
        Base64-encoded PNG string, suitable for embedding (e.g. in an
        ``<img src="data:image/png;base64,...">`` tag).
    """
    dot = Digraph(format='png', engine='dot')
    dot.attr('node', style='filled', color='lightblue', shape='ellipse')
    dot.attr(dpi='300')
    # Add nodes and edges from the BayesianNetwork model.
    for node in model.nodes():
        dot.node(node)
    # BUGFIX: edges() yields (parent, child); the previous code drew
    # edge(edge[1], edge[0]), i.e. child -> parent, which inverted every
    # dependency arrow in the rendered network.
    for parent, child in model.edges():
        dot.edge(parent, child)
    # Render directly to bytes (no temp file) and encode in Base64.
    png_data = dot.pipe(format='png')
    return base64.b64encode(png_data).decode('utf-8')
def create_cpd_table(cpd):
    """Build a pandas DataFrame view of a conditional probability table.

    Args:
        cpd: CPD object exposing ``variable``, ``variables``, ``cardinality``
            and ``values``; may be None.

    Returns:
        pandas DataFrame (empty when cpd is None). Rows are the variable's
        states; for non-root nodes, columns are a MultiIndex over all
        combinations of parent states.
    """
    if cpd is None:
        return pd.DataFrame()

    target = cpd.variable
    parents = list(cpd.variables)[1:]

    # Root node (no parents): one probability column indexed by state.
    if not parents:
        probs = np.round(cpd.values.flatten(), 4)
        return pd.DataFrame(
            {target: probs},
            index=[f"{target}({state})" for state in range(len(probs))],
        )

    # Child node: one column per combination of parent states.
    from itertools import product
    parent_cards = cpd.cardinality[1:]
    state_combos = product(*(range(card) for card in parent_cards))
    header = pd.MultiIndex.from_tuples(
        [tuple(f"{parent}({state})" for parent, state in zip(parents, combo))
         for combo in state_combos],
        names=parents,
    )
    n_states = len(cpd.values)
    table = np.round(cpd.values.reshape(n_states, -1), 4)
    return pd.DataFrame(
        table,
        index=[f"{target}({state})" for state in range(n_states)],
        columns=header,
    )
def create_metrics_comparison_table(train_metrics, test_metrics):
    """Build a side-by-side comparison table of training vs. test metrics.

    Args:
        train_metrics: Training-set metrics dict.
        test_metrics: Test-set metrics dict.

    Returns:
        pandas DataFrame with columns 'Metric', 'Training Set', 'Test Set'.
    """
    # (display name, dict key, format spec) per metric.
    # AUC is a unitless score; everything else is reported as a percentage.
    specs = [
        ('Accuracy', 'accuracy', '{:.2f}%'),
        ('Precision', 'precision', '{:.2f}%'),
        ('Recall', 'recall', '{:.2f}%'),
        ('F1-Score', 'f1', '{:.2f}%'),
        ('AUC', 'auc', '{:.4f}'),
        ('G-mean', 'g_mean', '{:.2f}%'),
        ('P-mean', 'p_mean', '{:.2f}%'),
        ('Specificity', 'specificity', '{:.2f}%'),
    ]
    return pd.DataFrame({
        'Metric': [name for name, _, _ in specs],
        'Training Set': [fmt.format(train_metrics[key]) for _, key, fmt in specs],
        'Test Set': [fmt.format(test_metrics[key]) for _, key, fmt in specs],
    })
def export_results_to_json(results, filename="analysis_results.json"):
    """Serialize the analysis results to a JSON string.

    Args:
        results: Analysis-results dict with 'parameters', 'train_metrics',
            'test_metrics', 'scores', 'model' and 'timestamp' entries.
        filename: File name (not used by this function itself).

    Returns:
        JSON string.
    """
    import json

    def _drop_arrays(metrics):
        # Strip the raw-curve / per-sample entries that bloat the export.
        excluded = ('fpr', 'tpr', 'predicted_probs')
        return {key: value for key, value in metrics.items() if key not in excluded}

    payload = {
        'parameters': results['parameters'],
        'train_metrics': _drop_arrays(results['train_metrics']),
        'test_metrics': _drop_arrays(results['test_metrics']),
        'scores': results['scores'],
        'network_edges': list(results['model'].edges()),
        'timestamp': results['timestamp'],
    }
    return json.dumps(payload, indent=2)
def calculate_performance_gap(train_metrics, test_metrics):
    """Compute train-vs-test performance gaps and a coarse overfitting label.

    Args:
        train_metrics: Training-set metrics dict.
        test_metrics: Test-set metrics dict.

    Returns:
        dict with per-metric gaps (train minus test), the mean absolute gap,
        and an 'overfitting_risk' label of 'Low', 'Moderate' or 'High'.
    """
    tracked = ('accuracy', 'precision', 'recall', 'f1', 'auc')
    gaps = {
        f'{metric}_gap': train_metrics[metric] - test_metrics[metric]
        for metric in tracked
    }
    # Mean absolute gap drives the risk label: >10 High, >5 Moderate, else Low.
    avg_gap = np.mean([abs(delta) for delta in gaps.values()])
    if avg_gap > 10:
        risk = "High"
    elif avg_gap > 5:
        risk = "Moderate"
    else:
        risk = "Low"
    gaps['average_gap'] = avg_gap
    gaps['overfitting_risk'] = risk
    return gaps