Spaces:

Wen1201
/

bayesian-network

Sleeping

App Files Community

Wen1201 committed on Oct 31, 2025

Commit

ffa2193

verified ·

1 Parent(s): 0e299c0

Update app.py

Browse files

Files changed (1) hide show

app.py +349 -152

app.py CHANGED Viewed

@@ -17,6 +17,73 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
 # 導入自定義模組
 from bn_core import BayesianNetworkAnalyzer
 from llm_assistant import LLMAssistant
@@ -107,8 +174,8 @@ with tab1:
                 df = None
         if df is not None:
-            # 特徵選擇
-            st.subheader("🎯 Feature Selection")
             # 自動識別特徵類型
             numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
@@ -120,27 +187,43 @@ with tab1:
             col_feat1, col_feat2 = st.columns(2)
             with col_feat1:
-                st.markdown("**Categorical Features**")
-                cat_features = st.multiselect(
-                    "Select categorical features:",
-                    options=categorical_cols,
-                    default=categorical_cols[:5] if len(categorical_cols) > 0 else []
-                )
             with col_feat2:
-                st.markdown("**Continuous Features**")
-                con_features = st.multiselect(
-                    "Select continuous features:",
-                    options=numeric_cols,
-                    default=numeric_cols[:3] if len(numeric_cols) > 0 else []
                 )
-            # 目標變數
-            target_variable = st.selectbox(
-                "🎯 Target Variable (Y):",
-                options=binary_cols,
-                help="Must be a binary classification variable"
-            )
             # 驗證選擇
             selected_features = cat_features + con_features
@@ -150,40 +233,57 @@ with tab1:
             st.markdown("---")
-            # 模型參數
-            st.subheader("⚙️ Model Parameters")
-            col_param1, col_param2, col_param3 = st.columns(3)
             with col_param1:
-                test_fraction = st.slider(
-                    "Test Dataset Proportion:",
-                    min_value=0.1,
-                    max_value=0.5,
-                    value=0.25,
-                    step=0.05
-                )
-                algorithm = st.selectbox(
                     "Network Structure:",
                     options=['NB', 'TAN', 'CL', 'HC', 'PC'],
                     format_func=lambda x: {
-                        'NB': 'Naive Bayes',
-                        'TAN': 'Tree-Augmented Naive Bayes',
                         'CL': 'Chow-Liu',
                         'HC': 'Hill Climbing',
-                        'PC': 'PC Algorithm'
-                    }[x]
                 )
             with col_param2:
-                estimator = st.selectbox(
                     "Parameter Estimator:",
                     options=['ml', 'bn'],
                     format_func=lambda x: {
-                        'ml': 'Maximum Likelihood',
-                        'bn': 'Bayesian Estimator'
-                    }[x]
                 )
                 if estimator == 'bn':
@@ -191,44 +291,60 @@ with tab1:
                         "Equivalent Sample Size:",
                         min_value=1,
                         value=3,
-                        step=1
                     )
                 else:
                     equivalent_sample_size = 3
-                # 條件性參數
-                if algorithm == 'HC':
-                    score_method = st.selectbox(
-                        "Scoring Method:",
-                        options=['BIC', 'AIC', 'K2', 'BDeu', 'BDs']
                     )
                 else:
-                    score_method = 'BIC'
-            with col_param3:
-                if algorithm == 'PC':
-                    sig_level = st.number_input(
-                        "Significance Level:",
-                        min_value=0.01,
-                        max_value=1.0,
-                        value=0.05,
-                        step=0.01
-                    )
-                else:
-                    sig_level = 0.05
-                n_bins = st.number_input(
-                    "Number of Bins (for continuous):",
                     min_value=3,
                     max_value=20,
                     value=10,
-                    step=1
                 )
             # 執行分析按鈕
             st.markdown("---")
-            col_btn1, col_btn2, col_btn3 = st.columns([2, 1, 1])
             with col_btn1:
                 run_button = st.button("🚀 Run Analysis", type="primary", use_container_width=True)
@@ -240,18 +356,18 @@ with tab1:
                     st.session_state.chat_history = []
                     st.rerun()
-            with col_btn3:
-                with st.popover("ℹ️ Info"):
-                    st.markdown("""
-                    **Analysis Steps:**
-                    1. Split data (train/test)
-                    2. Learn network structure
-                    3. Process features (bins from train)
-                    4. Estimate parameters
-                    5. Evaluate performance
-                    **Note:** Test set bins are derived from training set to prevent data leakage.
-                    """)
             if run_button:
                 # 驗證
@@ -338,90 +454,171 @@ with tab1:
         results = st.session_state.analysis_results
-        # 網路結構
-        st.subheader("🕸️ Bayesian Network Structure")
-        network_fig = generate_network_graph(results['model'])
-        st.plotly_chart(network_fig, use_container_width=True)
-        # 效能指標
-        st.subheader("📈 Performance Metrics")
-        col_m1, col_m2 = st.columns(2)
-        with col_m1:
-            st.markdown("**Training Set**")
-            train_metrics = results['train_metrics']
-            metric_cols = st.columns(4)
-            metric_cols[0].metric("Accuracy", f"{train_metrics['accuracy']:.2f}%")
-            metric_cols[1].metric("Precision", f"{train_metrics['precision']:.2f}%")
-            metric_cols[2].metric("Recall", f"{train_metrics['recall']:.2f}%")
-            metric_cols[3].metric("F1-Score", f"{train_metrics['f1']:.2f}%")
-            # 混淆矩陣
-            conf_fig_train = plot_confusion_matrix(
-                train_metrics['confusion_matrix'],
-                title="Training Set Confusion Matrix"
-            )
-            st.plotly_chart(conf_fig_train, use_container_width=True)
-            # ROC Curve
-            roc_fig_train = plot_roc_curve(
-                train_metrics['fpr'],
-                train_metrics['tpr'],
-                train_metrics['auc'],
-                title="Training Set ROC Curve"
-            )
-            st.plotly_chart(roc_fig_train, use_container_width=True)
-        with col_m2:
-            st.markdown("**Test Set**")
-            test_metrics = results['test_metrics']
-            metric_cols = st.columns(4)
-            metric_cols[0].metric("Accuracy", f"{test_metrics['accuracy']:.2f}%")
-            metric_cols[1].metric("Precision", f"{test_metrics['precision']:.2f}%")
-            metric_cols[2].metric("Recall", f"{test_metrics['recall']:.2f}%")
-            metric_cols[3].metric("F1-Score", f"{test_metrics['f1']:.2f}%")
-            # 混淆矩陣
-            conf_fig_test = plot_confusion_matrix(
-                test_metrics['confusion_matrix'],
-                title="Test Set Confusion Matrix"
             )
-            st.plotly_chart(conf_fig_test, use_container_width=True)
-            # ROC Curve
-            roc_fig_test = plot_roc_curve(
-                test_metrics['fpr'],
-                test_metrics['tpr'],
-                test_metrics['auc'],
-                title="Test Set ROC Curve"
-            )
-            st.plotly_chart(roc_fig_test, use_container_width=True)
-        # 條件機率表
-        st.subheader("📋 Conditional Probability Tables")
-        selected_node = st.selectbox(
-            "Select a node to view its CPD:",
-            options=list(results['cpds'].keys())
-        )
-        if selected_node:
-            cpd_df = create_cpd_table(results['cpds'][selected_node])
-            st.dataframe(cpd_df, use_container_width=True)
-        # 評分指標
-        st.subheader("📊 Model Scores")
-        score_cols = st.columns(5)
-        scores = results['scores']
-        score_cols[0].metric("Log-Likelihood", f"{scores['log_likelihood']:.2f}")
-        score_cols[1].metric("BIC Score", f"{scores['bic']:.2f}")
-        score_cols[2].metric("K2 Score", f"{scores['k2']:.2f}")
-        score_cols[3].metric("BDeu Score", f"{scores['bdeu']:.2f}")
-        score_cols[4].metric("BDs Score", f"{scores['bds']:.2f}")
 # Tab 2: AI 助手
 with tab2:

     initial_sidebar_state="expanded"
 )
+# 自定義 CSS - 讓介面更像 Django
+st.markdown("""
+<style>
+    /* Expander 樣式 - 類似 Django 的摺疊區域 */
+    .streamlit-expanderHeader {
+        background-color: #e8f1f8;
+        border: 1px solid #b0cfe8;
+        border-radius: 5px;
+        font-weight: 600;
+        color: #1b4f72;
+    }
+    .streamlit-expanderHeader:hover {
+        background-color: #d0e7f8;
+    }
+    /* Checkbox 樣式 */
+    .stCheckbox {
+        padding: 2px 0;
+    }
+    /* Radio button 樣式 */
+    .stRadio > label {
+        font-weight: 600;
+        color: #1b4f72;
+    }
+    /* 選擇框樣式 */
+    .stSelectbox > label, .stNumberInput > label {
+        font-weight: 600;
+        color: #1b4f72;
+    }
+    /* 分隔線 */
+    hr {
+        margin: 1rem 0;
+        border-top: 2px solid #b0cfe8;
+    }
+    /* 表單容器 */
+    .element-container {
+        margin-bottom: 0.5rem;
+    }
+    /* 摺疊內容區域 */
+    .streamlit-expanderContent {
+        background-color: #f8fbff;
+        border: 1px solid #d0e4f5;
+        border-top: none;
+        padding: 1rem;
+    }
+    /* 按鈕樣式 */
+    .stButton > button {
+        width: 100%;
+        border-radius: 20px;
+        font-weight: 600;
+        transition: all 0.3s ease;
+    }
+    .stButton > button:hover {
+        transform: translateY(-2px);
+        box-shadow: 0 4px 8px rgba(0,0,0,0.2);
+    }
+</style>
+""", unsafe_allow_html=True)
 # 導入自定義模組
 from bn_core import BayesianNetworkAnalyzer
 from llm_assistant import LLMAssistant
                 df = None
         if df is not None:
+            # 特徵選擇 - 使用 expander (可摺疊)
+            st.subheader("🎯 Input Features")
             # 自動識別特徵類型
             numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
             col_feat1, col_feat2 = st.columns(2)
             with col_feat1:
+                with st.expander("**Continuous**", expanded=False):
+                    st.caption("Select continuous features:")
+                    con_features = []
+                    for col in numeric_cols:
+                        if st.checkbox(col, value=False, key=f"con_{col}"):
+                            con_features.append(col)
             with col_feat2:
+                with st.expander("**Categorical**", expanded=True):
+                    st.caption("Select categorical features:")
+                    cat_features = []
+                    for col in categorical_cols:
+                        # 預設勾選前幾個
+                        default_checked = categorical_cols.index(col) < 5 if len(categorical_cols) > 5 else True
+                        if st.checkbox(col, value=default_checked, key=f"cat_{col}"):
+                            cat_features.append(col)
+            # 目標變數 - 放在特徵選擇下方
+            st.markdown("---")
+            col_target1, col_target2 = st.columns([1, 2])
+            with col_target1:
+                target_variable = st.selectbox(
+                    "Target Variable (Y):",
+                    options=binary_cols,
+                    help="Must be a binary classification variable"
                 )
+            with col_target2:
+                test_fraction = st.number_input(
+                    "Test Dataset Proportion:",
+                    min_value=0.10,
+                    max_value=0.50,
+                    value=0.25,
+                    step=0.05,
+                    format="%.2f"
+                )
             # 驗證選擇
             selected_features = cat_features + con_features
             st.markdown("---")
+            # 模型參數 - 使用更緊湊的佈局
+            st.subheader("⚙️ Model Configuration")
+            col_param1, col_param2 = st.columns(2)
             with col_param1:
+                algorithm = st.radio(
                     "Network Structure:",
                     options=['NB', 'TAN', 'CL', 'HC', 'PC'],
                     format_func=lambda x: {
+                        'NB': 'Naive Bayes (NB)',
+                        'TAN': 'Tree-Augmented Naive Bayes (TAN)',
                         'CL': 'Chow-Liu',
                         'HC': 'Hill Climbing',
+                        'PC': 'PC'
+                    }[x],
+                    help="Select structure learning algorithm"
                 )
+                # 條件性參數 - HC
+                if algorithm == 'HC':
+                    score_method = st.selectbox(
+                        "Scoring Method:",
+                        options=['BIC', 'AIC', 'K2', 'BDeu', 'BDs'],
+                        help="Select scoring method for Hill Climbing"
+                    )
+                else:
+                    score_method = 'BIC'
+                # 條件性參數 - PC
+                if algorithm == 'PC':
+                    sig_level = st.number_input(
+                        "Significance Level:",
+                        min_value=0.01,
+                        max_value=1.0,
+                        value=0.05,
+                        step=0.01,
+                        help="Significance level for PC algorithm"
+                    )
+                else:
+                    sig_level = 0.05
             with col_param2:
+                estimator = st.radio(
                     "Parameter Estimator:",
                     options=['ml', 'bn'],
                     format_func=lambda x: {
+                        'ml': 'MaximumLikelihoodEstimator',
+                        'bn': 'BayesianEstimator'
+                    }[x],
+                    help="Select parameter estimation method"
                 )
                 if estimator == 'bn':
                         "Equivalent Sample Size:",
                         min_value=1,
                         value=3,
+                        step=1,
+                        help="Prior strength for Bayesian estimation"
                     )
                 else:
                     equivalent_sample_size = 3
+                # Decision (如果是預設資料集才顯示)
+                if data_source == "Use Default Dataset":
+                    decision = st.selectbox(
+                        "Decision:",
+                        options=['OverAll', 'Exposed', 'Unexposed'],
+                        index=0,
+                        help="Analysis subset selection"
                     )
                 else:
+                    decision = 'OverAll'
+            # Provide Evidence - 可摺疊區域
+            st.markdown("---")
+            with st.expander("**Provide Evidence**", expanded=False):
+                st.caption("Enter evidence values for inference (optional):")
+                evidence_cols = st.columns(2)
+                evidence_dict = {}
+                # 為每個非目標變數創建輸入框
+                all_vars = [v for v in selected_features if v != target_variable]
+                for idx, var in enumerate(all_vars):
+                    with evidence_cols[idx % 2]:
+                        val = st.text_input(
+                            f"{var}:",
+                            value="",
+                            key=f"evidence_{var}",
+                            help=f"Enter value for {var} (leave empty to ignore)"
+                        )
+                        if val.strip():
+                            evidence_dict[var] = val.strip()
+            # 進階參數 - 摺疊區域
+            with st.expander("**Advanced Parameters**", expanded=False):
+                n_bins = st.slider(
+                    "Number of Bins (for continuous variables):",
                     min_value=3,
                     max_value=20,
                     value=10,
+                    step=1,
+                    help="Number of bins for discretizing continuous features"
                 )
             # 執行分析按鈕
             st.markdown("---")
+            col_btn1, col_btn2 = st.columns([3, 1])
             with col_btn1:
                 run_button = st.button("🚀 Run Analysis", type="primary", use_container_width=True)
                     st.session_state.chat_history = []
                     st.rerun()
+            # 說明資訊
+            with st.expander("ℹ️ Analysis Information", expanded=False):
+                st.markdown("""
+                **Analysis Steps:**
+                1. Split data (train/test)
+                2. Learn network structure
+                3. Process features (bins from train)
+                4. Estimate parameters
+                5. Evaluate performance
+                **Note:** Test set bins are derived from training set to prevent data leakage.
+                """)
             if run_button:
                 # 驗證
         results = st.session_state.analysis_results
+        # 使用 tabs 來組織結果
+        result_tabs = st.tabs([
+            "🕸️ Network Structure",
+            "📈 Performance Metrics",
+            "📋 CPD Tables",
+            "📊 Model Scores"
+        ])
+        # Tab 1: 網路結構
+        with result_tabs[0]:
+            network_fig = generate_network_graph(results['model'])
+            st.plotly_chart(network_fig, use_container_width=True)
+            # 顯示邊的列表
+            with st.expander("View Network Edges", expanded=False):
+                edges = list(results['model'].edges())
+                st.write(f"Total edges: {len(edges)}")
+                # 每行顯示 3 個邊
+                for i in range(0, len(edges), 3):
+                    cols = st.columns(3)
+                    for j, col in enumerate(cols):
+                        if i + j < len(edges):
+                            edge = edges[i + j]
+                            col.markdown(f"**{edge[0]}** → {edge[1]}")
+        # Tab 2: 效能指標
+        with result_tabs[1]:
+            col_m1, col_m2 = st.columns(2)
+            with col_m1:
+                st.markdown("### Training Set")
+                train_metrics = results['train_metrics']
+                # 使用 metrics 卡片
+                metric_cols = st.columns(4)
+                metric_cols[0].metric("Accuracy", f"{train_metrics['accuracy']:.2f}%")
+                metric_cols[1].metric("Precision", f"{train_metrics['precision']:.2f}%")
+                metric_cols[2].metric("Recall", f"{train_metrics['recall']:.2f}%")
+                metric_cols[3].metric("F1-Score", f"{train_metrics['f1']:.2f}%")
+                metric_cols2 = st.columns(4)
+                metric_cols2[0].metric("AUC", f"{train_metrics['auc']:.4f}")
+                metric_cols2[1].metric("G-mean", f"{train_metrics['g_mean']:.2f}%")
+                metric_cols2[2].metric("P-mean", f"{train_metrics['p_mean']:.2f}%")
+                metric_cols2[3].metric("Specificity", f"{train_metrics['specificity']:.2f}%")
+                # 混淆矩陣
+                with st.expander("Confusion Matrix", expanded=True):
+                    conf_fig_train = plot_confusion_matrix(
+                        train_metrics['confusion_matrix'],
+                        title="Training Set"
+                    )
+                    st.plotly_chart(conf_fig_train, use_container_width=True)
+                # ROC Curve
+                with st.expander("ROC Curve", expanded=False):
+                    roc_fig_train = plot_roc_curve(
+                        train_metrics['fpr'],
+                        train_metrics['tpr'],
+                        train_metrics['auc'],
+                        title="Training Set"
+                    )
+                    st.plotly_chart(roc_fig_train, use_container_width=True)
+            with col_m2:
+                st.markdown("### Test Set")
+                test_metrics = results['test_metrics']
+                metric_cols = st.columns(4)
+                metric_cols[0].metric("Accuracy", f"{test_metrics['accuracy']:.2f}%")
+                metric_cols[1].metric("Precision", f"{test_metrics['precision']:.2f}%")
+                metric_cols[2].metric("Recall", f"{test_metrics['recall']:.2f}%")
+                metric_cols[3].metric("F1-Score", f"{test_metrics['f1']:.2f}%")
+                metric_cols2 = st.columns(4)
+                metric_cols2[0].metric("AUC", f"{test_metrics['auc']:.4f}")
+                metric_cols2[1].metric("G-mean", f"{test_metrics['g_mean']:.2f}%")
+                metric_cols2[2].metric("P-mean", f"{test_metrics['p_mean']:.2f}%")
+                metric_cols2[3].metric("Specificity", f"{test_metrics['specificity']:.2f}%")
+                # 混淆矩陣
+                with st.expander("Confusion Matrix", expanded=True):
+                    conf_fig_test = plot_confusion_matrix(
+                        test_metrics['confusion_matrix'],
+                        title="Test Set"
+                    )
+                    st.plotly_chart(conf_fig_test, use_container_width=True)
+                # ROC Curve
+                with st.expander("ROC Curve", expanded=False):
+                    roc_fig_test = plot_roc_curve(
+                        test_metrics['fpr'],
+                        test_metrics['tpr'],
+                        test_metrics['auc'],
+                        title="Test Set"
+                    )
+                    st.plotly_chart(roc_fig_test, use_container_width=True)
+        # Tab 3: 條件機率表
+        with result_tabs[2]:
+            selected_node = st.selectbox(
+                "Select a node to view its CPD:",
+                options=list(results['cpds'].keys())
             )
+            if selected_node:
+                cpd_df = create_cpd_table(results['cpds'][selected_node])
+                st.dataframe(cpd_df, use_container_width=True)
+                # 下載按鈕
+                csv = cpd_df.to_csv()
+                st.download_button(
+                    label="📥 Download CPD as CSV",
+                    data=csv,
+                    file_name=f"cpd_{selected_node}.csv",
+                    mime="text/csv"
+                )
+        # Tab 4: 模型評分
+        with result_tabs[3]:
+            scores = results['scores']
+            score_cols = st.columns(5)
+            score_cols[0].metric("Log-Likelihood", f"{scores['log_likelihood']:.2f}")
+            score_cols[1].metric("BIC Score", f"{scores['bic']:.2f}")
+            score_cols[2].metric("K2 Score", f"{scores['k2']:.2f}")
+            score_cols[3].metric("BDeu Score", f"{scores['bdeu']:.2f}")
+            score_cols[4].metric("BDs Score", f"{scores['bds']:.2f}")
+            # 參數摘要
+            with st.expander("Analysis Parameters", expanded=True):
+                params = results['parameters']
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    st.markdown("**Algorithm Settings**")
+                    st.write(f"- Algorithm: {params['algorithm']}")
+                    st.write(f"- Estimator: {params['estimator']}")
+                    st.write(f"- Test Fraction: {params['test_fraction']:.2%}")
+                with col2:
+                    st.markdown("**Feature Information**")
+                    st.write(f"- Total Features: {params['n_features']}")
+                    st.write(f"- Categorical: {len(params['cat_features'])}")
+                    st.write(f"- Continuous: {len(params['con_features'])}")
+                    st.write(f"- Target: {params['target_variable']}")
+                with col3:
+                    st.markdown("**Other Parameters**")
+                    st.write(f"- Bins: {params['n_bins']}")
+                    st.write(f"- Score Method: {params['score_method']}")
+                    st.write(f"- Significance Level: {params['sig_level']}")
+                    st.write(f"- Equivalent Sample Size: {params['equivalent_sample_size']}")
+            # 匯出結果
+            with st.expander("Export Results", expanded=False):
+                result_json = export_results_to_json(results)
+                st.download_button(
+                    label="📥 Download Full Results (JSON)",
+                    data=result_json,
+                    file_name=f"bn_analysis_{results['timestamp'][:10]}.json",
+                    mime="application/json"
+                )
 # Tab 2: AI 助手
 with tab2: