Spaces:

jeff7522553
/

CIP_Project

Sleeping

App Files Community

jeff7522553 committed on Nov 17, 2025

Commit

5b16257

1 Parent(s): e970db4

更新資料以及加入多準確率

Browse files

Files changed (2) hide show

app.py +48 -13
sampled_data.csv +0 -0

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from sklearn.tree import DecisionTreeClassifier
 from sklearn.svm import SVC
 import xgboost as xgb
 import statsmodels.api as sm
-from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score
 import warnings
 import json
@@ -129,15 +129,41 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
         else: importances, title = None, 'Feature Importance'
     # --- 3. 評估與繪圖 ---
-    accuracy = accuracy_score(y_test, y_pred)
-    report_dict = classification_report(y_test, y_pred, target_names=['not purchase insurance (0)', 'purchase insurance (1)'], output_dict=True)
     # 2. 轉成 DataFrame（每個類別一列）
-    df_report = pd.DataFrame(report_dict).T  # T = transpose，讓 index 變成類別名稱
-    df_report.insert(0, "index",  df_report.index)
-    # print(df_report)
-    df_report = processDisplayDataframe(df_report)
-    auc_score = f"ROC-AUC 分數: {roc_auc_score(y_test, y_pred_proba):.4f}"
     cm = confusion_matrix(y_test, y_pred)
     fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
@@ -155,16 +181,20 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
         model_name,
         ', '.join(features),
         json.dumps(params),
-        f"{accuracy:.4f}"
     ]
     # 將新紀錄加到歷史紀錄的開頭
     updated_log = [new_log_entry] + history_log
     log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
-    return df_report, auc_score, fig_cm, fig_imp, log_df, updated_log
 # --- Gradio 介面設計 ---
-LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率"]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # 用於儲存日誌的隱藏狀態元件
@@ -197,8 +227,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
         with gr.Column(scale=2):
             gr.Markdown("## 3. 模型評估結果")
-            model_output_report = gr.DataFrame(label="分類報告")
             model_output_auc = gr.Textbox(label="AUC 分數")
             model_plot_cm = gr.Plot(label="混淆矩陣")
             model_plot_importance = gr.Plot(label="特徵重要性/係數")
@@ -218,7 +253,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     run_btn.click(
         train_and_evaluate,
         inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
-        outputs=[model_output_report, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
     )
 if __name__ == "__main__":

 from sklearn.svm import SVC
 import xgboost as xgb
 import statsmodels.api as sm
+from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
 import warnings
 import json
         else: importances, title = None, 'Feature Importance'
     # --- 3. 評估與繪圖 ---
+    accuracy_value = accuracy_score(y_test, y_pred)
+    precision_value = precision_score(y_test, y_pred)
+    recall_value = recall_score(y_test, y_pred)
+    f1_score_value = f1_score(y_test, y_pred)
+    roc_auc_value = roc_auc_score(y_test, y_pred_proba)
+    accuracy_text = f"準確率 分數: {accuracy_value:.4f}"
+    precision_text = f"精確率 分數: {precision_value:.4f}"
+    recall_text = f"召回率 分數: {recall_value:.4f}"
+    f1_score_text = f"F1 分數: {f1_score_value:.4f}"
+    roc_auc_text = f"ROC-AUC 分數: {roc_auc_value:.4f}"
+    report_dict = classification_report(y_test, y_pred, target_names=['not purchase insurance (0)', 'purchase insurance (1)'], output_dict=True)
+    classfy_report = pd.DataFrame({
+        'not purchase insurance (0)':report_dict['not purchase insurance (0)'],
+        'purchase insurance (1)':report_dict['purchase insurance (1)'],
+    }, columns=[ 'not purchase insurance (0)', 'purchase insurance (1)']).T
+    classfy_report.insert(0, "index",  classfy_report.index)
+    classfy_report = processDisplayDataframe(classfy_report)
+    avg_report = pd.DataFrame([
+        report_dict["macro avg"],
+        report_dict["weighted avg"],
+    ], index=["macro avg", "weighted avg"])
+    avg_report.insert(0, "index",  avg_report.index)
+    avg_report = processDisplayDataframe(avg_report)
     # 2. 轉成 DataFrame（每個類別一列）
+    # df_report = pd.DataFrame(report_dict).T  # T = transpose，讓 index 變成類別名稱
+    # df_report.insert(0, "index",  df_report.index)
+    # # print(df_report)
+    # df_report = processDisplayDataframe(df_report)
     cm = confusion_matrix(y_test, y_pred)
     fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
         model_name,
         ', '.join(features),
         json.dumps(params),
+        f"{accuracy_value:.4f}",
+        f"{precision_value:.4f}",
+        f"{recall_value:.4f}",
+        f"{f1_score_value:.4f}",
+        f"{roc_auc_value:.4f}",
     ]
     # 將新紀錄加到歷史紀錄的開頭
     updated_log = [new_log_entry] + history_log
     log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
+    return classfy_report, avg_report, accuracy_text, precision_text, recall_text, f1_score_text, roc_auc_text, fig_cm, fig_imp, log_df, updated_log
 # --- Gradio 介面設計 ---
+LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率", "精確率", "召回率", "F1 分數", "ROC-AUC 分數"]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     # 用於儲存日誌的隱藏狀態元件
             run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
         with gr.Column(scale=2):
             gr.Markdown("## 3. 模型評估結果")
+            model_output_accuracy = gr.Textbox(label="準確率 分數")
+            model_output_precision = gr.Textbox(label="精確率 分數")
+            model_output_recall = gr.Textbox(label="召回率 分數")
+            model_output_f1_score = gr.Textbox(label="F1 分數")
             model_output_auc = gr.Textbox(label="AUC 分數")
+            model_output_report = gr.DataFrame(label="分類報告")
+            model_output_report_avg = gr.DataFrame(label="平均報告")
             model_plot_cm = gr.Plot(label="混淆矩陣")
             model_plot_importance = gr.Plot(label="特徵重要性/係數")
     run_btn.click(
         train_and_evaluate,
         inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
+        outputs=[model_output_report, model_output_report_avg, model_output_accuracy, model_output_precision, model_output_recall, model_output_f1_score, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
     )
 if __name__ == "__main__":

sampled_data.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff