Spaces:
Sleeping
Sleeping
jeff7522553
committed on
Commit
·
5b16257
1
Parent(s):
e970db4
更新資料以及加入多準確率
Browse files- app.py +48 -13
- sampled_data.csv +0 -0
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from sklearn.tree import DecisionTreeClassifier
|
|
| 9 |
from sklearn.svm import SVC
|
| 10 |
import xgboost as xgb
|
| 11 |
import statsmodels.api as sm
|
| 12 |
-
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score
|
| 13 |
import warnings
|
| 14 |
import json
|
| 15 |
|
|
@@ -129,15 +129,41 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
|
|
| 129 |
else: importances, title = None, 'Feature Importance'
|
| 130 |
|
| 131 |
# --- 3. 評估與繪圖 ---
|
| 132 |
-
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
# 2. 轉成 DataFrame(每個類別一列)
|
| 135 |
-
df_report = pd.DataFrame(report_dict).T # T = transpose,讓 index 變成類別名稱
|
| 136 |
-
df_report.insert(0, "index", df_report.index)
|
| 137 |
-
# print(df_report)
|
| 138 |
-
df_report = processDisplayDataframe(df_report)
|
| 139 |
|
| 140 |
-
|
| 141 |
cm = confusion_matrix(y_test, y_pred)
|
| 142 |
fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
|
| 143 |
|
|
@@ -155,16 +181,20 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
|
|
| 155 |
model_name,
|
| 156 |
', '.join(features),
|
| 157 |
json.dumps(params),
|
| 158 |
-
f"{
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
]
|
| 160 |
# 將新紀錄加到歷史紀錄的開頭
|
| 161 |
updated_log = [new_log_entry] + history_log
|
| 162 |
log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
|
| 163 |
|
| 164 |
-
return
|
| 165 |
|
| 166 |
# --- Gradio 介面設計 ---
|
| 167 |
-
LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率"]
|
| 168 |
|
| 169 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 170 |
# 用於儲存日誌的隱藏狀態元件
|
|
@@ -197,8 +227,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 197 |
run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
|
| 198 |
with gr.Column(scale=2):
|
| 199 |
gr.Markdown("## 3. 模型評估結果")
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
| 201 |
model_output_auc = gr.Textbox(label="AUC 分數")
|
|
|
|
|
|
|
| 202 |
model_plot_cm = gr.Plot(label="混淆矩陣")
|
| 203 |
model_plot_importance = gr.Plot(label="特徵重要性/係數")
|
| 204 |
|
|
@@ -218,7 +253,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 218 |
run_btn.click(
|
| 219 |
train_and_evaluate,
|
| 220 |
inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
|
| 221 |
-
outputs=[model_output_report, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
|
| 222 |
)
|
| 223 |
|
| 224 |
if __name__ == "__main__":
|
|
|
|
| 9 |
from sklearn.svm import SVC
|
| 10 |
import xgboost as xgb
|
| 11 |
import statsmodels.api as sm
|
| 12 |
+
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
|
| 13 |
import warnings
|
| 14 |
import json
|
| 15 |
|
|
|
|
| 129 |
else: importances, title = None, 'Feature Importance'
|
| 130 |
|
| 131 |
# --- 3. 評估與繪圖 ---
|
| 132 |
+
accuracy_value = accuracy_score(y_test, y_pred)
|
| 133 |
+
precision_value = precision_score(y_test, y_pred)
|
| 134 |
+
recall_value = recall_score(y_test, y_pred)
|
| 135 |
+
f1_score_value = f1_score(y_test, y_pred)
|
| 136 |
+
roc_auc_value = roc_auc_score(y_test, y_pred_proba)
|
| 137 |
+
|
| 138 |
+
accuracy_text = f"準確率 分數: {accuracy_value:.4f}"
|
| 139 |
+
precision_text = f"精確率 分數: {precision_value:.4f}"
|
| 140 |
+
recall_text = f"召回率 分數: {recall_value:.4f}"
|
| 141 |
+
f1_score_text = f"F1 分數: {f1_score_value:.4f}"
|
| 142 |
+
roc_auc_text = f"ROC-AUC 分數: {roc_auc_value:.4f}"
|
| 143 |
+
|
| 144 |
+
report_dict = classification_report(y_test, y_pred, target_names=['not purchase insurance (0)', 'purchase insurance (1)'], output_dict=True)
|
| 145 |
+
classfy_report = pd.DataFrame({
|
| 146 |
+
'not purchase insurance (0)':report_dict['not purchase insurance (0)'],
|
| 147 |
+
'purchase insurance (1)':report_dict['purchase insurance (1)'],
|
| 148 |
+
}, columns=[ 'not purchase insurance (0)', 'purchase insurance (1)']).T
|
| 149 |
+
classfy_report.insert(0, "index", classfy_report.index)
|
| 150 |
+
classfy_report = processDisplayDataframe(classfy_report)
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
avg_report = pd.DataFrame([
|
| 154 |
+
report_dict["macro avg"],
|
| 155 |
+
report_dict["weighted avg"],
|
| 156 |
+
], index=["macro avg", "weighted avg"])
|
| 157 |
+
avg_report.insert(0, "index", avg_report.index)
|
| 158 |
+
avg_report = processDisplayDataframe(avg_report)
|
| 159 |
+
|
| 160 |
# 2. 轉成 DataFrame(每個類別一列)
|
| 161 |
+
# df_report = pd.DataFrame(report_dict).T # T = transpose,讓 index 變成類別名稱
|
| 162 |
+
# df_report.insert(0, "index", df_report.index)
|
| 163 |
+
# # print(df_report)
|
| 164 |
+
# df_report = processDisplayDataframe(df_report)
|
| 165 |
|
| 166 |
+
|
| 167 |
cm = confusion_matrix(y_test, y_pred)
|
| 168 |
fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
|
| 169 |
|
|
|
|
| 181 |
model_name,
|
| 182 |
', '.join(features),
|
| 183 |
json.dumps(params),
|
| 184 |
+
f"{accuracy_value:.4f}",
|
| 185 |
+
f"{precision_value:.4f}",
|
| 186 |
+
f"{recall_value:.4f}",
|
| 187 |
+
f"{f1_score_value:.4f}",
|
| 188 |
+
f"{roc_auc_value:.4f}",
|
| 189 |
]
|
| 190 |
# 將新紀錄加到歷史紀錄的開頭
|
| 191 |
updated_log = [new_log_entry] + history_log
|
| 192 |
log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
|
| 193 |
|
| 194 |
+
return classfy_report, avg_report, accuracy_text, precision_text, recall_text, f1_score_text, roc_auc_text, fig_cm, fig_imp, log_df, updated_log
|
| 195 |
|
| 196 |
# --- Gradio 介面設計 ---
|
| 197 |
+
LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率", "精確率", "召回率", "F1 分數", "ROC-AUC 分數"]
|
| 198 |
|
| 199 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 200 |
# 用於儲存日誌的隱藏狀態元件
|
|
|
|
| 227 |
run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
|
| 228 |
with gr.Column(scale=2):
|
| 229 |
gr.Markdown("## 3. 模型評估結果")
|
| 230 |
+
model_output_accuracy = gr.Textbox(label="準確率 分數")
|
| 231 |
+
model_output_precision = gr.Textbox(label="精確率 分數")
|
| 232 |
+
model_output_recall = gr.Textbox(label="召回率 分數")
|
| 233 |
+
model_output_f1_score = gr.Textbox(label="F1 分數")
|
| 234 |
model_output_auc = gr.Textbox(label="AUC 分數")
|
| 235 |
+
model_output_report = gr.DataFrame(label="分類報告")
|
| 236 |
+
model_output_report_avg = gr.DataFrame(label="平均報告")
|
| 237 |
model_plot_cm = gr.Plot(label="混淆矩陣")
|
| 238 |
model_plot_importance = gr.Plot(label="特徵重要性/係數")
|
| 239 |
|
|
|
|
| 253 |
run_btn.click(
|
| 254 |
train_and_evaluate,
|
| 255 |
inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
|
| 256 |
+
outputs=[model_output_report, model_output_report_avg, model_output_accuracy, model_output_precision, model_output_recall, model_output_f1_score, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
|
| 257 |
)
|
| 258 |
|
| 259 |
if __name__ == "__main__":
|
sampled_data.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|