jeff7522553 committed on
Commit
5b16257
·
1 Parent(s): e970db4

更新資料以及加入多準確率

Browse files
Files changed (2) hide show
  1. app.py +48 -13
  2. sampled_data.csv +0 -0
app.py CHANGED
@@ -9,7 +9,7 @@ from sklearn.tree import DecisionTreeClassifier
9
  from sklearn.svm import SVC
10
  import xgboost as xgb
11
  import statsmodels.api as sm
12
- from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score
13
  import warnings
14
  import json
15
 
@@ -129,15 +129,41 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
129
  else: importances, title = None, 'Feature Importance'
130
 
131
  # --- 3. 評估與繪圖 ---
132
- accuracy = accuracy_score(y_test, y_pred)
133
- report_dict = classification_report(y_test, y_pred, target_names=['not purchase insurance (0)', 'purchase insurance (1)'], output_dict=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # 2. 轉成 DataFrame(每個類別一列)
135
- df_report = pd.DataFrame(report_dict).T # T = transpose,讓 index 變成類別名稱
136
- df_report.insert(0, "index", df_report.index)
137
- # print(df_report)
138
- df_report = processDisplayDataframe(df_report)
139
 
140
- auc_score = f"ROC-AUC 分數: {roc_auc_score(y_test, y_pred_proba):.4f}"
141
  cm = confusion_matrix(y_test, y_pred)
142
  fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
143
 
@@ -155,16 +181,20 @@ def train_and_evaluate(history_log, model_name, features, dt_criterion, dt_max_d
155
  model_name,
156
  ', '.join(features),
157
  json.dumps(params),
158
- f"{accuracy:.4f}"
 
 
 
 
159
  ]
160
  # 將新紀錄加到歷史紀錄的開頭
161
  updated_log = [new_log_entry] + history_log
162
  log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
163
 
164
- return df_report, auc_score, fig_cm, fig_imp, log_df, updated_log
165
 
166
  # --- Gradio 介面設計 ---
167
- LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率"]
168
 
169
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
170
  # 用於儲存日誌的隱藏狀態元件
@@ -197,8 +227,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
197
  run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
198
  with gr.Column(scale=2):
199
  gr.Markdown("## 3. 模型評估結果")
200
- model_output_report = gr.DataFrame(label="分類報告")
 
 
 
201
  model_output_auc = gr.Textbox(label="AUC 分數")
 
 
202
  model_plot_cm = gr.Plot(label="混淆矩陣")
203
  model_plot_importance = gr.Plot(label="特徵重要性/係數")
204
 
@@ -218,7 +253,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
218
  run_btn.click(
219
  train_and_evaluate,
220
  inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
221
- outputs=[model_output_report, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
222
  )
223
 
224
  if __name__ == "__main__":
 
9
  from sklearn.svm import SVC
10
  import xgboost as xgb
11
  import statsmodels.api as sm
12
+ from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, accuracy_score, precision_score, recall_score, f1_score
13
  import warnings
14
  import json
15
 
 
129
  else: importances, title = None, 'Feature Importance'
130
 
131
  # --- 3. 評估與繪圖 ---
132
+ accuracy_value = accuracy_score(y_test, y_pred)
133
+ precision_value = precision_score(y_test, y_pred)
134
+ recall_value = recall_score(y_test, y_pred)
135
+ f1_score_value = f1_score(y_test, y_pred)
136
+ roc_auc_value = roc_auc_score(y_test, y_pred_proba)
137
+
138
+ accuracy_text = f"準確率 分數: {accuracy_value:.4f}"
139
+ precision_text = f"精確率 分數: {precision_value:.4f}"
140
+ recall_text = f"召回率 分數: {recall_value:.4f}"
141
+ f1_score_text = f"F1 分數: {f1_score_value:.4f}"
142
+ roc_auc_text = f"ROC-AUC 分數: {roc_auc_value:.4f}"
143
+
144
+ report_dict = classification_report(y_test, y_pred, target_names=['not purchase insurance (0)', 'purchase insurance (1)'], output_dict=True)
145
+ classfy_report = pd.DataFrame({
146
+ 'not purchase insurance (0)':report_dict['not purchase insurance (0)'],
147
+ 'purchase insurance (1)':report_dict['purchase insurance (1)'],
148
+ }, columns=[ 'not purchase insurance (0)', 'purchase insurance (1)']).T
149
+ classfy_report.insert(0, "index", classfy_report.index)
150
+ classfy_report = processDisplayDataframe(classfy_report)
151
+
152
+
153
+ avg_report = pd.DataFrame([
154
+ report_dict["macro avg"],
155
+ report_dict["weighted avg"],
156
+ ], index=["macro avg", "weighted avg"])
157
+ avg_report.insert(0, "index", avg_report.index)
158
+ avg_report = processDisplayDataframe(avg_report)
159
+
160
  # 2. 轉成 DataFrame(每個類別一列)
161
+ # df_report = pd.DataFrame(report_dict).T # T = transpose,讓 index 變成類別名稱
162
+ # df_report.insert(0, "index", df_report.index)
163
+ # # print(df_report)
164
+ # df_report = processDisplayDataframe(df_report)
165
 
166
+
167
  cm = confusion_matrix(y_test, y_pred)
168
  fig_cm, ax_cm = plt.subplots(); sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm, xticklabels=['Predicted 0', 'Predicted 1'], yticklabels=['Actual 0', 'Actual 1']); ax_cm.set_title('Confusion Matrix'); ax_cm.set_xlabel('Predicted Label'); ax_cm.set_ylabel('Actual Label'); plt.tight_layout()
169
 
 
181
  model_name,
182
  ', '.join(features),
183
  json.dumps(params),
184
+ f"{accuracy_value:.4f}",
185
+ f"{precision_value:.4f}",
186
+ f"{recall_value:.4f}",
187
+ f"{f1_score_value:.4f}",
188
+ f"{roc_auc_value:.4f}",
189
  ]
190
  # 將新紀錄加到歷史紀錄的開頭
191
  updated_log = [new_log_entry] + history_log
192
  log_df = pd.DataFrame(updated_log, columns=LOG_COLUMNS)
193
 
194
+ return classfy_report, avg_report, accuracy_text, precision_text, recall_text, f1_score_text, roc_auc_text, fig_cm, fig_imp, log_df, updated_log
195
 
196
  # --- Gradio 介面設計 ---
197
+ LOG_COLUMNS = ["時間", "模型", "特徵", "參數", "準確率", "精確率", "召回率", "F1 分數", "ROC-AUC 分數"]
198
 
199
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
200
  # 用於儲存日誌的隱藏狀態元件
 
227
  run_btn = gr.Button("🚀 執行模型訓練", variant="primary")
228
  with gr.Column(scale=2):
229
  gr.Markdown("## 3. 模型評估結果")
230
+ model_output_accuracy = gr.Textbox(label="準確率 分數")
231
+ model_output_precision = gr.Textbox(label="精確率 分數")
232
+ model_output_recall = gr.Textbox(label="召回率 分數")
233
+ model_output_f1_score = gr.Textbox(label="F1 分數")
234
  model_output_auc = gr.Textbox(label="AUC 分數")
235
+ model_output_report = gr.DataFrame(label="分類報告")
236
+ model_output_report_avg = gr.DataFrame(label="平均報告")
237
  model_plot_cm = gr.Plot(label="混淆矩陣")
238
  model_plot_importance = gr.Plot(label="特徵重要性/係數")
239
 
 
253
  run_btn.click(
254
  train_and_evaluate,
255
  inputs=[log_state, model_selector, feature_selector, dt_criterion, dt_max_depth, xgb_n_estimators, xgb_max_depth, xgb_learning_rate, svm_c, svm_kernel],
256
+ outputs=[model_output_report, model_output_report_avg, model_output_accuracy, model_output_precision, model_output_recall, model_output_f1_score, model_output_auc, model_plot_cm, model_plot_importance, log_df_display, log_state]
257
  )
258
 
259
  if __name__ == "__main__":
sampled_data.csv CHANGED
The diff for this file is too large to render. See raw diff