Spaces:

Wen1201
/

bert-finetuning-platform

Paused

App Files Files Community

Wen1201 committed on Oct 30, 2025

Commit

6a07af8

verified ·

1 Parent(s): d00baad

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -13

app.py CHANGED Viewed

@@ -51,12 +51,20 @@ def setup_bitfit(model):
             param.requires_grad = False
     return model
-def train_bert_model(csv_file, method, num_epochs, batch_size, learning_rate,
                      weight_decay, dropout, lora_r, lora_alpha, lora_dropout,
                      weight_mult, best_metric):
     global trained_models, model_counter
-    bert_variant = "bert-base-uncased"
     try:
         if csv_file is None:
@@ -76,9 +84,8 @@ def train_bert_model(csv_file, method, num_epochs, batch_size, learning_rate,
         ratio = n0 / n1
         w0, w1 = 1.0, ratio * weight_mult
-        info = f"📊 資料: {len(df_clean)} 筆\n存活: {n0} | 死亡: {n1}\n權重: {w0:.2f} / {w1:.2f}\n模型: {bert_variant}\n方法: {method.upper()}"
-        model_name = bert_variant
         tokenizer = BertTokenizer.from_pretrained(model_name)
         dataset = Dataset.from_pandas(df_clean[['text', 'label']])
@@ -139,10 +146,10 @@ def train_bert_model(csv_file, method, num_epochs, batch_size, learning_rate,
         results = trainer.evaluate()
         model_counter += 1
-        model_id = f"BERT_Model_{model_counter}_{method}"
         trained_models[model_id] = {
             'model': model, 'tokenizer': tokenizer, 'results': results,
-            'config': {'type': 'BERT', 'variant': bert_variant, 'method': method, 'metric': best_metric}
         }
         output = f"✅ 模型: {model_id}\n\n"
@@ -205,13 +212,13 @@ def compare():
         return "❌ 尚未訓練模型"
     text = "# 📊 模型比較\n\n"
-    text += "| 模型 | 方法 | F1 | Acc | Prec | Recall | Sens | Spec |\n"
-    text += "|------|------|-----|-----|------|--------|------|------|\n"
     for mid, info in trained_models.items():
         r = info['results']
         c = info['config']
-        text += f"| {mid} | {c['method'].upper()} | {r['eval_f1']:.4f} | {r['eval_accuracy']:.4f} | "
         text += f"{r['eval_precision']:.4f} | {r['eval_recall']:.4f} | "
         text += f"{r['eval_sensitivity']:.4f} | {r['eval_specificity']:.4f} |\n"
@@ -226,7 +233,16 @@ with gr.Blocks(title="BERT Fine-tuning 教學平台", theme=gr.themes.Soft()) as
     gr.Markdown("# 🧬 BERT Fine-tuning 教學平台")
     with gr.Tab("訓練"):
-        gr.Markdown("## 步驟 1: 選擇微調方法")
         method = gr.Radio(
             choices=["lora", "adalora", "ia3", "bitfit"],
@@ -234,10 +250,10 @@ with gr.Blocks(title="BERT Fine-tuning 教學平台", theme=gr.themes.Soft()) as
             label="微調方法"
         )
-        gr.Markdown("## 步驟 2: 上傳資料")
         csv_file = gr.File(label="CSV 檔案 (需包含 Text 和 label 欄位)", file_types=[".csv"])
-        gr.Markdown("## 步驟 3: 設定訓練參數")
         gr.Markdown("### 🎯 基本訓練參數")
         with gr.Row():
@@ -278,7 +294,7 @@ with gr.Blocks(title="BERT Fine-tuning 教學平台", theme=gr.themes.Soft()) as
         train_btn.click(
             train_bert_model,
-            inputs=[csv_file, method, num_epochs, batch_size, learning_rate,
                    weight_decay, dropout, lora_r, lora_alpha, lora_dropout,
                    weight_mult, best_metric],
             outputs=[data_info, train_output, status]
@@ -317,6 +333,12 @@ with gr.Blocks(title="BERT Fine-tuning 教學平台", theme=gr.themes.Soft()) as
         gr.Markdown("""
         ## 📖 使用說明
         ### 微調方法
         - **LoRA**: 低秩適應，只訓練少量參數 ⭐推薦

             param.requires_grad = False
     return model
+def train_bert_model(csv_file, base_model, method, num_epochs, batch_size, learning_rate,
                      weight_decay, dropout, lora_r, lora_alpha, lora_dropout,
                      weight_mult, best_metric):
     global trained_models, model_counter
+    # 模型名稱映射
+    model_mapping = {
+        "BERT-base": "bert-base-uncased",
+        "BERT-large": "bert-large-uncased",
+        "BioBERT": "dmis-lab/biobert-v1.1",
+        "ClinicalBERT": "emilyalsentzer/Bio_ClinicalBERT"
+    }
+    model_name = model_mapping.get(base_model, "bert-base-uncased")
     try:
         if csv_file is None:
         ratio = n0 / n1
         w0, w1 = 1.0, ratio * weight_mult
+        info = f"📊 資料: {len(df_clean)} 筆\n存活: {n0} | 死亡: {n1}\n權重: {w0:.2f} / {w1:.2f}\n模型: {base_model}\n方法: {method.upper()}"
         tokenizer = BertTokenizer.from_pretrained(model_name)
         dataset = Dataset.from_pandas(df_clean[['text', 'label']])
         results = trainer.evaluate()
         model_counter += 1
+        model_id = f"{base_model}_Model_{model_counter}_{method}"
         trained_models[model_id] = {
             'model': model, 'tokenizer': tokenizer, 'results': results,
+            'config': {'type': base_model, 'model_name': model_name, 'method': method, 'metric': best_metric}
         }
         output = f"✅ 模型: {model_id}\n\n"
         return "❌ 尚未訓練模型"
     text = "# 📊 模型比較\n\n"
+    text += "| 模型 | 基礎模型 | 方法 | F1 | Acc | Prec | Recall | Sens | Spec |\n"
+    text += "|------|----------|------|-----|-----|------|--------|------|------|\n"
     for mid, info in trained_models.items():
         r = info['results']
         c = info['config']
+        text += f"| {mid} | {c['type']} | {c['method'].upper()} | {r['eval_f1']:.4f} | {r['eval_accuracy']:.4f} | "
         text += f"{r['eval_precision']:.4f} | {r['eval_recall']:.4f} | "
         text += f"{r['eval_sensitivity']:.4f} | {r['eval_specificity']:.4f} |\n"
     gr.Markdown("# 🧬 BERT Fine-tuning 教學平台")
     with gr.Tab("訓練"):
+        gr.Markdown("## 步驟 1: 選擇基礎模型")
+        base_model = gr.Dropdown(
+            choices=["BERT-base"],
+            value="BERT-base",
+            label="基礎模型",
+            info="更多模型即將推出"
+        )
+        gr.Markdown("## 步驟 2: 選擇微調方法")
         method = gr.Radio(
             choices=["lora", "adalora", "ia3", "bitfit"],
             label="微調方法"
         )
+        gr.Markdown("## 步驟 3: 上傳資料")
         csv_file = gr.File(label="CSV 檔案 (需包含 Text 和 label 欄位)", file_types=[".csv"])
+        gr.Markdown("## 步驟 4: 設定訓練參數")
         gr.Markdown("### 🎯 基本訓練參數")
         with gr.Row():
         train_btn.click(
             train_bert_model,
+            inputs=[csv_file, base_model, method, num_epochs, batch_size, learning_rate,
                    weight_decay, dropout, lora_r, lora_alpha, lora_dropout,
                    weight_mult, best_metric],
             outputs=[data_info, train_output, status]
         gr.Markdown("""
         ## 📖 使用說明
+        ### 基礎模型
+        - **BERT-base**: 標準 BERT，110M 參數 ⭐目前支援
+        *更多模型（BERT-large、BioBERT、ClinicalBERT）即將推出*
         ### 微調方法
         - **LoRA**: 低秩適應，只訓練少量參數 ⭐推薦