smartTranscend committed on
Commit
4533ea5
·
verified ·
1 Parent(s): 1f1d45e

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +6 -0
  2. requirements.txt +12 -0
  3. train.py +536 -0
  4. training_data.csv +0 -0
app.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Thin launcher used as the Space entry point: run train.py with the current
# interpreter and propagate its exit code to the caller.
import subprocess
import sys

print("🚀 開始執行訓練腳本...")
exit_code = subprocess.run(
    [sys.executable, "train.py"],
    capture_output=False,
).returncode
sys.exit(exit_code)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.45.0
2
+ torch>=2.0.0
3
+ gradio>=4.44.1
4
+ peft>=0.12.0
5
+ accelerate>=0.34.0
6
+ bitsandbytes>=0.43.2
7
+ sentencepiece>=0.2.0
8
+ protobuf>=3.20.0
9
+ scikit-learn>=1.4.0
10
+ datasets>=2.18.0
11
+ pandas>=2.2.0
12
+ huggingface_hub>=0.20.0
train.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Llama NBCD Fine-tuning Script with Baseline Comparison
3
+ 比較未微調 vs 微調模型的效果
4
+ """
5
+
6
+ import pandas as pd
7
+ import torch
8
+ from datasets import Dataset, DatasetDict
9
+ from transformers import (
10
+ AutoTokenizer,
11
+ AutoModelForSequenceClassification,
12
+ TrainingArguments,
13
+ Trainer,
14
+ DataCollatorWithPadding
15
+ )
16
+ from peft import LoraConfig, get_peft_model, TaskType
17
+ from sklearn.model_selection import train_test_split
18
+ from sklearn.metrics import accuracy_score, precision_recall_fscore_support
19
+ from sklearn.utils import resample
20
+ import numpy as np
21
+ import json
22
+ from datetime import datetime
23
+ import os
24
+ from huggingface_hub import login
25
+
26
# ==================== Hugging Face token login ====================
# Authenticate with the Hub when HF_TOKEN is set; gated models (Llama) need it.
print("🔐 檢查 Hugging Face Token...")
hf_token = os.environ.get("HF_TOKEN")
if hf_token is not None:
    try:
        login(token=hf_token)
    except Exception as e:
        # Best-effort: report the failure and keep going; the download may
        # still succeed from a local cache.
        print(f"⚠️ Token 登入失敗: {e}")
    else:
        print("✅ 已使用 HF Token 登入")
else:
    print("⚠️ 未找到 HF_TOKEN,可能無法下載 Llama 模型")
36
+
37
# ==================== Configuration ====================
MODEL_NAME = "meta-llama/Llama-3.2-1B"
TRAINING_DATA_PATH = "./training_data.csv"
OUTPUT_DIR = "./trained_model"
MAX_LENGTH = 512  # tokenizer truncation length (tokens)

# Training hyperparameters
TRAIN_CONFIG = {
    "num_epochs": 3,
    "batch_size": 4,
    "learning_rate": 1e-4,
    "lora_r": 8,
    "lora_alpha": 16,
}

# Class-balancing settings
BALANCE_CONFIG = {
    "target_samples_per_class": 700,
    "use_class_weights": True,
}

banner = "=" * 70
print("\n" + banner)
print("🦙 Llama NBCD Fine-tuning with Baseline Comparison")
print(" (未微調 vs 微調模型比較)")
print(banner)
print(f"\n📋 配置:")
print(f" 模型: {MODEL_NAME}")
print(f" 訓練數據: {TRAINING_DATA_PATH}")
print(f" 輸出目錄: {OUTPUT_DIR}")
print(f" Epochs: {TRAIN_CONFIG['num_epochs']}")
print(f" Batch Size: {TRAIN_CONFIG['batch_size']}")
print(f" Learning Rate: {TRAIN_CONFIG['learning_rate']}")
print(f" 目標樣本數: {BALANCE_CONFIG['target_samples_per_class']} 筆/類別")
print(banner + "\n")
71
+
72
# ==================== 1. Load training data ====================
# Expects a CSV with at least a 'Text' column and a binary 'nbcd' label column.
print("📂 載入訓練數據...")
try:
    df = pd.read_csv(TRAINING_DATA_PATH)
    # The column access stays inside the try block on purpose: a CSV without
    # the 'nbcd' column is treated as a fatal load error, same as a missing file.
    label_col = df['nbcd']
    print(f"✅ 成功載入 {len(df)} 筆數據")
    print(f" 欄位: {list(df.columns)}")
    print(f" 原始 Class 0: {(label_col==0).sum()} 筆")
    print(f" 原始 Class 1: {(label_col==1).sum()} 筆")
except Exception as e:
    print(f"❌ 無法載入數據: {e}")
    print(f" 請確認 {TRAINING_DATA_PATH} 存在且格式正確")
    exit(1)
84
+
85
# ==================== 2. Class balancing ====================
# Undersample the majority class (0) and oversample the minority class (1)
# toward BALANCE_CONFIG['target_samples_per_class'] each, then shuffle.
print("\n⚖️ 執行資料平衡...")

df_class_0 = df[df['nbcd'] == 0]
df_class_1 = df[df['nbcd'] == 1]

target_n = BALANCE_CONFIG['target_samples_per_class']

if len(df_class_0) > target_n:
    # Without replacement: draw a random subset of the majority class.
    df_class_0_balanced = resample(df_class_0, n_samples=target_n, random_state=42, replace=False)
    print(f"✅ Class 0 欠採樣: {len(df_class_0)} → {len(df_class_0_balanced)} 筆")
else:
    df_class_0_balanced = df_class_0
    print(f"⚠️ Class 0 樣本數不足,保持 {len(df_class_0)} 筆")

if len(df_class_1) < target_n:
    # With replacement: duplicate minority-class rows up to the target count.
    df_class_1_balanced = resample(df_class_1, n_samples=target_n, random_state=42, replace=True)
    print(f"✅ Class 1 過採樣: {len(df_class_1)} → {len(df_class_1_balanced)} 筆")
else:
    df_class_1_balanced = df_class_1
    print(f"⚠️ Class 1 樣本數充足,保持 {len(df_class_1)} 筆")

# Recombine and shuffle with a fixed seed so runs are reproducible.
df_balanced = (
    pd.concat([df_class_0_balanced, df_class_1_balanced])
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)

print(f"\n📊 平衡後數據:")
print(f" 總樣本數: {len(df_balanced)} 筆")
print(f" Class 0: {(df_balanced['nbcd']==0).sum()} 筆")
print(f" Class 1: {(df_balanced['nbcd']==1).sum()} 筆")
116
+
117
# ==================== 3. Class weights ====================
# Inverse-frequency weights, total / (n_classes * count_c): the standard
# "balanced" scheme, used later to reweight the cross-entropy loss.
if BALANCE_CONFIG['use_class_weights']:
    print("\n⚖️ 計算類別權重...")
    counts = df_balanced['nbcd'].value_counts().sort_index()
    total = len(df_balanced)
    num_classes = 2

    class_weight_0, class_weight_1 = (total / (num_classes * counts[c]) for c in (0, 1))
    class_weights = torch.tensor([class_weight_0, class_weight_1], dtype=torch.float32)

    print(f"✅ 類別權重計算完成:")
    print(f" Class 0 權重: {class_weight_0:.4f}")
    print(f" Class 1 權重: {class_weight_1:.4f}")
else:
    class_weights = None
    print("\n⚠️ 未使用類別權重")
134
+
135
# ==================== 4. Train/test split ====================
# Stratified 80/20 split so both splits keep the balanced class ratio.
print("\n✂️ 分割訓練集和測試集...")
train_df, test_df = train_test_split(
    df_balanced,
    test_size=0.2,
    stratify=df_balanced['nbcd'],
    random_state=42,
)
print(f"✅ 訓練集: {len(train_df)} 筆 (Class 0: {(train_df['nbcd']==0).sum()}, Class 1: {(train_df['nbcd']==1).sum()})")
print(f"✅ 測試集: {len(test_df)} 筆 (Class 0: {(test_df['nbcd']==0).sum()}, Class 1: {(test_df['nbcd']==1).sum()})")

# Wrap both splits as HF datasets, keeping only the text and label columns.
dataset = DatasetDict({
    split: Dataset.from_pandas(frame[['Text', 'nbcd']])
    for split, frame in (('train', train_df), ('test', test_df))
})
150
+
151
# ==================== 5. Device detection ====================
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\n🖥️ 使用設備: {device}")
if device == "cuda":
    print(f"✅ GPU 可用: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ 警告: 使用 CPU 訓練會非常慢!")
158
+
159
# Move the loss weights onto the GPU so CrossEntropyLoss can consume them there.
if device == "cuda" and class_weights is not None:
    class_weights = class_weights.to(device)
161
+
162
# ==================== 6. Tokenizer ====================
print("\n🤖 載入 Llama 模型和 Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Llama tokenizers ship without a pad token; reuse EOS so batch padding works.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
168
+
169
# ==================== 7-8. Load baseline and trainable model copies ====================
# The two from_pretrained calls were byte-for-byte duplicates; factor them
# into one helper so dtype/device policy cannot drift between the copies.
def _load_classifier():
    """Load a fresh 2-label sequence-classification head on MODEL_NAME.

    fp16 with device_map="auto" on GPU, fp32 on CPU. The pad token id must be
    set explicitly because the Llama tokenizer has no pad token of its own.
    """
    clf = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=2,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        device_map="auto" if device == "cuda" else None,
    )
    clf.config.pad_token_id = tokenizer.pad_token_id
    return clf

# Un-finetuned copy, kept frozen purely for the before/after comparison.
print("\n📦 載入未微調的基礎模型 (Baseline)...")
baseline_model = _load_classifier()
print("✅ Baseline 模型載入完成")

# Second copy that will be wrapped with LoRA adapters and fine-tuned.
print("\n🔧 載入用於微調的模型...")
base_model = _load_classifier()
print("✅ 基礎模型載入完成")
190
+
191
# ==================== 9. LoRA configuration ====================
print("\n🔧 配置 LoRA...")
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=TRAIN_CONFIG["lora_r"],
    lora_alpha=TRAIN_CONFIG["lora_alpha"],
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    bias="none"
)

model = get_peft_model(base_model, lora_config)

# Count parameters in a single pass over the model.
trainable_params = 0
total_params = 0
for p in model.parameters():
    n = p.numel()
    total_params += n
    if p.requires_grad:
        trainable_params += n
print(f"✅ LoRA 配置完成")
print(f" 可訓練參數: {trainable_params:,} ({trainable_params/total_params*100:.2f}%)")
207
+
208
# ==================== 10. Tokenize the datasets ====================
print("\n🔄 預處理數據...")

def preprocess_function(examples):
    """Tokenize a batch of texts, truncating to MAX_LENGTH.

    Fix: the original padded every example to max_length=512 even though a
    DataCollatorWithPadding is used at train/eval time. Dropping the static
    padding lets the collator pad each batch dynamically to its longest
    member, which is the documented pattern and avoids wasted compute on
    long pad runs.
    """
    return tokenizer(
        examples['Text'],
        truncation=True,
        max_length=MAX_LENGTH,
    )

tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=['Text'])
# Trainer expects the label column to be named "labels".
tokenized_dataset = tokenized_dataset.rename_column("nbcd", "labels")
print("✅ 數據預處理完成")
222
+
223
# ==================== 11. Evaluation metrics ====================
def compute_metrics(eval_pred):
    """Compute accuracy/precision/recall/F1 for binary classification.

    Fix: some model heads return logits wrapped in a tuple; unwrap before
    argmax so evaluation does not crash. Metrics are computed with numpy and
    match sklearn's ``average='binary', zero_division=0`` semantics exactly
    (label 1 is the positive class; undefined ratios become 0.0).

    Args:
        eval_pred: (predictions, labels) pair from the Trainer, where
            predictions is an (N, 2) logit array (possibly inside a tuple).

    Returns:
        dict with 'accuracy', 'precision', 'recall', 'f1' as floats.
    """
    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    preds = np.argmax(predictions, axis=1)
    labels = np.asarray(labels)

    tp = int(np.sum((preds == 1) & (labels == 1)))
    fp = int(np.sum((preds == 1) & (labels == 0)))
    fn = int(np.sum((preds == 0) & (labels == 1)))

    accuracy = float(np.mean(preds == labels))
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }
239
+
240
# ==================== 12. Evaluate the un-finetuned baseline ====================
print("\n" + "="*70)
print("📊 評估未微調的 Baseline 模型...")
print("="*70)

baseline_args = TrainingArguments(
    output_dir="./temp_baseline",
    per_device_eval_batch_size=TRAIN_CONFIG["batch_size"],
    # NOTE(review): bf16 is enabled whenever CUDA is present — assumes the GPU
    # supports bfloat16; confirm on pre-Ampere hardware.
    bf16=(device == "cuda"),
    report_to="none"
)
baseline_trainer = Trainer(
    model=baseline_model,
    args=baseline_args,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics
)

baseline_train_results = baseline_trainer.evaluate(eval_dataset=tokenized_dataset['train'])
baseline_test_results = baseline_trainer.evaluate(eval_dataset=tokenized_dataset['test'])

for heading, res in (("訓練集", baseline_train_results), ("測試集", baseline_test_results)):
    print(f"\n🔍 Baseline 模型 - {heading}結果:")
    print(f" Accuracy: {res['eval_accuracy']:.4f}")
    print(f" Precision: {res['eval_precision']:.4f}")
    print(f" Recall: {res['eval_recall']:.4f}")
    print(f" F1 Score: {res['eval_f1']:.4f}")
272
+
273
# ==================== 13. Custom Trainer ====================
if BALANCE_CONFIG['use_class_weights']:
    class WeightedTrainer(Trainer):
        """Trainer whose loss applies per-class weights to cross-entropy."""

        def __init__(self, *args, class_weights=None, **kwargs):
            super().__init__(*args, **kwargs)
            self.class_weights = class_weights

        def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
            # Pop the labels so the model does not compute its own unweighted loss.
            labels = inputs.pop("labels")
            outputs = model(**inputs)
            num_labels = self.model.config.num_labels
            criterion = torch.nn.CrossEntropyLoss(weight=self.class_weights)
            loss = criterion(outputs.logits.view(-1, num_labels), labels.view(-1))
            return (loss, outputs) if return_outputs else loss

    TrainerClass = WeightedTrainer
else:
    TrainerClass = Trainer
293
+
294
# ==================== 14. Training configuration ====================
print("\n" + "="*70)
print("⚙️ 配置微調訓練器...")
print("="*70)

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=TRAIN_CONFIG["num_epochs"],
    per_device_train_batch_size=TRAIN_CONFIG["batch_size"],
    per_device_eval_batch_size=TRAIN_CONFIG["batch_size"],
    learning_rate=TRAIN_CONFIG["learning_rate"],
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    # NOTE(review): bf16 assumed supported whenever CUDA is present — confirm
    # on pre-Ampere GPUs.
    bf16=(device == "cuda"),
    gradient_accumulation_steps=2,
    warmup_steps=50,
    report_to="none",
    seed=42
)

# Fix: the two Trainer constructions differed only in the class_weights kwarg;
# build the shared kwargs once instead of duplicating the whole call.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['test'],
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)
if BALANCE_CONFIG['use_class_weights']:
    # Only WeightedTrainer accepts this kwarg.
    trainer_kwargs["class_weights"] = class_weights

trainer = TrainerClass(**trainer_kwargs)
340
+
341
# ==================== 15. Fine-tune ====================
print("\n" + "="*70)
print("🚀 開始微調訓練...")
print("="*70 + "\n")

start_time = datetime.now()

try:
    train_result = trainer.train()
    # `duration` (minutes) is reused later when the results JSON is written.
    duration = (datetime.now() - start_time).total_seconds() / 60
    print("\n" + "="*70)
    print(f"✅ 訓練完成!")
    print(f" 耗時: {duration:.1f} 分鐘")
    print("="*70)
except Exception as e:
    # A failed training run is fatal: dump the traceback and exit non-zero.
    print(f"\n❌ 訓練失敗: {e}")
    import traceback
    traceback.print_exc()
    exit(1)
363
+
364
# ==================== 16. Evaluate the fine-tuned model ====================
print("\n" + "="*70)
print("📊 評估微調後的模型...")
print("="*70)

finetuned_train_results = trainer.evaluate(eval_dataset=tokenized_dataset['train'])
finetuned_test_results = trainer.evaluate(eval_dataset=tokenized_dataset['test'])

for heading, res in (("訓練集", finetuned_train_results), ("測試集", finetuned_test_results)):
    print(f"\n🔍 微調模型 - {heading}結果:")
    print(f" Accuracy: {res['eval_accuracy']:.4f}")
    print(f" Precision: {res['eval_precision']:.4f}")
    print(f" Recall: {res['eval_recall']:.4f}")
    print(f" F1 Score: {res['eval_f1']:.4f}")

# ==================== 17. Baseline vs fine-tuned comparison ====================
print("\n" + "="*70)
print("📈 Baseline vs Fine-tuned 比較 (測試集)")
print("="*70)

print(f"\n{'指標':<12} {'Baseline':<12} {'Fine-tuned':<12} {'改善':<12} {'狀態'}")
print("-" * 70)

for metric in ('accuracy', 'precision', 'recall', 'f1'):
    baseline_val = baseline_test_results[f'eval_{metric}']
    finetuned_val = finetuned_test_results[f'eval_{metric}']
    improvement = finetuned_val - baseline_val
    improvement_pct = (improvement / baseline_val * 100) if baseline_val > 0 else 0

    if improvement > 0:
        status = "✅ 提升"
    elif improvement < 0:
        status = "⚠️ 下降"
    else:
        status = "➖ 持平"

    print(f"{metric.capitalize():<12} {baseline_val:<12.4f} {finetuned_val:<12.4f} "
          f"{improvement:+.4f} ({improvement_pct:+.1f}%) {status}")

print("="*70)
405
+
406
# ==================== 18. Inference comparison on sample texts ====================
print("\n" + "="*70)
print("🧪 測試推論比較 (5個樣本)")
print("="*70)

def predict_with_model(model_obj, text):
    """Classify one text with model_obj; return (predicted_class, confidence).

    Fixes vs. original:
    - force eval mode so dropout layers are disabled (the PEFT model can
      still be in train mode after trainer.train(), making predictions
      nondeterministic);
    - always move inputs to the model's own device instead of only when the
      script-level `device` is "cuda" (device_map="auto" may shard/place the
      model independently of that flag).
    """
    model_obj.eval()
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=MAX_LENGTH)
    inputs = {k: v.to(model_obj.device) for k, v in inputs.items()}

    with torch.no_grad():
        logits = model_obj(**inputs).logits
    probs = torch.nn.functional.softmax(logits, dim=-1)
    predicted_class = torch.argmax(probs, dim=-1).item()
    confidence = probs[0][predicted_class].item()
    return predicted_class, confidence

test_samples = test_df.head(5)

baseline_correct = 0
finetuned_correct = 0
baseline_class1_correct = 0
finetuned_class1_correct = 0
class1_total = 0

for idx, (_, row) in enumerate(test_samples.iterrows(), 1):
    true_label = row['nbcd']
    text = row['Text']

    # Baseline prediction
    baseline_pred, baseline_conf = predict_with_model(baseline_model, text)
    baseline_match = "✅" if baseline_pred == true_label else "❌"
    if baseline_pred == true_label:
        baseline_correct += 1

    # Fine-tuned prediction
    finetuned_pred, finetuned_conf = predict_with_model(model, text)
    finetuned_match = "✅" if finetuned_pred == true_label else "❌"
    if finetuned_pred == true_label:
        finetuned_correct += 1

    # Positive-class (Class 1) hit counters
    if true_label == 1:
        class1_total += 1
        if baseline_pred == 1:
            baseline_class1_correct += 1
        if finetuned_pred == 1:
            finetuned_class1_correct += 1

    print(f"\n樣本 {idx} (實際標籤: {true_label}):")
    print(f" 文本: {text[:100]}...")
    print(f" {baseline_match} Baseline: 預測={baseline_pred} 信心度={baseline_conf:.3f}")
    print(f" {finetuned_match} Fine-tuned: 預測={finetuned_pred} 信心度={finetuned_conf:.3f}")

print("\n" + "="*70)
print("📊 5個樣本預測準確率:")
print(f" Baseline: {baseline_correct}/5 = {baseline_correct/5*100:.1f}%")
print(f" Fine-tuned: {finetuned_correct}/5 = {finetuned_correct/5*100:.1f}%")
if class1_total > 0:
    print(f"\n Class 1 識別率 (共 {class1_total} 個):")
    print(f" Baseline: {baseline_class1_correct}/{class1_total}")
    print(f" Fine-tuned: {finetuned_class1_correct}/{class1_total}")
print("="*70)
470
+
471
# ==================== 19. Save model and results ====================
print("\n💾 保存模型和結果...")
trainer.save_model()
tokenizer.save_pretrained(OUTPUT_DIR)

def _metrics_dict(results):
    """Extract the four eval metrics from a Trainer.evaluate() dict as plain
    floats (JSON-serializable). Factored out: the original repeated this
    4-key literal four times."""
    return {m: float(results[f'eval_{m}']) for m in ('accuracy', 'precision', 'recall', 'f1')}

comparison_results = {
    "model": MODEL_NAME,
    "config": TRAIN_CONFIG,
    "balance_config": BALANCE_CONFIG,
    "train_time_minutes": duration,
    "baseline_results": {
        "train": _metrics_dict(baseline_train_results),
        "test": _metrics_dict(baseline_test_results),
    },
    "finetuned_results": {
        "train": _metrics_dict(finetuned_train_results),
        "test": _metrics_dict(finetuned_test_results),
    },
    # Test-set deltas, fine-tuned minus baseline.
    "improvements": {
        m: float(finetuned_test_results[f'eval_{m}'] - baseline_test_results[f'eval_{m}'])
        for m in ('accuracy', 'precision', 'recall', 'f1')
    },
    "timestamp": datetime.now().isoformat(),
    "device": device
}

with open(f"{OUTPUT_DIR}/comparison_results.json", "w", encoding='utf-8') as f:
    json.dump(comparison_results, f, indent=2, ensure_ascii=False)

print(f"✅ 結果已保存到: {OUTPUT_DIR}/comparison_results.json")
523
+
524
# ==================== 20. Summary ====================
rule = "=" * 70
print("\n" + rule)
print("🎉 訓練和比較流程全部完成!")
print(rule)
print(f"\n📦 輸出內容:")
print(f" 微調模型: {OUTPUT_DIR}/")
print(f" 比較結果: {OUTPUT_DIR}/comparison_results.json")
print(f" 訓練日誌: {OUTPUT_DIR}/logs/")
print("\n💡 關鍵發現:")
improvements = comparison_results['improvements']
print(f" 測試集 F1 Score 提升: {improvements['f1']:+.4f}")
print(f" 測試集 Recall 提升: {improvements['recall']:+.4f}")
print(f" 測試集 Accuracy 提升: {improvements['accuracy']:+.4f}")
print(rule + "\n")
training_data.csv ADDED
The diff for this file is too large to render. See raw diff