Spaces:

Snow2222
/

SSS-Distillation

Runtime error

Snow2222 committed on Feb 10, 2025

Commit

eb81ebd

verified ·

1 Parent(s): 9f0184e

Update train.py

Files changed (1) hide show

train.py CHANGED Viewed

@@ -17,7 +17,7 @@ else:
 # 定义教师模型与学生模型
 teacher_model_name = "Qwen/Qwen1.5-7B-Chat"  # 教师模型（较大模型）
-student_model_name = "distilgpt2"            # ✅ 学生模型，建议用 distilgpt2 替代 gpt2
 # 加载教师模型（仅用于生成软标签，不参与梯度计算）
 teacher = AutoModelForCausalLM.from_pretrained(
@@ -68,13 +68,13 @@ def preprocess_data(example):
 # 预处理数据集
 dataset = dataset.map(preprocess_data, batched=True)
-# 自定义知识蒸馏 Trainer
 class DistillationTrainer(Trainer):
     def __init__(self, teacher, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.teacher = teacher  # ✅ 传入教师模型
-    def compute_loss(self, model, inputs, return_outputs=False):  # ❌ 去掉 num_items_in_batch
         labels = inputs["input_ids"]
         # ✅ 计算学生模型的输出
@@ -104,6 +104,13 @@ class DistillationTrainer(Trainer):
         return (loss, outputs_student) if return_outputs else loss
 # 训练参数
 training_args = TrainingArguments(
     output_dir="/tmp/distilled_model",

 # 定义教师模型与学生模型
 teacher_model_name = "Qwen/Qwen1.5-7B-Chat"  # 教师模型（较大模型）
+student_model_name = "distilgpt2"            # ✅ 建议用 distilgpt2
 # 加载教师模型（仅用于生成软标签，不参与梯度计算）
 teacher = AutoModelForCausalLM.from_pretrained(
 # 预处理数据集
 dataset = dataset.map(preprocess_data, batched=True)
+# ✅ 自定义 `DistillationTrainer`，覆盖 `training_step()` 以防止 `num_items_in_batch` 传递
 class DistillationTrainer(Trainer):
     def __init__(self, teacher, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.teacher = teacher  # ✅ 传入教师模型
+    def compute_loss(self, model, inputs, return_outputs=False):
         labels = inputs["input_ids"]
         # ✅ 计算学生模型的输出
         return (loss, outputs_student) if return_outputs else loss
+    def training_step(self, model, inputs):
+        """✅ 关键修复点：覆盖 `training_step()`，防止 `num_items_in_batch` 传递"""
+        model.train()
+        inputs = self._prepare_inputs(inputs)
+        loss = self.compute_loss(model, inputs)  # ✅ 直接调用，不传递 `num_items_in_batch`
+        return loss
 # 训练参数
 training_args = TrainingArguments(
     output_dir="/tmp/distilled_model",