Spaces:

hajimemat
/

glaive-7b-training

Runtime error

Hajime MATSUMOTO committed 15 days ago

Commit

c179929

1 Parent(s): 29f5c51

Speed optimization: 1 epoch, larger per-device batch, shorter seq, no packing

Files changed (1) hide show

train.py CHANGED Viewed

@@ -234,16 +234,16 @@ training_args = TrainingArguments(
     output_dir=CHECKPOINT_DIR,
     # エポック・ステップ
-    num_train_epochs=2,
     max_steps=-1,  # -1 = エポックベース
-    # バッチサイズ (L40S 48GB + 7B QLoRA)
-    per_device_train_batch_size=2,
-    per_device_eval_batch_size=2,
-    gradient_accumulation_steps=16,  # 有効バッチサイズ: 2*16=32
-    # 学習率
-    learning_rate=1e-4,
     weight_decay=0.01,
     warmup_ratio=0.03,
     lr_scheduler_type="cosine",
@@ -332,8 +332,8 @@ def main():
         args=training_args,
         peft_config=lora_config,
         tokenizer=tokenizer,
-        max_seq_length=2048,  # 7Bなので少し長く
-        packing=True,
         dataset_text_field="text",
         callbacks=[VerboseLoggingCallback()],
     )

     output_dir=CHECKPOINT_DIR,
     # エポック・ステップ
+    num_train_epochs=1,
     max_steps=-1,  # -1 = エポックベース
+    # バッチサイズ (高速化設定)
+    per_device_train_batch_size=4,
+    per_device_eval_batch_size=4,
+    gradient_accumulation_steps=4,  # 有効バッチサイズ: 4*4=16
+    # 学習率 (1エポックで収束するよう高め)
+    learning_rate=2e-4,
     weight_decay=0.01,
     warmup_ratio=0.03,
     lr_scheduler_type="cosine",
         args=training_args,
         peft_config=lora_config,
         tokenizer=tokenizer,
+        max_seq_length=1024,  # 高速化のため短縮
+        packing=False,  # flash attention不要で高速化
         dataset_text_field="text",
         callbacks=[VerboseLoggingCallback()],
     )