Hajime MATSUMOTO committed on
Commit
113833d
·
1 Parent(s): c179929

L40S optimization: batch 8, disable gradient checkpointing, parallel dataloader

Browse files
Files changed (1) hide show
  1. train.py +8 -5
train.py CHANGED
@@ -237,10 +237,10 @@ training_args = TrainingArguments(
237
  num_train_epochs=1,
238
  max_steps=-1, # -1 = エポックベース
239
 
240
- # バッチサイズ (高速化設定)
241
- per_device_train_batch_size=4,
242
- per_device_eval_batch_size=4,
243
- gradient_accumulation_steps=4, # 有効バッチサイズ: 4*4=16
244
 
245
  # 学習率 (1エポックで収束するよう高め)
246
  learning_rate=2e-4,
@@ -264,7 +264,10 @@ training_args = TrainingArguments(
264
  # ใใฎไป–
265
  report_to="none",
266
  group_by_length=True,
267
- gradient_checkpointing=True,
 
 
 
268
 
269
  # 再開用
270
  save_safetensors=True,
 
237
  num_train_epochs=1,
238
  max_steps=-1, # -1 = エポックベース
239
 
240
+ # バッチサイズ (L40S 48GB - 攻めた設定)
241
+ per_device_train_batch_size=8,
242
+ per_device_eval_batch_size=8,
243
+ gradient_accumulation_steps=2, # 有効バッチサイズ: 8*2=16
244
 
245
  # 学習率 (1エポックで収束するよう高め)
246
  learning_rate=2e-4,
 
264
  # ใใฎไป–
265
  report_to="none",
266
  group_by_length=True,
267
+ gradient_checkpointing=False, # L40Sใฏ48GBใ‚ใ‚‹ใฎใงใ‚ชใƒ•ใง้ซ˜้€ŸๅŒ–
268
+ torch_compile=False, # 初回コンパイル時間を避ける
269
+ dataloader_num_workers=4,
270
+ dataloader_pin_memory=True,
271
 
272
  # 再開用
273
  save_safetensors=True,