Spaces:
Runtime error
Runtime error
Hajime MATSUMOTO
committed on
Commit
·
c179929
1
Parent(s):
29f5c51
Speed optimization: 1 epoch, larger batch, shorter seq, no packing
Browse files
train.py
CHANGED
|
@@ -234,16 +234,16 @@ training_args = TrainingArguments(
|
|
| 234 |
output_dir=CHECKPOINT_DIR,
|
| 235 |
|
| 236 |
# エポック・ステップ
|
| 237 |
-
num_train_epochs=
|
| 238 |
max_steps=-1, # -1 = エポックベース
|
| 239 |
|
| 240 |
-
# バッチサイズ (
|
| 241 |
-
per_device_train_batch_size=
|
| 242 |
-
per_device_eval_batch_size=
|
| 243 |
-
gradient_accumulation_steps=
|
| 244 |
|
| 245 |
-
# 学習率
|
| 246 |
-
learning_rate=
|
| 247 |
weight_decay=0.01,
|
| 248 |
warmup_ratio=0.03,
|
| 249 |
lr_scheduler_type="cosine",
|
|
@@ -332,8 +332,8 @@ def main():
|
|
| 332 |
args=training_args,
|
| 333 |
peft_config=lora_config,
|
| 334 |
tokenizer=tokenizer,
|
| 335 |
-
max_seq_length=
|
| 336 |
-
packing=
|
| 337 |
dataset_text_field="text",
|
| 338 |
callbacks=[VerboseLoggingCallback()],
|
| 339 |
)
|
|
|
|
| 234 |
output_dir=CHECKPOINT_DIR,
|
| 235 |
|
| 236 |
# エポック・ステップ
|
| 237 |
+
num_train_epochs=1,
|
| 238 |
max_steps=-1, # -1 = エポックベース
|
| 239 |
|
| 240 |
+
# バッチサイズ (高速化設定)
|
| 241 |
+
per_device_train_batch_size=4,
|
| 242 |
+
per_device_eval_batch_size=4,
|
| 243 |
+
gradient_accumulation_steps=4, # 有効バッチサイズ: 4*4=16
|
| 244 |
|
| 245 |
+
# 学習率 (1エポックで収束するよう高め)
|
| 246 |
+
learning_rate=2e-4,
|
| 247 |
weight_decay=0.01,
|
| 248 |
warmup_ratio=0.03,
|
| 249 |
lr_scheduler_type="cosine",
|
|
|
|
| 332 |
args=training_args,
|
| 333 |
peft_config=lora_config,
|
| 334 |
tokenizer=tokenizer,
|
| 335 |
+
max_seq_length=1024, # 高速化のため短縮
|
| 336 |
+
packing=False, # flash attention不要で高速化
|
| 337 |
dataset_text_field="text",
|
| 338 |
callbacks=[VerboseLoggingCallback()],
|
| 339 |
)
|