Update train_seallm_khm_sum.py
train_seallm_khm_sum.py CHANGED (+4 -4)
@@ -111,16 +111,16 @@ def main():
         gradient_accumulation_steps=8,
         learning_rate=2e-4,
         logging_steps=10,
-        evaluation_strategy="steps",  # eval every eval_steps
-        eval_steps=200,
         save_steps=200,
         save_total_limit=2,
         lr_scheduler_type="cosine",
         warmup_ratio=0.03,
-
-
+        # old transformers may not support bf16, so let's be safe:
+        fp16=True,  # use fp16 instead of bf16
+        report_to="none",  # if this errors next, we'll drop it
     )
 
+
     trainer = SFTTrainer(
         model=model,
         tokenizer=tokenizer,
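Both added comments hedge against an older transformers install. As a side sketch (not part of this commit; build_training_args is a hypothetical helper), the same goal can be reached without hard-coding: probe the GPU for bf16 support and fall back to fp16, and drop any keyword the installed TrainingArguments does not accept, which would also cover the removed evaluation_strategy/eval_steps pair and the provisional report_to.

import inspect

import torch
from transformers import TrainingArguments


def build_training_args(output_dir, **kwargs):
    """Build TrainingArguments defensively for older transformers versions.

    - Prefer bf16 when the GPU actually supports it, else fall back to fp16.
    - Silently drop any kwarg the installed release does not accept
      (e.g. evaluation_strategy was renamed across releases).
    """
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        kwargs.setdefault("bf16", True)
    elif torch.cuda.is_available():
        kwargs.setdefault("fp16", True)

    # Keep only the keywords this transformers version's __init__ knows about.
    accepted = set(inspect.signature(TrainingArguments.__init__).parameters)
    kwargs = {k: v for k, v in kwargs.items() if k in accepted}
    return TrainingArguments(output_dir=output_dir, **kwargs)

Called as build_training_args("out", learning_rate=2e-4, evaluation_strategy="steps", report_to="none"), it passes through only the kwargs the installed release understands, so the script survives version drift without the trial-and-error the commit comments anticipate.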