Update train_seallm_khm_sum.py

train_seallm_khm_sum.py CHANGED +4 -5
@@ -89,7 +89,6 @@ def load_model_and_tokenizer():
 
     return model, tokenizer
 
-
 def main():
     train_ds, eval_ds = load_khm_dataset()
     model, tokenizer = load_model_and_tokenizer()
@@ -102,6 +101,7 @@ def main():
         task_type="CAUSAL_LM",
     )
 
+    # NOTE: no max_seq_length here
     sft_config = SFTConfig(
         output_dir="seallm-khm-sum-lora",
         num_train_epochs=2,
@@ -110,17 +110,16 @@ def main():
         gradient_accumulation_steps=8,
         learning_rate=2e-4,
         logging_steps=10,
-        eval_strategy="steps",
+        evaluation_strategy="steps", # <- was eval_strategy
         eval_steps=200,
         save_steps=200,
         save_total_limit=2,
-        max_seq_length=1024,
         packing=True,
         lr_scheduler_type="cosine",
         warmup_ratio=0.03,
         bf16=True,
         gradient_checkpointing=True,
-        report_to="none", # or "wandb"
+        report_to="none", # or "wandb"
     )
 
     trainer = SFTTrainer(
@@ -131,6 +130,7 @@ def main():
         peft_config=lora_config,
         args=sft_config,
         dataset_text_field="text",
+        max_seq_length=1024, # <- moved here
     )
 
     trainer.train()
@@ -139,7 +139,6 @@ def main():
     trainer.model.save_pretrained("seallm-khm-sum-lora")
     tokenizer.save_pretrained("seallm-khm-sum-lora")
 
-    # Optionally push directly to the Hub (needs HF_TOKEN env)
    repo_id = os.environ.get("OUTPUT_REPO_ID", "")
     if repo_id:
         trainer.model.push_to_hub(repo_id)
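Both knobs this commit touches are version-sensitive: transformers renamed `evaluation_strategy` to `eval_strategy` in v4.41, and trl has moved `max_seq_length` between `SFTTrainer` and `SFTConfig` across releases, which is why the commit shuffles them around. Below is a minimal sketch of feature-detecting the accepted spelling at runtime instead of hard-coding one; it uses only the `SFTConfig`/`SFTTrainer` names already in the script, and the `inspect` guard is an illustration, not part of the commit.

```python
import inspect

from trl import SFTConfig

# Fields accepted by this installation's SFTConfig (a dataclass, so
# inspect.signature() exposes its generated __init__, including the
# TrainingArguments fields it inherits).
cfg_params = inspect.signature(SFTConfig).parameters

config_kwargs = dict(
    output_dir="seallm-khm-sum-lora",
    eval_steps=200,
)

# transformers renamed `evaluation_strategy` to `eval_strategy` in v4.41;
# use whichever spelling this installation understands.
strategy_key = "eval_strategy" if "eval_strategy" in cfg_params else "evaluation_strategy"
config_kwargs[strategy_key] = "steps"

# Depending on the trl release, max_seq_length lives on SFTConfig or is
# passed to SFTTrainer directly (the mismatch this commit works around).
trainer_kwargs = {}
if "max_seq_length" in cfg_params:
    config_kwargs["max_seq_length"] = 1024
else:
    trainer_kwargs["max_seq_length"] = 1024

sft_config = SFTConfig(**config_kwargs)
# ...then: SFTTrainer(model=model, args=sft_config, **trainer_kwargs)
```

A guard like this keeps the script working whether the Space pins the older or the newer trl/transformers releases, rather than chasing each rename with a commit like this one.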