lightita committed on
Commit 3628aa5 · verified · 1 Parent(s): 2096c4f

Update train_seallm_khm_sum.py

Files changed (1): train_seallm_khm_sum.py (+4, -5)
train_seallm_khm_sum.py CHANGED
@@ -89,7 +89,6 @@ def load_model_and_tokenizer():
 
     return model, tokenizer
 
-
 def main():
     train_ds, eval_ds = load_khm_dataset()
     model, tokenizer = load_model_and_tokenizer()
@@ -102,6 +101,7 @@ def main():
         task_type="CAUSAL_LM",
     )
 
+    # NOTE: no max_seq_length here
     sft_config = SFTConfig(
         output_dir="seallm-khm-sum-lora",
         num_train_epochs=2,
@@ -110,17 +110,16 @@ def main():
         gradient_accumulation_steps=8,
         learning_rate=2e-4,
         logging_steps=10,
-        eval_strategy="steps",
+        evaluation_strategy="steps",  # <- was eval_strategy
         eval_steps=200,
         save_steps=200,
         save_total_limit=2,
-        max_seq_length=1024,
         packing=True,
         lr_scheduler_type="cosine",
         warmup_ratio=0.03,
         bf16=True,
         gradient_checkpointing=True,
-        report_to="none",  # or "wandb" etc.
+        report_to="none",  # or "wandb"
     )
 
     trainer = SFTTrainer(
@@ -131,6 +130,7 @@ def main():
         peft_config=lora_config,
         args=sft_config,
         dataset_text_field="text",
+        max_seq_length=1024,  # <- moved here
     )
 
     trainer.train()
@@ -139,7 +139,6 @@ def main():
     trainer.model.save_pretrained("seallm-khm-sum-lora")
     tokenizer.save_pretrained("seallm-khm-sum-lora")
 
-    # Optionally push directly to the Hub (needs HF_TOKEN env)
     repo_id = os.environ.get("OUTPUT_REPO_ID", "")
     if repo_id:
         trainer.model.push_to_hub(repo_id)
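
For reference, this is roughly how the file's main() reads once the commit is applied, assembled from the hunks above. It is a sketch, not the full file: load_khm_dataset(), load_model_and_tokenizer(), the LoraConfig fields other than task_type, and the per-device batch size settings are defined in lines the diff does not show, and the SFTTrainer keyword names outside the changed lines (model, train_dataset, eval_dataset, tokenizer) are assumed from the usual TRL signature. The argument placement assumes a transformers release that still expects evaluation_strategy and a TRL release whose SFTTrainer still accepts dataset_text_field and max_seq_length directly rather than via SFTConfig.

import os

from peft import LoraConfig
from trl import SFTConfig, SFTTrainer


def main():
    train_ds, eval_ds = load_khm_dataset()           # defined earlier in the file
    model, tokenizer = load_model_and_tokenizer()    # defined earlier in the file

    lora_config = LoraConfig(
        # ... rank, alpha, target modules as defined in the file (not shown in the diff) ...
        task_type="CAUSAL_LM",
    )

    # NOTE: no max_seq_length here
    sft_config = SFTConfig(
        output_dir="seallm-khm-sum-lora",
        num_train_epochs=2,
        # ... per-device batch size settings (lines not shown in the diff) ...
        gradient_accumulation_steps=8,
        learning_rate=2e-4,
        logging_steps=10,
        evaluation_strategy="steps",  # <- was eval_strategy
        eval_steps=200,
        save_steps=200,
        save_total_limit=2,
        packing=True,
        lr_scheduler_type="cosine",
        warmup_ratio=0.03,
        bf16=True,
        gradient_checkpointing=True,
        report_to="none",  # or "wandb"
    )

    trainer = SFTTrainer(
        model=model,                  # assumed kwargs; not shown in the diff
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        peft_config=lora_config,
        args=sft_config,
        dataset_text_field="text",
        max_seq_length=1024,  # <- moved here
    )

    trainer.train()

    trainer.model.save_pretrained("seallm-khm-sum-lora")
    tokenizer.save_pretrained("seallm-khm-sum-lora")

    repo_id = os.environ.get("OUTPUT_REPO_ID", "")
    if repo_id:
        trainer.model.push_to_hub(repo_id)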