Upload train_code_reasoning.py
Browse files
- train_code_reasoning.py +2 -3
train_code_reasoning.py
CHANGED
|
@@ -166,7 +166,7 @@ training_args = SFTConfig(
|
|
| 166 |
per_device_train_batch_size=4,
|
| 167 |
gradient_accumulation_steps=4,
|
| 168 |
learning_rate=5e-5,
|
| 169 |
-
|
| 170 |
lr_scheduler_type="cosine",
|
| 171 |
bf16=True,
|
| 172 |
gradient_checkpointing=True,
|
|
@@ -181,7 +181,6 @@ training_args = SFTConfig(
|
|
| 181 |
report_to=["trackio"],
|
| 182 |
seed=42,
|
| 183 |
hub_strategy="checkpoint",
|
| 184 |
-
hub_always_push=True,
|
| 185 |
)
|
| 186 |
|
| 187 |
print("\nInitializing SFTTrainer...")
|
|
@@ -189,7 +188,7 @@ trainer = SFTTrainer(
|
|
| 189 |
model=MODEL_ID,
|
| 190 |
train_dataset=train_dataset,
|
| 191 |
args=training_args,
|
| 192 |
-
|
| 193 |
max_seq_length=2048,
|
| 194 |
)
|
| 195 |
|
|
|
|
| 166 |
per_device_train_batch_size=4,
|
| 167 |
gradient_accumulation_steps=4,
|
| 168 |
learning_rate=5e-5,
|
| 169 |
+
warmup_steps=300,
|
| 170 |
lr_scheduler_type="cosine",
|
| 171 |
bf16=True,
|
| 172 |
gradient_checkpointing=True,
|
|
|
|
| 181 |
report_to=["trackio"],
|
| 182 |
seed=42,
|
| 183 |
hub_strategy="checkpoint",
|
|
|
|
| 184 |
)
|
| 185 |
|
| 186 |
print("\nInitializing SFTTrainer...")
|
|
|
|
| 188 |
model=MODEL_ID,
|
| 189 |
train_dataset=train_dataset,
|
| 190 |
args=training_args,
|
| 191 |
+
processing_class=tokenizer,
|
| 192 |
max_seq_length=2048,
|
| 193 |
)
|
| 194 |
|