Update README.md
Browse files
README.md
CHANGED
|
@@ -257,7 +257,32 @@ Or you can try out Gemma-T4 (Thanks to Sleepdeprived) : https://huggingface.co/s
|
|
| 257 |
<details>
|
| 258 |
<summary>SFT Trainer Config</summary>
|
| 259 |
<pre><code>
|
| 260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
</code></pre>
|
| 262 |
</details>
|
| 263 |
</div>
|
|
|
|
| 257 |
<details>
|
| 258 |
<summary>SFT Trainer Config</summary>
|
| 259 |
<pre><code>
|
| 260 |
+
trainer = SFTTrainer(
|
| 261 |
+
model=model,
|
| 262 |
+
tokenizer=tokenizer,
|
| 263 |
+
train_dataset=dataset,
|
| 264 |
+
eval_dataset=None,
|
| 265 |
+
args=SFTConfig(
|
| 266 |
+
dataset_text_field="text",
|
| 267 |
+
per_device_train_batch_size=1,
|
| 268 |
+
gradient_accumulation_steps=4,
|
| 269 |
+
warmup_steps=50,
|
| 270 |
+
num_train_epochs=1,
|
| 271 |
+
learning_rate=1e-4,
|
| 272 |
+
max_grad_norm=0.2,
|
| 273 |
+
logging_steps=1,
|
| 274 |
+
optim="paged_adamw_8bit",
|
| 275 |
+
weight_decay=0.01,
|
| 276 |
+
lr_scheduler_type="cosine",
|
| 277 |
+
seed=3407,
|
| 278 |
+
report_to="wandb",
|
| 279 |
+
output_dir = "outputs",
|
| 280 |
+
save_strategy = "steps",
|
| 281 |
+
save_steps = 500,
|
| 282 |
+
adam_beta1=0.92,
|
| 283 |
+
adam_beta2=0.999,
|
| 284 |
+
),
|
| 285 |
+
)
|
| 286 |
</code></pre>
|
| 287 |
</details>
|
| 288 |
</div>
|