Update README.md
Browse files

### README.md (CHANGED)
```diff
@@ -213,7 +213,8 @@ See [here](https://github.com/daniel-furman/sft-demos/blob/main/src/sft/mistral/

 The following `TrainingArguments` config was used:

-
+- output_dir = "./results"
+- num_train_epochs = 3
 - auto_find_batch_size = True
 - gradient_accumulation_steps = 1
 - optim = "paged_adamw_32bit"
@@ -223,6 +224,8 @@ The following `TrainingArguments` config was used:
 - warmup_ratio = 0.03
 - logging_strategy = "steps"
 - logging_steps = 25
+- evaluation_strategy = "epoch"
+- prediction_loss_only = True
 - bf16 = True

 The following `bitsandbytes` quantization config was used:
```