Update README.md
Browse files
README.md
CHANGED
|
@@ -68,7 +68,7 @@ The model is trained on 8 A100 80GB GPUs for approximately 15 hrs.
|
|
| 68 |
| per_device_train_batch_size | 2 |
|
| 69 |
| gradient_accumulation_steps | 1 |
|
| 70 |
| epoch | 3 |
|
| 71 |
-
| steps |
|
| 72 |
| learning_rate | 2e-5 |
|
| 73 |
| lr scheduler type | cosine |
|
| 74 |
| warmup ratio | 0.1 |
|
|
|
|
| 68 |
| per_device_train_batch_size | 2 |
|
| 69 |
| gradient_accumulation_steps | 1 |
|
| 70 |
| epoch | 3 |
|
| 71 |
+
| steps | 34503 |
|
| 72 |
| learning_rate | 2e-5 |
|
| 73 |
| lr scheduler type | cosine |
|
| 74 |
| warmup ratio | 0.1 |
|