Update README.md
Browse files
README.md
CHANGED
|
@@ -27,21 +27,21 @@ license: gpl-3.0
|
|
| 27 |
|
| 28 |
| | Value |
|
| 29 |
| :------------: | :------------------------: |
|
| 30 |
-
| Batch Size |
|
| 31 |
-
| Grad Acc Steps |
|
| 32 |
-
| Max LR |
|
| 33 |
-
| LR Scheduler | Trapezoidal
|
| 34 |
-
| Warmup Ratio |
|
| 35 |
-
| Decay Ratio |
|
| 36 |
| Decay Progress | Exponential |
|
| 37 |
-
| Min Decay LR |
|
| 38 |
| Optimizer | AdamW |
|
| 39 |
-
| Weight Decay |
|
| 40 |
-
| Max Grad Norm |
|
| 41 |
-
| Num Epochs |
|
| 42 |
| FP16 | True |
|
| 43 |
| Device | Tesla-V100-SXM2-32GB |
|
| 44 |
-
| Seed |
|
| 45 |
|
| 46 |
|
| 47 |
|
|
|
|
| 27 |
|
| 28 |
| | Value |
|
| 29 |
| :------------: | :------------------------: |
|
| 30 |
+
| Batch Size | 1024 |
|
| 31 |
+
| Grad Acc Steps | 1 |
|
| 32 |
+
| Max LR | 1.5 * 10^-3 |
|
| 33 |
+
| LR Scheduler | Trapezoidal / Cosine |
|
| 34 |
+
| Warmup Ratio | 0.01 |
|
| 35 |
+
| Decay Ratio | 0.35 |
|
| 36 |
| Decay Progress | Exponential |
|
| 37 |
+
| Min Decay LR | 0.01 × Max LR |
|
| 38 |
| Optimizer | AdamW |
|
| 39 |
+
| Weight Decay | 0.1 |
|
| 40 |
+
| Max Grad Norm | 1.0 |
|
| 41 |
+
| Num Epochs | 1 |
|
| 42 |
| FP16 | True |
|
| 43 |
| Device | Tesla-V100-SXM2-32GB |
|
| 44 |
+
| Seed | 3407 |
|
| 45 |
|
| 46 |
|
| 47 |
|