Upload 8 files
Browse files- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +83 -3
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 655342981
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae132a4a670d70c2365439b1b32f73578bbe513edd2e0299f01ea1302af81f0c
|
| 3 |
size 655342981
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 333968953
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95508259f48f41c3e02cf1556a1f27bbe48a513f6d862c8a7b3910ae72acee38
|
| 3 |
size 333968953
|
rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13553
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:708c0633fc2806448a36d54172c18b6fbe465f892fb67e669ae170a4b5034cdf
|
| 3 |
size 13553
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 627
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6bf45a7cece26021363c348414b2495d0d387d9868ac402ae5067b6d6efc43b5
|
| 3 |
size 627
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -110,11 +110,91 @@
|
|
| 110 |
"learning_rate": 8.816663171873909e-06,
|
| 111 |
"loss": 1.2822,
|
| 112 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
}
|
| 114 |
],
|
| 115 |
"max_steps": 14307,
|
| 116 |
"num_train_epochs": 3,
|
| 117 |
-
"total_flos":
|
| 118 |
"trial_name": null,
|
| 119 |
"trial_params": null
|
| 120 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.935625917383099,
|
| 5 |
+
"global_step": 14000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 110 |
"learning_rate": 8.816663171873909e-06,
|
| 111 |
"loss": 1.2822,
|
| 112 |
"step": 8000
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"epoch": 1.78,
|
| 116 |
+
"learning_rate": 8.117704620116029e-06,
|
| 117 |
+
"loss": 1.2721,
|
| 118 |
+
"step": 8500
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"epoch": 1.89,
|
| 122 |
+
"learning_rate": 7.418746068358147e-06,
|
| 123 |
+
"loss": 1.2596,
|
| 124 |
+
"step": 9000
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"epoch": 1.99,
|
| 128 |
+
"learning_rate": 6.719787516600266e-06,
|
| 129 |
+
"loss": 1.2635,
|
| 130 |
+
"step": 9500
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"epoch": 2.0,
|
| 134 |
+
"eval_loss": 1.3475877046585083,
|
| 135 |
+
"eval_runtime": 2879.1327,
|
| 136 |
+
"eval_samples_per_second": 12.22,
|
| 137 |
+
"eval_steps_per_second": 1.528,
|
| 138 |
+
"step": 9538
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 2.1,
|
| 142 |
+
"learning_rate": 6.020828964842386e-06,
|
| 143 |
+
"loss": 1.2592,
|
| 144 |
+
"step": 10000
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"epoch": 2.2,
|
| 148 |
+
"learning_rate": 5.321870413084504e-06,
|
| 149 |
+
"loss": 1.2459,
|
| 150 |
+
"step": 10500
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 2.31,
|
| 154 |
+
"learning_rate": 4.6229118613266235e-06,
|
| 155 |
+
"loss": 1.2477,
|
| 156 |
+
"step": 11000
|
| 157 |
+
},
|
| 158 |
+
{
|
| 159 |
+
"epoch": 2.41,
|
| 160 |
+
"learning_rate": 3.923953309568743e-06,
|
| 161 |
+
"loss": 1.249,
|
| 162 |
+
"step": 11500
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"epoch": 2.52,
|
| 166 |
+
"learning_rate": 3.224994757810862e-06,
|
| 167 |
+
"loss": 1.2376,
|
| 168 |
+
"step": 12000
|
| 169 |
+
},
|
| 170 |
+
{
|
| 171 |
+
"epoch": 2.62,
|
| 172 |
+
"learning_rate": 2.5260362060529814e-06,
|
| 173 |
+
"loss": 1.238,
|
| 174 |
+
"step": 12500
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"epoch": 2.73,
|
| 178 |
+
"learning_rate": 1.8270776542951004e-06,
|
| 179 |
+
"loss": 1.2409,
|
| 180 |
+
"step": 13000
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 2.83,
|
| 184 |
+
"learning_rate": 1.1281191025372195e-06,
|
| 185 |
+
"loss": 1.2365,
|
| 186 |
+
"step": 13500
|
| 187 |
+
},
|
| 188 |
+
{
|
| 189 |
+
"epoch": 2.94,
|
| 190 |
+
"learning_rate": 4.291605507793388e-07,
|
| 191 |
+
"loss": 1.237,
|
| 192 |
+
"step": 14000
|
| 193 |
}
|
| 194 |
],
|
| 195 |
"max_steps": 14307,
|
| 196 |
"num_train_epochs": 3,
|
| 197 |
+
"total_flos": 3657827887349760.0,
|
| 198 |
"trial_name": null,
|
| 199 |
"trial_params": null
|
| 200 |
}
|