kavanmevada commited on
Commit
52526ff
·
verified ·
1 Parent(s): c37d61a

Training in progress, step 320, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7b638956b5aefb96216cebb0cc4e2f5ca1f4b0731b8919df07c83dde33eab2
3
  size 936503576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2e4c22cf9e06580af30dce4f279974ede0ee3634a0dd139bd26cb4e25b25ed7
3
  size 936503576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43017fbec9c8da66ae23541f07595a7ae4286d452825f32083f5e398be89f0ed
3
  size 936544523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7344bdd1e274ca01246f02556985f7a2cd03b4f3e5340ec3a06f3c587c4caa39
3
  size 936544523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:949550c402a32533c4218565004de685bebcce6068fb43f8dd1d8fe10cec0d5b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d97c0d3c8cfa82dd1ce5510efad605477e606178221dbf394aa018e5e13a0c32
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.001378087081767241,
6
  "eval_steps": 500,
7
- "global_step": 310,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2178,6 +2178,76 @@
2178
  "learning_rate": 1.9999998545088225e-05,
2179
  "loss": 4.3453,
2180
  "step": 310
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2181
  }
2182
  ],
2183
  "logging_steps": 1,
@@ -2197,7 +2267,7 @@
2197
  "attributes": {}
2198
  }
2199
  },
2200
- "total_flos": 1.22720788414464e+16,
2201
  "train_batch_size": 1,
2202
  "trial_name": null,
2203
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0014225415037597328,
6
  "eval_steps": 500,
7
+ "global_step": 320,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2178
  "learning_rate": 1.9999998545088225e-05,
2179
  "loss": 4.3453,
2180
  "step": 310
2181
+ },
2182
+ {
2183
+ "epoch": 0.0013825325239664903,
2184
+ "grad_norm": 13.375,
2185
+ "learning_rate": 1.999999853565608e-05,
2186
+ "loss": 4.3769,
2187
+ "step": 311
2188
+ },
2189
+ {
2190
+ "epoch": 0.0013869779661657394,
2191
+ "grad_norm": 12.75,
2192
+ "learning_rate": 1.999999852619346e-05,
2193
+ "loss": 4.2999,
2194
+ "step": 312
2195
+ },
2196
+ {
2197
+ "epoch": 0.0013914234083649887,
2198
+ "grad_norm": 13.1875,
2199
+ "learning_rate": 1.9999998516700373e-05,
2200
+ "loss": 4.4593,
2201
+ "step": 313
2202
+ },
2203
+ {
2204
+ "epoch": 0.0013958688505642377,
2205
+ "grad_norm": 12.75,
2206
+ "learning_rate": 1.9999998507176803e-05,
2207
+ "loss": 4.4465,
2208
+ "step": 314
2209
+ },
2210
+ {
2211
+ "epoch": 0.001400314292763487,
2212
+ "grad_norm": 12.5625,
2213
+ "learning_rate": 1.9999998497622755e-05,
2214
+ "loss": 4.301,
2215
+ "step": 315
2216
+ },
2217
+ {
2218
+ "epoch": 0.001404759734962736,
2219
+ "grad_norm": 11.625,
2220
+ "learning_rate": 1.999999848803824e-05,
2221
+ "loss": 4.7077,
2222
+ "step": 316
2223
+ },
2224
+ {
2225
+ "epoch": 0.0014092051771619853,
2226
+ "grad_norm": 10.75,
2227
+ "learning_rate": 1.9999998478423243e-05,
2228
+ "loss": 4.3979,
2229
+ "step": 317
2230
+ },
2231
+ {
2232
+ "epoch": 0.0014136506193612344,
2233
+ "grad_norm": 8.1875,
2234
+ "learning_rate": 1.9999998468777773e-05,
2235
+ "loss": 4.4527,
2236
+ "step": 318
2237
+ },
2238
+ {
2239
+ "epoch": 0.0014180960615604835,
2240
+ "grad_norm": 12.5,
2241
+ "learning_rate": 1.9999998459101828e-05,
2242
+ "loss": 4.3529,
2243
+ "step": 319
2244
+ },
2245
+ {
2246
+ "epoch": 0.0014225415037597328,
2247
+ "grad_norm": 13.6875,
2248
+ "learning_rate": 1.9999998449395407e-05,
2249
+ "loss": 4.2385,
2250
+ "step": 320
2251
  }
2252
  ],
2253
  "logging_steps": 1,
 
2267
  "attributes": {}
2268
  }
2269
  },
2270
+ "total_flos": 1.26679523524608e+16,
2271
  "train_batch_size": 1,
2272
  "trial_name": null,
2273
  "trial_params": null