kavanmevada commited on
Commit
e09bbfc
·
verified ·
1 Parent(s): 17cc06b

Training in progress, step 330, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2e4c22cf9e06580af30dce4f279974ede0ee3634a0dd139bd26cb4e25b25ed7
3
  size 936503576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc4c68f91f44c0bb1e9b3e7b76a52ad4e9ad5225330739244f161460684cdba
3
  size 936503576
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7344bdd1e274ca01246f02556985f7a2cd03b4f3e5340ec3a06f3c587c4caa39
3
  size 936544523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:142a1a99ba0620bdcfcc4c55495012f4704ec0a05f7b9a8582d625e5b6f01518
3
  size 936544523
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d97c0d3c8cfa82dd1ce5510efad605477e606178221dbf394aa018e5e13a0c32
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cba2e79d569d575b26dc3bead628a624c8d773702ed84eab62f3bad875bc1769
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0014225415037597328,
6
  "eval_steps": 500,
7
- "global_step": 320,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2248,6 +2248,76 @@
2248
  "learning_rate": 1.9999998449395407e-05,
2249
  "loss": 4.2385,
2250
  "step": 320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2251
  }
2252
  ],
2253
  "logging_steps": 1,
@@ -2267,7 +2337,7 @@
2267
  "attributes": {}
2268
  }
2269
  },
2270
- "total_flos": 1.26679523524608e+16,
2271
  "train_batch_size": 1,
2272
  "trial_name": null,
2273
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0014669959257522245,
6
  "eval_steps": 500,
7
+ "global_step": 330,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2248
  "learning_rate": 1.9999998449395407e-05,
2249
  "loss": 4.2385,
2250
  "step": 320
2251
+ },
2252
+ {
2253
+ "epoch": 0.0014269869459589818,
2254
+ "grad_norm": 15.1875,
2255
+ "learning_rate": 1.999999843965851e-05,
2256
+ "loss": 4.104,
2257
+ "step": 321
2258
+ },
2259
+ {
2260
+ "epoch": 0.001431432388158231,
2261
+ "grad_norm": 12.3125,
2262
+ "learning_rate": 1.999999842989114e-05,
2263
+ "loss": 4.3166,
2264
+ "step": 322
2265
+ },
2266
+ {
2267
+ "epoch": 0.0014358778303574802,
2268
+ "grad_norm": 8.5,
2269
+ "learning_rate": 1.9999998420093294e-05,
2270
+ "loss": 4.5187,
2271
+ "step": 323
2272
+ },
2273
+ {
2274
+ "epoch": 0.0014403232725567294,
2275
+ "grad_norm": 9.5,
2276
+ "learning_rate": 1.9999998410264968e-05,
2277
+ "loss": 4.2137,
2278
+ "step": 324
2279
+ },
2280
+ {
2281
+ "epoch": 0.0014447687147559785,
2282
+ "grad_norm": 13.75,
2283
+ "learning_rate": 1.9999998400406172e-05,
2284
+ "loss": 4.2093,
2285
+ "step": 325
2286
+ },
2287
+ {
2288
+ "epoch": 0.0014492141569552278,
2289
+ "grad_norm": 9.8125,
2290
+ "learning_rate": 1.99999983905169e-05,
2291
+ "loss": 4.3445,
2292
+ "step": 326
2293
+ },
2294
+ {
2295
+ "epoch": 0.0014536595991544768,
2296
+ "grad_norm": 13.25,
2297
+ "learning_rate": 1.999999838059715e-05,
2298
+ "loss": 4.0465,
2299
+ "step": 327
2300
+ },
2301
+ {
2302
+ "epoch": 0.0014581050413537261,
2303
+ "grad_norm": 14.0,
2304
+ "learning_rate": 1.9999998370646926e-05,
2305
+ "loss": 4.1375,
2306
+ "step": 328
2307
+ },
2308
+ {
2309
+ "epoch": 0.0014625504835529752,
2310
+ "grad_norm": 9.25,
2311
+ "learning_rate": 1.9999998360666225e-05,
2312
+ "loss": 4.3918,
2313
+ "step": 329
2314
+ },
2315
+ {
2316
+ "epoch": 0.0014669959257522245,
2317
+ "grad_norm": 12.25,
2318
+ "learning_rate": 1.999999835065505e-05,
2319
+ "loss": 4.0845,
2320
+ "step": 330
2321
  }
2322
  ],
2323
  "logging_steps": 1,
 
2337
  "attributes": {}
2338
  }
2339
  },
2340
+ "total_flos": 1.30638258634752e+16,
2341
  "train_batch_size": 1,
2342
  "trial_name": null,
2343
  "trial_params": null