FormlessAI commited on
Commit
ad495a4
·
verified ·
1 Parent(s): 31c02fd

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099138994a05956d4ad7412e1c77594651af3a2aa230c7317bfb36b46e773219
3
  size 1037269336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b95cf130a9e8c2b019047ed038b74f166ebdae3e1d5a0e0eb651f33fdf9a7770
3
  size 1037269336
last-checkpoint/global_step1500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:728811bbb44140a3c434f5799f099f9947679be96be1722175a2c24ebc677041
3
+ size 781993445
last-checkpoint/global_step1500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0053512448d4ac6dd6037428defc2269837081c8263d66d2c308da0f52337d60
3
+ size 781993509
last-checkpoint/global_step1500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8876694d6b40962f21a720e4894ca83a89ee0e54a4449e965905838ac8ab22
3
+ size 781993509
last-checkpoint/global_step1500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85bd19644c9ae2b1df089d742f347f4e3a3cfc377d367040b4a0399603a45fac
3
+ size 781993509
last-checkpoint/global_step1500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2735aff831ad940fdebb21ff0d443ef332fc7e47361f0f4d91efc65a30a40d99
3
+ size 2610290277
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1400
 
1
+ global_step1500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0659f7df65a8d4f022538c1db1324bb83c98939fba11457f135a834e4fc8b08d
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd5ed3dd7928bb74313c77ea5d320ce985054e6ddc8aa61e9429e0f6fdf59f6
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0774f2829e7ea47ef6e28c2b3b1640314596ceb8b0712423ec369fe44281c840
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c3291b1f2a97791de822e1303030c2812ebeebcb659d6850151ee68cfc065c9
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f44ee23c1ea07ac60d274eaef197906028fd3d21288357504503497316897e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b6a1d8c7626da137935d79389edc741f3d1059207b7bd90ce0a39bac3644369
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a15b31e0a15dc06b8a90dc969f3b213dbad5faacc1b3e26d1dd6b1716d9b3394
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f2aa27db34f72f5e0bd7731f8e4c1eb212b5ae960a3340dacb77565bf4ad72c
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1937275837d80b853c7dc3d5d6eec94618d6af1bb9c3bc4f9035a475fc209b5a
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb5c756114849394828f515dee4b4c411b817cb6ec10f55e462bef9f88fa70a
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 2.156383514404297,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.20351795319087077,
6
  "eval_steps": 50,
7
- "global_step": 1400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2192,6 +2192,162 @@
2192
  "eval_samples_per_second": 175.883,
2193
  "eval_steps_per_second": 11.029,
2194
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2195
  }
2196
  ],
2197
  "logging_steps": 5,
@@ -2220,7 +2376,7 @@
2220
  "attributes": {}
2221
  }
2222
  },
2223
- "total_flos": 3.654657124471931e+17,
2224
  "train_batch_size": 4,
2225
  "trial_name": null,
2226
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 2.1434316635131836,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.21805494984736154,
6
  "eval_steps": 50,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2192
  "eval_samples_per_second": 175.883,
2193
  "eval_steps_per_second": 11.029,
2194
  "step": 1400
2195
+ },
2196
+ {
2197
+ "epoch": 0.2042448030236953,
2198
+ "grad_norm": 2.1165425777435303,
2199
+ "learning_rate": 9.596837315829758e-05,
2200
+ "loss": 2.1683,
2201
+ "step": 1405
2202
+ },
2203
+ {
2204
+ "epoch": 0.20497165285651983,
2205
+ "grad_norm": 2.612478256225586,
2206
+ "learning_rate": 9.59363533678238e-05,
2207
+ "loss": 2.2159,
2208
+ "step": 1410
2209
+ },
2210
+ {
2211
+ "epoch": 0.2056985026893444,
2212
+ "grad_norm": 2.3951268196105957,
2213
+ "learning_rate": 9.590421818027541e-05,
2214
+ "loss": 2.2366,
2215
+ "step": 1415
2216
+ },
2217
+ {
2218
+ "epoch": 0.20642535252216893,
2219
+ "grad_norm": 2.632112503051758,
2220
+ "learning_rate": 9.587196767655274e-05,
2221
+ "loss": 2.292,
2222
+ "step": 1420
2223
+ },
2224
+ {
2225
+ "epoch": 0.20715220235499346,
2226
+ "grad_norm": 2.6061480045318604,
2227
+ "learning_rate": 9.583960193784638e-05,
2228
+ "loss": 2.1233,
2229
+ "step": 1425
2230
+ },
2231
+ {
2232
+ "epoch": 0.207879052187818,
2233
+ "grad_norm": 2.3458104133605957,
2234
+ "learning_rate": 9.580712104563704e-05,
2235
+ "loss": 2.2607,
2236
+ "step": 1430
2237
+ },
2238
+ {
2239
+ "epoch": 0.20860590202064253,
2240
+ "grad_norm": 2.6679835319519043,
2241
+ "learning_rate": 9.577452508169532e-05,
2242
+ "loss": 2.1165,
2243
+ "step": 1435
2244
+ },
2245
+ {
2246
+ "epoch": 0.2093327518534671,
2247
+ "grad_norm": 2.376077651977539,
2248
+ "learning_rate": 9.574181412808155e-05,
2249
+ "loss": 1.9408,
2250
+ "step": 1440
2251
+ },
2252
+ {
2253
+ "epoch": 0.21005960168629162,
2254
+ "grad_norm": 2.2623307704925537,
2255
+ "learning_rate": 9.570898826714549e-05,
2256
+ "loss": 2.285,
2257
+ "step": 1445
2258
+ },
2259
+ {
2260
+ "epoch": 0.21078645151911615,
2261
+ "grad_norm": 2.7653067111968994,
2262
+ "learning_rate": 9.56760475815262e-05,
2263
+ "loss": 2.2413,
2264
+ "step": 1450
2265
+ },
2266
+ {
2267
+ "epoch": 0.21078645151911615,
2268
+ "eval_loss": 2.1597976684570312,
2269
+ "eval_runtime": 19.7596,
2270
+ "eval_samples_per_second": 167.058,
2271
+ "eval_steps_per_second": 10.476,
2272
+ "step": 1450
2273
+ },
2274
+ {
2275
+ "epoch": 0.21151330135194069,
2276
+ "grad_norm": 2.770691156387329,
2277
+ "learning_rate": 9.564299215415182e-05,
2278
+ "loss": 2.2649,
2279
+ "step": 1455
2280
+ },
2281
+ {
2282
+ "epoch": 0.21224015118476522,
2283
+ "grad_norm": 2.748636484146118,
2284
+ "learning_rate": 9.560982206823937e-05,
2285
+ "loss": 2.3174,
2286
+ "step": 1460
2287
+ },
2288
+ {
2289
+ "epoch": 0.21296700101758975,
2290
+ "grad_norm": 2.9117774963378906,
2291
+ "learning_rate": 9.557653740729448e-05,
2292
+ "loss": 2.2844,
2293
+ "step": 1465
2294
+ },
2295
+ {
2296
+ "epoch": 0.2136938508504143,
2297
+ "grad_norm": 2.2464821338653564,
2298
+ "learning_rate": 9.554313825511126e-05,
2299
+ "loss": 2.1766,
2300
+ "step": 1470
2301
+ },
2302
+ {
2303
+ "epoch": 0.21442070068323885,
2304
+ "grad_norm": 2.3479864597320557,
2305
+ "learning_rate": 9.550962469577204e-05,
2306
+ "loss": 2.1467,
2307
+ "step": 1475
2308
+ },
2309
+ {
2310
+ "epoch": 0.21514755051606338,
2311
+ "grad_norm": 2.5859127044677734,
2312
+ "learning_rate": 9.547599681364714e-05,
2313
+ "loss": 2.2705,
2314
+ "step": 1480
2315
+ },
2316
+ {
2317
+ "epoch": 0.2158744003488879,
2318
+ "grad_norm": 2.3954455852508545,
2319
+ "learning_rate": 9.544225469339472e-05,
2320
+ "loss": 2.1185,
2321
+ "step": 1485
2322
+ },
2323
+ {
2324
+ "epoch": 0.21660125018171245,
2325
+ "grad_norm": 2.358924388885498,
2326
+ "learning_rate": 9.540839841996055e-05,
2327
+ "loss": 2.2151,
2328
+ "step": 1490
2329
+ },
2330
+ {
2331
+ "epoch": 0.217328100014537,
2332
+ "grad_norm": 2.672982931137085,
2333
+ "learning_rate": 9.537442807857774e-05,
2334
+ "loss": 2.1513,
2335
+ "step": 1495
2336
+ },
2337
+ {
2338
+ "epoch": 0.21805494984736154,
2339
+ "grad_norm": 2.6655712127685547,
2340
+ "learning_rate": 9.534034375476659e-05,
2341
+ "loss": 2.262,
2342
+ "step": 1500
2343
+ },
2344
+ {
2345
+ "epoch": 0.21805494984736154,
2346
+ "eval_loss": 2.1434316635131836,
2347
+ "eval_runtime": 19.0603,
2348
+ "eval_samples_per_second": 173.187,
2349
+ "eval_steps_per_second": 10.86,
2350
+ "step": 1500
2351
  }
2352
  ],
2353
  "logging_steps": 5,
 
2376
  "attributes": {}
2377
  }
2378
  },
2379
+ "total_flos": 3.907643632768778e+17,
2380
  "train_batch_size": 4,
2381
  "trial_name": null,
2382
  "trial_params": null