FormlessAI commited on
Commit
d7f7508
·
verified ·
1 Parent(s): 03e0d04

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ce397cee94d09c474cc9f566d5d750670c5bafdf025578fa7f6350febb307b3
3
  size 17640808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c3adf6b848fd512d3ccbee07701d5b1281d14b5b6d954bd60bfedcb5226d84
3
  size 17640808
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee1b6c5e53b8c5c952aae054e001356d25146b40ed70ecd2f2b52bdd68ace937
3
+ size 26406896
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb36a516a23aecebfe651a1c5cca918dca170c832d6bdc7a9702ef78855dfe0
3
+ size 26406896
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeebdb273713699af0a6f32430557fd46218f8d966809f2fa13c0b5dbca8034d
3
+ size 26406960
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c061daf01fde60488ccad6122d732edaabdff60e939c748c1b78728efd62a9c
3
+ size 26406960
last-checkpoint/global_step600/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95579a409da3ae7895a20eacd13ac4674232acbda1c300178e4e0668ff2d2b85
3
+ size 290123128
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step580
 
1
+ global_step600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abfb47c9726fa35d508e54a3049a1757fb6e0ff26b3934ff47b66dd662a82d31
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1da147ba0da6ab5b60b2efc85755930d6b407244bb1369739c55160abe21812
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a504e3d11123830fb9a64b49b55ed317539ee5965b043766551954548bbed111
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363d8b619021abcc488299c842ba022316c3dbc94bf7d41b4ef7eefc2f0a01b3
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4df122f3d29d0cc7cf083a996bb27cc89af3278c4ec424ddbeabc1f5a0b14723
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d941f67a913ea9f7baeb6478f49eea1e9cbea9622455f36d8025f2d80f5056d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a073049aba3ed5321f7ff1d8d11ce04afc9315843d01b82a3475c4e7d90085c3
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08479df3147ab731a7c132ca6cbbd2dd0a0df3e66b9c6d7a3372c4ee408deff
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87127e7b4de7b8cc43b1d112e5d5285b7668bbc7782ef3227ec6ffe19962953a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aab5f0cd062a08b47003f91a4fb1c05b55ff0a59f0c4434093fc45dcb54d07c1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 580,
3
  "best_metric": 1.5717545747756958,
4
  "best_model_checkpoint": "miner_id_24/checkpoint-580",
5
- "epoch": 0.8123249299719888,
6
  "eval_steps": 10,
7
- "global_step": 580,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1292,6 +1292,50 @@
1292
  "eval_samples_per_second": 23.941,
1293
  "eval_steps_per_second": 1.5,
1294
  "step": 580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1295
  }
1296
  ],
1297
  "logging_steps": 5,
@@ -1306,7 +1350,7 @@
1306
  "early_stopping_threshold": 0.0
1307
  },
1308
  "attributes": {
1309
- "early_stopping_patience_counter": 0
1310
  }
1311
  },
1312
  "TrainerControl": {
@@ -1320,7 +1364,7 @@
1320
  "attributes": {}
1321
  }
1322
  },
1323
- "total_flos": 6.690455755969004e+17,
1324
  "train_batch_size": 4,
1325
  "trial_name": null,
1326
  "trial_params": null
 
2
  "best_global_step": 580,
3
  "best_metric": 1.5717545747756958,
4
  "best_model_checkpoint": "miner_id_24/checkpoint-580",
5
+ "epoch": 0.8403361344537815,
6
  "eval_steps": 10,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1292
  "eval_samples_per_second": 23.941,
1293
  "eval_steps_per_second": 1.5,
1294
  "step": 580
1295
+ },
1296
+ {
1297
+ "epoch": 0.819327731092437,
1298
+ "grad_norm": 0.765592098236084,
1299
+ "learning_rate": 1.6821615206495312e-05,
1300
+ "loss": 1.6214,
1301
+ "step": 585
1302
+ },
1303
+ {
1304
+ "epoch": 0.8263305322128851,
1305
+ "grad_norm": 0.26741182804107666,
1306
+ "learning_rate": 1.558661759140786e-05,
1307
+ "loss": 1.5273,
1308
+ "step": 590
1309
+ },
1310
+ {
1311
+ "epoch": 0.8263305322128851,
1312
+ "eval_loss": 1.5749902725219727,
1313
+ "eval_runtime": 211.3039,
1314
+ "eval_samples_per_second": 24.022,
1315
+ "eval_steps_per_second": 1.505,
1316
+ "step": 590
1317
+ },
1318
+ {
1319
+ "epoch": 0.8333333333333334,
1320
+ "grad_norm": 0.508738100528717,
1321
+ "learning_rate": 1.439486275322357e-05,
1322
+ "loss": 1.6532,
1323
+ "step": 595
1324
+ },
1325
+ {
1326
+ "epoch": 0.8403361344537815,
1327
+ "grad_norm": 1.1134337186813354,
1328
+ "learning_rate": 1.324696119690173e-05,
1329
+ "loss": 1.4611,
1330
+ "step": 600
1331
+ },
1332
+ {
1333
+ "epoch": 0.8403361344537815,
1334
+ "eval_loss": 1.5721418857574463,
1335
+ "eval_runtime": 215.1945,
1336
+ "eval_samples_per_second": 23.588,
1337
+ "eval_steps_per_second": 1.478,
1338
+ "step": 600
1339
  }
1340
  ],
1341
  "logging_steps": 5,
 
1350
  "early_stopping_threshold": 0.0
1351
  },
1352
  "attributes": {
1353
+ "early_stopping_patience_counter": 2
1354
  }
1355
  },
1356
  "TrainerControl": {
 
1364
  "attributes": {}
1365
  }
1366
  },
1367
+ "total_flos": 6.921161126864486e+17,
1368
  "train_batch_size": 4,
1369
  "trial_name": null,
1370
  "trial_params": null