FormlessAI committed on
Commit
3f93e93
·
verified ·
1 Parent(s): a4938fd

Training in progress, step 430, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:893f5c8856fdc4a2b011a0f925f5f0a5a12489125014c73cd5998f9fc3897b44
3
  size 147770888
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dd61dbd46e4ee82fd33617e4a8411741cee905cc1e9401a102028fdb32c7ead
3
  size 147770888
last-checkpoint/global_step430/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cafbbf4182b398aff7c66d586f1a5026e634425180bfde77a5a8f8acd8d31ba2
3
+ size 443178960
last-checkpoint/global_step430/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bbf517ea4f7493903098ba52e54448f3715eae4e12ed7b79db25b739aa7cabd
3
+ size 443179088
last-checkpoint/global_step430/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e818c7480088688c038a9bc96cd5ab4624f19a8f2b3804db6a40c872605c6fe
3
+ size 614765880
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step420
 
1
+ global_step430
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85b27159412abe6a30485dc519cf403ee3db832c65abe733fbf34a73d6540791
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a15aba38ae961b6e93b01cbaf204b4dfb1ff8281028ffbbf38cabbe605e87c
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ec1ea968138e2f514b82726a0056326f5069984d539a687bec45fbf72e0ccb1
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cc2617943b1f6b82b3ab7caec02d1bb0162a82f58d1a50aea62b2e77f20038
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b76ff39d4805387c82a6745b83f0904cec0ca7e9eaedb729cc4bc23d42182fa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e7fcb688e077c6922e71258186f68ed91012a168b39ea778c3c18a2a291a0d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 410,
3
- "best_metric": 1.0409541130065918,
4
- "best_model_checkpoint": "miner_id_24/checkpoint-410",
5
- "epoch": 0.3226425965047052,
6
  "eval_steps": 10,
7
- "global_step": 420,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3292,6 +3292,84 @@
3292
  "eval_samples_per_second": 18.42,
3293
  "eval_steps_per_second": 2.31,
3294
  "step": 420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3295
  }
3296
  ],
3297
  "logging_steps": 1,
@@ -3306,7 +3384,7 @@
3306
  "early_stopping_threshold": 0.0
3307
  },
3308
  "attributes": {
3309
- "early_stopping_patience_counter": 1
3310
  }
3311
  },
3312
  "TrainerControl": {
@@ -3320,7 +3398,7 @@
3320
  "attributes": {}
3321
  }
3322
  },
3323
- "total_flos": 4.572031475515392e+17,
3324
  "train_batch_size": 4,
3325
  "trial_name": null,
3326
  "trial_params": null
 
1
  {
2
+ "best_global_step": 430,
3
+ "best_metric": 1.0377129316329956,
4
+ "best_model_checkpoint": "miner_id_24/checkpoint-430",
5
+ "epoch": 0.33032456308815056,
6
  "eval_steps": 10,
7
+ "global_step": 430,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3292
  "eval_samples_per_second": 18.42,
3293
  "eval_steps_per_second": 2.31,
3294
  "step": 420
3295
+ },
3296
+ {
3297
+ "epoch": 0.3234107931630497,
3298
+ "grad_norm": 0.3012835383415222,
3299
+ "learning_rate": 1.2845275866310325e-05,
3300
+ "loss": 0.9846,
3301
+ "step": 421
3302
+ },
3303
+ {
3304
+ "epoch": 0.32417898982139426,
3305
+ "grad_norm": 0.3492763042449951,
3306
+ "learning_rate": 1.2814529218688688e-05,
3307
+ "loss": 0.9851,
3308
+ "step": 422
3309
+ },
3310
+ {
3311
+ "epoch": 0.3249471864797388,
3312
+ "grad_norm": 0.45387721061706543,
3313
+ "learning_rate": 1.2783753647424635e-05,
3314
+ "loss": 0.9763,
3315
+ "step": 423
3316
+ },
3317
+ {
3318
+ "epoch": 0.32571538313808335,
3319
+ "grad_norm": 0.4614562392234802,
3320
+ "learning_rate": 1.2752949468784776e-05,
3321
+ "loss": 0.7974,
3322
+ "step": 424
3323
+ },
3324
+ {
3325
+ "epoch": 0.3264835797964279,
3326
+ "grad_norm": 0.6264432072639465,
3327
+ "learning_rate": 1.2722116999329712e-05,
3328
+ "loss": 1.0572,
3329
+ "step": 425
3330
+ },
3331
+ {
3332
+ "epoch": 0.32725177645477244,
3333
+ "grad_norm": 0.09508559107780457,
3334
+ "learning_rate": 1.2691256555910769e-05,
3335
+ "loss": 0.5381,
3336
+ "step": 426
3337
+ },
3338
+ {
3339
+ "epoch": 0.328019973113117,
3340
+ "grad_norm": 0.16152898967266083,
3341
+ "learning_rate": 1.2660368455666752e-05,
3342
+ "loss": 1.0737,
3343
+ "step": 427
3344
+ },
3345
+ {
3346
+ "epoch": 0.32878816977146147,
3347
+ "grad_norm": 0.1710013449192047,
3348
+ "learning_rate": 1.2629453016020681e-05,
3349
+ "loss": 1.2014,
3350
+ "step": 428
3351
+ },
3352
+ {
3353
+ "epoch": 0.329556366429806,
3354
+ "grad_norm": 0.16182465851306915,
3355
+ "learning_rate": 1.259851055467653e-05,
3356
+ "loss": 1.1196,
3357
+ "step": 429
3358
+ },
3359
+ {
3360
+ "epoch": 0.33032456308815056,
3361
+ "grad_norm": 0.1539359837770462,
3362
+ "learning_rate": 1.2567541389615965e-05,
3363
+ "loss": 1.0812,
3364
+ "step": 430
3365
+ },
3366
+ {
3367
+ "epoch": 0.33032456308815056,
3368
+ "eval_loss": 1.0377129316329956,
3369
+ "eval_runtime": 119.0772,
3370
+ "eval_samples_per_second": 18.417,
3371
+ "eval_steps_per_second": 2.309,
3372
+ "step": 430
3373
  }
3374
  ],
3375
  "logging_steps": 1,
 
3384
  "early_stopping_threshold": 0.0
3385
  },
3386
  "attributes": {
3387
+ "early_stopping_patience_counter": 0
3388
  }
3389
  },
3390
  "TrainerControl": {
 
3398
  "attributes": {}
3399
  }
3400
  },
3401
+ "total_flos": 4.680889367789568e+17,
3402
  "train_batch_size": 4,
3403
  "trial_name": null,
3404
  "trial_params": null