moos124 committed on
Commit
3e893c8
·
verified ·
1 Parent(s): 99a7ce3

Training in progress, step 410, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b773ce2de6d7cc50ab369fe688147a42e0e791a508e666bce07706c4b35f10c
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4ed6a544f9d924a761db418c429862898e4ed7095da2fb2c963a476c8d15070
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db050a8e740f735f809fc576df4e37f292ec6a5314c77554aae6e4cf4e25267e
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3958f1ad44d5ba182a73d90deda80200014c28bf3778bfcb4423f62766ecc52
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b560efadabac5e38946943c60a5a9026b112488df1175e8c4d33d1ede001047c
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341acbb4c55ddb99a44518768b4df844b002cd93d82b7183b339754ddb4142ab
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca62f923b6bedfedee2b94878f48259e4a79dad81665ba7ca91e99e993ab4fc9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62455f2b1dbd96b08c8ebc6abeb255cec1cb2a2bf25cf908d290ca3a2151401a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.08533333333333333,
6
  "eval_steps": 500,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -418,6 +418,16 @@
418
  "mean_token_accuracy": 0.7820899412035942,
419
  "num_tokens": 1844640.0,
420
  "step": 400
 
 
 
 
 
 
 
 
 
 
421
  }
422
  ],
423
  "logging_steps": 10,
@@ -437,7 +447,7 @@
437
  "attributes": {}
438
  }
439
  },
440
- "total_flos": 8772071858506752.0,
441
  "train_batch_size": 4,
442
  "trial_name": null,
443
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.08746666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 410,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
418
  "mean_token_accuracy": 0.7820899412035942,
419
  "num_tokens": 1844640.0,
420
  "step": 400
421
+ },
422
+ {
423
+ "entropy": 1.0211177349090577,
424
+ "epoch": 0.08746666666666666,
425
+ "grad_norm": 0.27772271633148193,
426
+ "learning_rate": 9.996441620562322e-05,
427
+ "loss": 1.1202519416809082,
428
+ "mean_token_accuracy": 0.7511946842074394,
429
+ "num_tokens": 1891181.0,
430
+ "step": 410
431
  }
432
  ],
433
  "logging_steps": 10,
 
447
  "attributes": {}
448
  }
449
  },
450
+ "total_flos": 8984544686186496.0,
451
  "train_batch_size": 4,
452
  "trial_name": null,
453
  "trial_params": null