moos124 commited on
Commit
2b8b9c5
·
verified ·
1 Parent(s): e372449

Training in progress, step 3660, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eec73b30e13ab97f26d12e86d864478fd64b7c7c18c5a472871f9ce2d7e484e
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c86b10190bc6f9684c3c006906a12d974165411bf235ef5a1f598e83ba6d16f5
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54a4982669449d1a65097b4ca10d08f587ab9668cfed2459c31ef57d7bf3ffd7
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:736c8fc8e77818855996eaa3377af22d5b6ad8fd2953df9c0320b5230b50e8e5
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2550686ca12c289d016d5a3b807eea016a3ecb9a1f256fc96189b651d41409ec
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78214093a985edca04fbe69833eb81e36cc7ab74914b17c832358951bcd4c590
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c85aa0a64ea3b5e4cf74c3c3bddea266230518b8ebbe06f5c2504563745ab0d1
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63e1d259a9fc1d6377e767ec1a4613b6b2d8b67e6c51c4d868d5edc82275bd2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7786666666666666,
6
  "eval_steps": 500,
7
- "global_step": 3650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3668,6 +3668,16 @@
3668
  "mean_token_accuracy": 0.744163216650486,
3669
  "num_tokens": 16999652.0,
3670
  "step": 3650
 
 
 
 
 
 
 
 
 
 
3671
  }
3672
  ],
3673
  "logging_steps": 10,
@@ -3687,7 +3697,7 @@
3687
  "attributes": {}
3688
  }
3689
  },
3690
- "total_flos": 8.052271378002432e+16,
3691
  "train_batch_size": 4,
3692
  "trial_name": null,
3693
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7808,
6
  "eval_steps": 500,
7
+ "global_step": 3660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3668
  "mean_token_accuracy": 0.744163216650486,
3669
  "num_tokens": 16999652.0,
3670
  "step": 3650
3671
+ },
3672
+ {
3673
+ "entropy": 0.8934633955359459,
3674
+ "epoch": 0.7808,
3675
+ "grad_norm": 0.25434958934783936,
3676
+ "learning_rate": 6.984341594519421e-05,
3677
+ "loss": 1.0075945854187012,
3678
+ "mean_token_accuracy": 0.7736709147691727,
3679
+ "num_tokens": 17046141.0,
3680
+ "step": 3660
3681
  }
3682
  ],
3683
  "logging_steps": 10,
 
3697
  "attributes": {}
3698
  }
3699
  },
3700
+ "total_flos": 8.073084288700416e+16,
3701
  "train_batch_size": 4,
3702
  "trial_name": null,
3703
  "trial_params": null