moos124 committed on
Commit
e994b76
·
verified ·
1 Parent(s): 4c098a4

Training in progress, step 3670, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c86b10190bc6f9684c3c006906a12d974165411bf235ef5a1f598e83ba6d16f5
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77dd33d15ee1608e4a5d147de3871a9866f8aa8a166d045974da6f84f60efed7
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:736c8fc8e77818855996eaa3377af22d5b6ad8fd2953df9c0320b5230b50e8e5
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6dd7374734ad7f66c91cd50e6b708c0c972ff900882a7beba0e0da0fcfe0c4
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78214093a985edca04fbe69833eb81e36cc7ab74914b17c832358951bcd4c590
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd3568fe11d8bbd83db97bd2d933c20fb36cb00dbf6e65d2005ff6b18675421
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63e1d259a9fc1d6377e767ec1a4613b6b2d8b67e6c51c4d868d5edc82275bd2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a622b62caf72c6314589492b96800f0c8c29744716f35a900e0f0317b58e2d
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7808,
6
  "eval_steps": 500,
7
- "global_step": 3660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3678,6 +3678,16 @@
3678
  "mean_token_accuracy": 0.7736709147691727,
3679
  "num_tokens": 17046141.0,
3680
  "step": 3660
 
 
 
 
 
 
 
 
 
 
3681
  }
3682
  ],
3683
  "logging_steps": 10,
@@ -3697,7 +3707,7 @@
3697
  "attributes": {}
3698
  }
3699
  },
3700
- "total_flos": 8.073084288700416e+16,
3701
  "train_batch_size": 4,
3702
  "trial_name": null,
3703
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7829333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 3670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3678
  "mean_token_accuracy": 0.7736709147691727,
3679
  "num_tokens": 17046141.0,
3680
  "step": 3660
3681
+ },
3682
+ {
3683
+ "entropy": 1.0374766498804093,
3684
+ "epoch": 0.7829333333333334,
3685
+ "grad_norm": 0.2597425878047943,
3686
+ "learning_rate": 6.968443934771933e-05,
3687
+ "loss": 1.1429466247558593,
3688
+ "mean_token_accuracy": 0.7515291333198547,
3689
+ "num_tokens": 17096136.0,
3690
+ "step": 3670
3691
  }
3692
  ],
3693
  "logging_steps": 10,
 
3707
  "attributes": {}
3708
  }
3709
  },
3710
+ "total_flos": 8.09674224633815e+16,
3711
  "train_batch_size": 4,
3712
  "trial_name": null,
3713
  "trial_params": null