moos124 committed on
Commit
84a3b61
·
verified ·
1 Parent(s): ad992c1

Training in progress, step 3680, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77dd33d15ee1608e4a5d147de3871a9866f8aa8a166d045974da6f84f60efed7
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c110a31da32ffd213cc59e0053a9afa56351942d16adcf5eaddb709a747bdb50
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d6dd7374734ad7f66c91cd50e6b708c0c972ff900882a7beba0e0da0fcfe0c4
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e5e0230eff9b5e794d7b8d1cd83bcbb85fa38445afd970234bb68dcef10fb4
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bd3568fe11d8bbd83db97bd2d933c20fb36cb00dbf6e65d2005ff6b18675421
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31e58fd132b14730d384ea8fd82ae553fa932d349d08d387f8d2630fd9bd431c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a622b62caf72c6314589492b96800f0c8c29744716f35a900e0f0317b58e2d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47c397115142eeea4d4000613982af98eada11892d251ef48622f892e5a6db62
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7829333333333334,
6
  "eval_steps": 500,
7
- "global_step": 3670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3688,6 +3688,16 @@
3688
  "mean_token_accuracy": 0.7515291333198547,
3689
  "num_tokens": 17096136.0,
3690
  "step": 3670
 
 
 
 
 
 
 
 
 
 
3691
  }
3692
  ],
3693
  "logging_steps": 10,
@@ -3707,7 +3717,7 @@
3707
  "attributes": {}
3708
  }
3709
  },
3710
- "total_flos": 8.09674224633815e+16,
3711
  "train_batch_size": 4,
3712
  "trial_name": null,
3713
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7850666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 3680,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3688
  "mean_token_accuracy": 0.7515291333198547,
3689
  "num_tokens": 17096136.0,
3690
  "step": 3670
3691
+ },
3692
+ {
3693
+ "entropy": 0.974248643219471,
3694
+ "epoch": 0.7850666666666667,
3695
+ "grad_norm": 0.22508691251277924,
3696
+ "learning_rate": 6.952522690159861e-05,
3697
+ "loss": 1.0584315299987792,
3698
+ "mean_token_accuracy": 0.7587296038866043,
3699
+ "num_tokens": 17144177.0,
3700
+ "step": 3680
3701
  }
3702
  ],
3703
  "logging_steps": 10,
 
3717
  "attributes": {}
3718
  }
3719
  },
3720
+ "total_flos": 8.119511633724826e+16,
3721
  "train_batch_size": 4,
3722
  "trial_name": null,
3723
  "trial_params": null