moos124 commited on
Commit
59fcb8e
·
verified ·
1 Parent(s): 0537c6c

Training in progress, step 3540, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a99239d819c3069f49ded03f0fce147a3278e3f6d51f759ba6ce434f47d773
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6abe6b20e50f0cd882bad9eae9273328985ecef8a256aadab122834b3151431
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0a83e6b02b4547ad516aa7f88db092e5a1b3267629c227d1441ef4c707483b0
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b4f557f12ca90d94b174420b4779b88d959d7c57a6beb85b9140065d127466f
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d93ca6242fa88b3eded83b7744222f0f0ad83b81a54628230c8ecf83d52b7210
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2875a4b179039cc15eab719821faa1d49633f813ba1cc64aa5c16a4a837f702
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64532a108df5cb731e99f71affa4866f7f9ab11a8971f58a2cb0256caab809f6
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe86c621cf5e24510a000969182228a8eda127b5a3e17532385a1501315df4c0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7530666666666667,
6
  "eval_steps": 500,
7
- "global_step": 3530,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3548,6 +3548,16 @@
3548
  "mean_token_accuracy": 0.7785162061452866,
3549
  "num_tokens": 16445628.0,
3550
  "step": 3530
 
 
 
 
 
 
 
 
 
 
3551
  }
3552
  ],
3553
  "logging_steps": 10,
@@ -3567,7 +3577,7 @@
3567
  "attributes": {}
3568
  }
3569
  },
3570
- "total_flos": 7.791692294164685e+16,
3571
  "train_batch_size": 4,
3572
  "trial_name": null,
3573
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7552,
6
  "eval_steps": 500,
7
+ "global_step": 3540,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3548
  "mean_token_accuracy": 0.7785162061452866,
3549
  "num_tokens": 16445628.0,
3550
  "step": 3530
3551
+ },
3552
+ {
3553
+ "entropy": 1.0267987482249736,
3554
+ "epoch": 0.7552,
3555
+ "grad_norm": 0.26450878381729126,
3556
+ "learning_rate": 7.173204847965333e-05,
3557
+ "loss": 1.1284149169921875,
3558
+ "mean_token_accuracy": 0.7466149963438511,
3559
+ "num_tokens": 16498069.0,
3560
+ "step": 3540
3561
  }
3562
  ],
3563
  "logging_steps": 10,
 
3577
  "attributes": {}
3578
  }
3579
  },
3580
+ "total_flos": 7.818332279145984e+16,
3581
  "train_batch_size": 4,
3582
  "trial_name": null,
3583
  "trial_params": null