moos124 committed on
Commit
885b10e
·
verified ·
1 Parent(s): bad3ef1

Training in progress, step 3530, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaabce9a37b2dc21c4ddcfe2cc5789904f2de3abd7f17c2f73a36bc76bc5dc5d
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a99239d819c3069f49ded03f0fce147a3278e3f6d51f759ba6ce434f47d773
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cd72153623ce8b1d5da24cf3891fd3128eaf61447bf3ad3158a82ee0a361466
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0a83e6b02b4547ad516aa7f88db092e5a1b3267629c227d1441ef4c707483b0
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1795a6a4ac0dd6383625543080a09d385e140315b6e92298e0cedcdf486f7f99
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d93ca6242fa88b3eded83b7744222f0f0ad83b81a54628230c8ecf83d52b7210
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b768181766a9a624e33d294d2855e294f5735388c4e8a9a87ae347064fbed161
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64532a108df5cb731e99f71affa4866f7f9ab11a8971f58a2cb0256caab809f6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7509333333333333,
6
  "eval_steps": 500,
7
- "global_step": 3520,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3538,6 +3538,16 @@
3538
  "mean_token_accuracy": 0.7583063259720803,
3539
  "num_tokens": 16400826.0,
3540
  "step": 3520
 
 
 
 
 
 
 
 
 
 
3541
  }
3542
  ],
3543
  "logging_steps": 10,
@@ -3557,7 +3567,7 @@
3557
  "attributes": {}
3558
  }
3559
  },
3560
- "total_flos": 7.770614434527744e+16,
3561
  "train_batch_size": 4,
3562
  "trial_name": null,
3563
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7530666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 3530,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3538
  "mean_token_accuracy": 0.7583063259720803,
3539
  "num_tokens": 16400826.0,
3540
  "step": 3520
3541
+ },
3542
+ {
3543
+ "entropy": 0.8583284638822078,
3544
+ "epoch": 0.7530666666666667,
3545
+ "grad_norm": 0.2846459448337555,
3546
+ "learning_rate": 7.188778676991064e-05,
3547
+ "loss": 0.914365577697754,
3548
+ "mean_token_accuracy": 0.7785162061452866,
3549
+ "num_tokens": 16445628.0,
3550
+ "step": 3530
3551
  }
3552
  ],
3553
  "logging_steps": 10,
 
3567
  "attributes": {}
3568
  }
3569
  },
3570
+ "total_flos": 7.791692294164685e+16,
3571
  "train_batch_size": 4,
3572
  "trial_name": null,
3573
  "trial_params": null