moos124 commited on
Commit
32e8696
·
verified ·
1 Parent(s): 36f0bb0

Training in progress, step 4260, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c4e11c1a46f9f109cb7ba38e56ab82215c2b48355ee32883a96bffeeba80b3a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9787c69ac5edc91a0d9ad2cab2b3013e798e0d53aa5aa2a8c0b12473f632b04d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88190cfb1563f790352d127bd91a0a8c464222e483c32ff4500e5750d29298e6
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eb78fe51b6d209ded36784588a47f41efd10c942f011013dd9e8bf39973b994
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b68fd0029e7b261e63271307deb9f4836f5f68bdb71f73d7c4d4addbb5ece23
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e5bd59b4e61772c90f5d89a34001ecd9e333ff923184eb604e53de48807b89
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:893524f66429fdbfbd92fa63a34c02cb9fec44170efb9ec59a0ac5b90c37545c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:410d32551231a48e90fd152a5ac1593daeb38fecc140c21bd3edb0a99a6d54d2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9066666666666666,
6
  "eval_steps": 500,
7
- "global_step": 4250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4268,6 +4268,16 @@
4268
  "mean_token_accuracy": 0.7600424617528916,
4269
  "num_tokens": 19789800.0,
4270
  "step": 4250
 
 
 
 
 
 
 
 
 
 
4271
  }
4272
  ],
4273
  "logging_steps": 10,
@@ -4287,7 +4297,7 @@
4287
  "attributes": {}
4288
  }
4289
  },
4290
- "total_flos": 9.365159157174682e+16,
4291
  "train_batch_size": 4,
4292
  "trial_name": null,
4293
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9088,
6
  "eval_steps": 500,
7
+ "global_step": 4260,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4268
  "mean_token_accuracy": 0.7600424617528916,
4269
  "num_tokens": 19789800.0,
4270
  "step": 4250
4271
+ },
4272
+ {
4273
+ "entropy": 1.019892977923155,
4274
+ "epoch": 0.9088,
4275
+ "grad_norm": 0.23841184377670288,
4276
+ "learning_rate": 5.995388632070827e-05,
4277
+ "loss": 1.0938913345336914,
4278
+ "mean_token_accuracy": 0.7473356157541275,
4279
+ "num_tokens": 19839462.0,
4280
+ "step": 4260
4281
  }
4282
  ],
4283
  "logging_steps": 10,
 
4297
  "attributes": {}
4298
  }
4299
  },
4300
+ "total_flos": 9.38982013579561e+16,
4301
  "train_batch_size": 4,
4302
  "trial_name": null,
4303
  "trial_params": null