moos124 committed on
Commit
e0c55dc
·
verified ·
1 Parent(s): c2ca68c

Training in progress, step 4390, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b57499e7743293e3d63393286b1030b9ad58b617e9da83647241dbe9048e37
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a93ca27c93bb18118dc8faf2e9e6f8dd528574415ecd725b43e22e5bed162969
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50adcb58c766a6af5fb5ae4d9231da8e4fd3cd711d2b78b237b1a5325c87cec7
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1087e683ebd568a145ddfabb7d2efe7d820451cbb4ec74cd3bab8abd48801ea
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c08d4497334c96aec714474814fbf07fd5c4b74414f334d4bf742d8252f204f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3240f0aebc4d6a296fd2d5c3d89b0dfb51f76d5bee9087d1bdcb98f947fcc35d
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d40e6a84f7bdae3c64a3e94c5cdd0573fe920cb78e23fc2b376054692b452f4c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a20148b8ca05863768cad01be7d695bc69f08669f517b5f3fd3a6d6e738c47
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9344,
6
  "eval_steps": 500,
7
- "global_step": 4380,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4398,6 +4398,16 @@
4398
  "mean_token_accuracy": 0.7675649732351303,
4399
  "num_tokens": 20384687.0,
4400
  "step": 4380
 
 
 
 
 
 
 
 
 
 
4401
  }
4402
  ],
4403
  "logging_steps": 10,
@@ -4417,7 +4427,7 @@
4417
  "attributes": {}
4418
  }
4419
  },
4420
- "total_flos": 9.65205920584274e+16,
4421
  "train_batch_size": 4,
4422
  "trial_name": null,
4423
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9365333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 4390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4398
  "mean_token_accuracy": 0.7675649732351303,
4399
  "num_tokens": 20384687.0,
4400
  "step": 4380
4401
+ },
4402
+ {
4403
+ "entropy": 0.9403703153133393,
4404
+ "epoch": 0.9365333333333333,
4405
+ "grad_norm": 0.23416976630687714,
4406
+ "learning_rate": 5.773966098814579e-05,
4407
+ "loss": 1.0653534889221192,
4408
+ "mean_token_accuracy": 0.764773941040039,
4409
+ "num_tokens": 20432372.0,
4410
+ "step": 4390
4411
  }
4412
  ],
4413
  "logging_steps": 10,
 
4427
  "attributes": {}
4428
  }
4429
  },
4430
+ "total_flos": 9.673928307549082e+16,
4431
  "train_batch_size": 4,
4432
  "trial_name": null,
4433
  "trial_params": null