moos124 committed on
Commit
eb2db06
·
verified ·
1 Parent(s): 1d16b7f

Training in progress, step 3800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38de148b42235201b6f25e82304ce8313ab40648d17c6d2773a002faf625cfb2
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1860091f6919f7735d38c70ff6a7843b1af1fc98d35d421517ebb72d27e35a8b
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:17e147d862df4e75cb4a34bfc407d6115443fcf13052e77ce4269ec3ecb3a800
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3baddf5b574a25960405c4dfad82684c402c2fe9a1d6829f286d4a343034acae
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b08ed07c30748809e170fd825eca3191e635e294121c3c9b7fbbf0d0e06fd62
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d887e8675b7564c41377da8b4aef48cfb83a5ecf6b9df33bf5c9bc0f1798223
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bd6961a16775d2dfd539e613859a399e48d4390d84444f6f9175e36534a47b5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adc0426eabe638b25789d83999511b320746a743905b8e6521fb7ada8e58d21
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8085333333333333,
6
  "eval_steps": 500,
7
- "global_step": 3790,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3808,6 +3808,16 @@
3808
  "mean_token_accuracy": 0.7612074792385102,
3809
  "num_tokens": 17657838.0,
3810
  "step": 3790
 
 
 
 
 
 
 
 
 
 
3811
  }
3812
  ],
3813
  "logging_steps": 10,
@@ -3827,7 +3837,7 @@
3827
  "attributes": {}
3828
  }
3829
  },
3830
- "total_flos": 8.36086750049833e+16,
3831
  "train_batch_size": 4,
3832
  "trial_name": null,
3833
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.8106666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 3800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3808
  "mean_token_accuracy": 0.7612074792385102,
3809
  "num_tokens": 17657838.0,
3810
  "step": 3790
3811
+ },
3812
+ {
3813
+ "entropy": 0.9567753560841084,
3814
+ "epoch": 0.8106666666666666,
3815
+ "grad_norm": 0.2228858321905136,
3816
+ "learning_rate": 6.759697848007238e-05,
3817
+ "loss": 1.0671761512756348,
3818
+ "mean_token_accuracy": 0.7626087903976441,
3819
+ "num_tokens": 17705375.0,
3820
+ "step": 3800
3821
  }
3822
  ],
3823
  "logging_steps": 10,
 
3837
  "attributes": {}
3838
  }
3839
  },
3840
+ "total_flos": 8.382420285614285e+16,
3841
  "train_batch_size": 4,
3842
  "trial_name": null,
3843
  "trial_params": null