moos124 commited on
Commit
721025e
·
verified ·
1 Parent(s): 1b36b35

Training in progress, step 3900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:766a2968831f87935845b94cf2be48203c26036529e1babc3789618d748fd532
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ea9e6fd508e45d1b0ae522a9810346fe23625e219516b549ad54254c64dbd6e
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50bb07a02ae53a5f8733a4bf0b9fd780e85485b72feccf16b45b90891fad5a72
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f606c519b852382af670295682d3d8d2c90a3ce42a4d99f71bb1cccf542329
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85726ef5713cadd1df3ece9c9ff240c456cc7e697b4feb9e69941f0040445bc5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe533b221cc6b1eaa1d9441627c260ce867c6c45607cc6abb24180b84588829b
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5191c82830fae6addcc4e62aa736c747f395bbcb71d6b30fbda00baefa87f10b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56064c1735ee527f04c4d25c090a26dce7f3ac690032151cffbf5e97e8d4c1f
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8298666666666666,
6
  "eval_steps": 500,
7
- "global_step": 3890,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3908,6 +3908,16 @@
3908
  "mean_token_accuracy": 0.7845224231481552,
3909
  "num_tokens": 18135064.0,
3910
  "step": 3890
 
 
 
 
 
 
 
 
 
 
3911
  }
3912
  ],
3913
  "logging_steps": 10,
@@ -3927,7 +3937,7 @@
3927
  "attributes": {}
3928
  }
3929
  },
3930
- "total_flos": 8.583954507099955e+16,
3931
  "train_batch_size": 4,
3932
  "trial_name": null,
3933
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.832,
6
  "eval_steps": 500,
7
+ "global_step": 3900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3908
  "mean_token_accuracy": 0.7845224231481552,
3909
  "num_tokens": 18135064.0,
3910
  "step": 3890
3911
+ },
3912
+ {
3913
+ "entropy": 1.0117795512080192,
3914
+ "epoch": 0.832,
3915
+ "grad_norm": 0.26254504919052124,
3916
+ "learning_rate": 6.596677316924355e-05,
3917
+ "loss": 1.1285503387451172,
3918
+ "mean_token_accuracy": 0.7520590081810952,
3919
+ "num_tokens": 18184374.0,
3920
+ "step": 3900
3921
  }
3922
  ],
3923
  "logging_steps": 10,
 
3937
  "attributes": {}
3938
  }
3939
  },
3940
+ "total_flos": 8.607022187339674e+16,
3941
  "train_batch_size": 4,
3942
  "trial_name": null,
3943
  "trial_params": null