mgh6 committed on
Commit
cf822c3
·
verified ·
1 Parent(s): 1e66cab

Training in progress, epoch 11, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:738193b43a1ddc49a6b37452f390cf37e63f0bef9a1c61e386a5e2b6d138eecd
3
  size 2682482800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf7f54b64df93ce0f2fcb68fc7e653d2405f54ec2007bc21b1dbbf171b3b035
3
  size 2682482800
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c57b4d51a5d8639db9ae6b325a825cf66ee6d51be8a39f945da91642215c31c2
3
  size 5365108834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be947d954b496169e373878044be7308c4f2c478c1251870839f1a7cb47be36
3
  size 5365108834
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c88d52cdd8bfc63870ec8c6cfb4399bc14947ba93e051c849375243bf4017226
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2322f66ff1c6e187e3a28e99424d0d727cfb601d8182c3cf171308f1229edf01
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b14b4e9c95c66720db6c6a64c68b170bafb8648830648375f2c99f07c4ff19a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4fd1f16fcb95d9548ef1b42dd4a5a37d5b3e9dd4348574c75fdb00d7912893
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9511386752128601,
3
- "best_model_checkpoint": "mgh6/HTH_nt_MLM/checkpoint-2570",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 2570,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,21 @@
157
  "eval_samples_per_second": 46.579,
158
  "eval_steps_per_second": 5.822,
159
  "step": 2570
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 500,
@@ -185,7 +200,7 @@
185
  "attributes": {}
186
  }
187
  },
188
- "total_flos": 3.693286615993549e+17,
189
  "train_batch_size": 8,
190
  "trial_name": null,
191
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9429148435592651,
3
+ "best_model_checkpoint": "mgh6/HTH_nt_MLM/checkpoint-2827",
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 2827,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_samples_per_second": 46.579,
158
  "eval_steps_per_second": 5.822,
159
  "step": 2570
160
+ },
161
+ {
162
+ "epoch": 11.0,
163
+ "grad_norm": 0.5148940682411194,
164
+ "learning_rate": 8.159505208333334e-05,
165
+ "loss": 0.7466,
166
+ "step": 2827
167
+ },
168
+ {
169
+ "epoch": 11.0,
170
+ "eval_loss": 0.9429148435592651,
171
+ "eval_runtime": 9.2986,
172
+ "eval_samples_per_second": 46.459,
173
+ "eval_steps_per_second": 5.807,
174
+ "step": 2827
175
  }
176
  ],
177
  "logging_steps": 500,
 
200
  "attributes": {}
201
  }
202
  },
203
+ "total_flos": 4.062615277592904e+17,
204
  "train_batch_size": 8,
205
  "trial_name": null,
206
  "trial_params": null