mgh6 commited on
Commit
b7ab815
·
verified ·
1 Parent(s): 3cab61c

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d83f997012693855cffc76cc9f0284d03c34033bf8dfa18754e4844392487e2
3
  size 2611614300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48475f083a5c2dd20122d335f8f1642d2b1704a4a54b55ecc266741d8af4decc
3
  size 2611614300
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:243605873029fbc0a2972efec25e0f01e22e5afe67e67dc13ca375cd9525c6a7
3
  size 5213028466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd01a2371483ae6f6e5acd61fd57cc71db67bf7044ba7e1b6b06f33fe8ab5258
3
  size 5213028466
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d20f3103c38ed7048ffd97ef9a323e40cf73fc6a4fe1e50af8aeeb52d1fe2ac
3
  size 14942
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5c2bb2c73c38e1a76a17a11eb153818930706a055ece3df18411968d5d527a9
3
  size 14942
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:218c032aa5091c27de2cef35285afb12a9d3cbfe80e6248c615057c7bae5ef8e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd7d798f6dc160785a8da7f98e23574f8db4fbcce75b7717e46f600053dad5a1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5998488662036323,
5
  "eval_steps": 500,
6
- "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -207,6 +207,21 @@
207
  "learning_rate": 0.0009400149962509373,
208
  "loss": 972726877552.64,
209
  "step": 2400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
  }
211
  ],
212
  "logging_steps": 100,
@@ -226,7 +241,7 @@
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 4.667711358542807e+17,
230
  "train_batch_size": 1,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.624842568962117,
5
  "eval_steps": 500,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
207
  "learning_rate": 0.0009400149962509373,
208
  "loss": 972726877552.64,
209
  "step": 2400
210
+ },
211
+ {
212
+ "epoch": 0.624842568962117,
213
+ "grad_norm": 0.6738520264625549,
214
+ "learning_rate": 0.0009375156210947263,
215
+ "loss": 1207760825876.48,
216
+ "step": 2500
217
+ },
218
+ {
219
+ "epoch": 0.624842568962117,
220
+ "eval_loss": NaN,
221
+ "eval_runtime": 423.912,
222
+ "eval_samples_per_second": 36.616,
223
+ "eval_steps_per_second": 36.616,
224
+ "step": 2500
225
  }
226
  ],
227
  "logging_steps": 100,
 
241
  "attributes": {}
242
  }
243
  },
244
+ "total_flos": 4.862199331815424e+17,
245
  "train_batch_size": 1,
246
  "trial_name": null,
247
  "trial_params": null