shorecode committed on
Commit
a5a8b09
·
verified ·
1 Parent(s): b34c5c9

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:987cdddaddd6ee9efcbf0406b5dbea003e19c76523f179ea3826ca2f5707f3cd
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0658507901e47a1e081e2301e0186a70659dfde4aff7e134ef8412a196e54b86
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4ea40a6da395d8e9289cbe3e64187c17945564d3b3cb832aeaf878ad5504ff8
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e1a6161b8cd8bad5f97beb9334e870db02a25dc2cceea6bf6e8548be9b91bb0
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b7133fcea647449b2caaa65223ebbc0c180189bbd59dbe842634047836d81e2
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b0ec4d2188868fd24263efa2856258953fca7ad21aed2b50e22b491f1d8939f
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac8a8c1f0ca136b116df977b16704d013fad49cab357d7231675ec3945e85ad2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a902ca58dea28be10847ac21293e6d27c44fc74bd49d763b881d90cbd1e58f0a
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5278901988386415,
6
  "eval_steps": 500,
7
- "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -224,6 +224,78 @@
224
  "eval_samples_per_second": 107.962,
225
  "eval_steps_per_second": 4.154,
226
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  }
228
  ],
229
  "logging_steps": 125,
@@ -243,7 +315,7 @@
243
  "attributes": {}
244
  }
245
  },
246
- "total_flos": 1760138625024000.0,
247
  "train_batch_size": 26,
248
  "trial_name": null,
249
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7038535984515221,
6
  "eval_steps": 500,
7
+ "global_step": 4000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
224
  "eval_samples_per_second": 107.962,
225
  "eval_steps_per_second": 4.154,
226
  "step": 3000
227
+ },
228
+ {
229
+ "epoch": 0.5498856237902516,
230
+ "grad_norm": 0.7471196055412292,
231
+ "learning_rate": 0.0002450290339609361,
232
+ "loss": 3.3313,
233
+ "step": 3125
234
+ },
235
+ {
236
+ "epoch": 0.5718810487418617,
237
+ "grad_norm": 0.7734588384628296,
238
+ "learning_rate": 0.00024282949146577507,
239
+ "loss": 3.3004,
240
+ "step": 3250
241
+ },
242
+ {
243
+ "epoch": 0.5938764736934717,
244
+ "grad_norm": 0.7516855597496033,
245
+ "learning_rate": 0.0002406299489706141,
246
+ "loss": 3.304,
247
+ "step": 3375
248
+ },
249
+ {
250
+ "epoch": 0.6158718986450818,
251
+ "grad_norm": 0.7859501242637634,
252
+ "learning_rate": 0.0002384304064754531,
253
+ "loss": 3.2958,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 0.6158718986450818,
258
+ "eval_loss": 2.7763619422912598,
259
+ "eval_runtime": 91.3446,
260
+ "eval_samples_per_second": 107.833,
261
+ "eval_steps_per_second": 4.149,
262
+ "step": 3500
263
+ },
264
+ {
265
+ "epoch": 0.6378673235966918,
266
+ "grad_norm": 0.754370927810669,
267
+ "learning_rate": 0.00023623086398029207,
268
+ "loss": 3.2671,
269
+ "step": 3625
270
+ },
271
+ {
272
+ "epoch": 0.659862748548302,
273
+ "grad_norm": 0.8835521936416626,
274
+ "learning_rate": 0.00023403132148513107,
275
+ "loss": 3.284,
276
+ "step": 3750
277
+ },
278
+ {
279
+ "epoch": 0.6818581734999121,
280
+ "grad_norm": 0.7852393984794617,
281
+ "learning_rate": 0.00023183177898997007,
282
+ "loss": 3.2879,
283
+ "step": 3875
284
+ },
285
+ {
286
+ "epoch": 0.7038535984515221,
287
+ "grad_norm": 0.6869296431541443,
288
+ "learning_rate": 0.00022963223649480905,
289
+ "loss": 3.2702,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 0.7038535984515221,
294
+ "eval_loss": 2.7806477546691895,
295
+ "eval_runtime": 91.4522,
296
+ "eval_samples_per_second": 107.707,
297
+ "eval_steps_per_second": 4.144,
298
+ "step": 4000
299
  }
300
  ],
301
  "logging_steps": 125,
 
315
  "attributes": {}
316
  }
317
  },
318
+ "total_flos": 2346851500032000.0,
319
  "train_batch_size": 26,
320
  "trial_name": null,
321
  "trial_params": null