seoeunseo commited on
Commit
716ff61
·
verified ·
1 Parent(s): 53a62b1

Training in progress, step 29815, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2808ef6d7c1ec76f205bfc3bf60e8896b8d012108d053710f2b9d7309d7f42b2
3
  size 959732416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c07327c7b7da4c43a47ffb81f6c4f6a4fec25e1261fa34365dcc766509128f4
3
  size 959732416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93f4aafd5e55920f58c0febce193ec74760cdf950639df7ad73eefccd9da8ec1
3
  size 1915006400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1433b898173b1a5c7863cdff93e63e7206b206d609822044b1e2437bfafc192
3
  size 1915006400
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe1ca75804c729ecd6274811b801cee592417281624e7bdb93722530ee68ca62
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbbe6a48b674827d84b4f767001ab0185f81594882203e7c8ba1b5ab2f80c9e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:348961f7e743fe4e2fc3f96e9872ae30fee7c5dae2b7050fcbf673c342e559b9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccc5794f619fdf6bef21052c83e93ac1b40acd82a7f910575e12c05843e440f8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.192520543350662,
5
  "eval_steps": 500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -409,6 +409,69 @@
409
  "learning_rate": 8.074794566493377e-06,
410
  "loss": 0.0309,
411
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  }
413
  ],
414
  "logging_steps": 500,
@@ -423,12 +486,12 @@
423
  "should_evaluate": false,
424
  "should_log": false,
425
  "should_save": true,
426
- "should_training_stop": false
427
  },
428
  "attributes": {}
429
  }
430
  },
431
- "total_flos": 9.02247270044192e+18,
432
  "train_batch_size": 2,
433
  "trial_name": null,
434
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 29815,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
409
  "learning_rate": 8.074794566493377e-06,
410
  "loss": 0.0309,
411
  "step": 25000
412
+ },
413
+ {
414
+ "epoch": 4.2763709542176755,
415
+ "grad_norm": 0.37754014134407043,
416
+ "learning_rate": 7.236290457823244e-06,
417
+ "loss": 0.0312,
418
+ "step": 25500
419
+ },
420
+ {
421
+ "epoch": 4.360221365084689,
422
+ "grad_norm": 0.23429933190345764,
423
+ "learning_rate": 6.397786349153111e-06,
424
+ "loss": 0.0335,
425
+ "step": 26000
426
+ },
427
+ {
428
+ "epoch": 4.444071775951702,
429
+ "grad_norm": 0.25711116194725037,
430
+ "learning_rate": 5.5592822404829784e-06,
431
+ "loss": 0.03,
432
+ "step": 26500
433
+ },
434
+ {
435
+ "epoch": 4.527922186818715,
436
+ "grad_norm": 0.5407963395118713,
437
+ "learning_rate": 4.720778131812846e-06,
438
+ "loss": 0.0325,
439
+ "step": 27000
440
+ },
441
+ {
442
+ "epoch": 4.611772597685729,
443
+ "grad_norm": 0.3212348520755768,
444
+ "learning_rate": 3.882274023142714e-06,
445
+ "loss": 0.0321,
446
+ "step": 27500
447
+ },
448
+ {
449
+ "epoch": 4.695623008552742,
450
+ "grad_norm": 0.34211465716362,
451
+ "learning_rate": 3.043769914472581e-06,
452
+ "loss": 0.0309,
453
+ "step": 28000
454
+ },
455
+ {
456
+ "epoch": 4.779473419419755,
457
+ "grad_norm": 0.19717147946357727,
458
+ "learning_rate": 2.2052658058024483e-06,
459
+ "loss": 0.0306,
460
+ "step": 28500
461
+ },
462
+ {
463
+ "epoch": 4.8633238302867685,
464
+ "grad_norm": 0.4883914589881897,
465
+ "learning_rate": 1.366761697132316e-06,
466
+ "loss": 0.0322,
467
+ "step": 29000
468
+ },
469
+ {
470
+ "epoch": 4.947174241153782,
471
+ "grad_norm": 0.45241132378578186,
472
+ "learning_rate": 5.282575884621835e-07,
473
+ "loss": 0.0319,
474
+ "step": 29500
475
  }
476
  ],
477
  "logging_steps": 500,
 
486
  "should_evaluate": false,
487
  "should_log": false,
488
  "should_save": true,
489
+ "should_training_stop": true
490
  },
491
  "attributes": {}
492
  }
493
  },
494
+ "total_flos": 1.0760159508037632e+19,
495
  "train_batch_size": 2,
496
  "trial_name": null,
497
  "trial_params": null