Training in progress, step 11838, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +235 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbe911786fb4b3454d02608c237b36cc20b52333d42fa68272921c094a01a632
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:321e47c1dc832a190631f95fac9772b430fc73140d3d5243eca49fa4976c0528
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83e156617695722c4ccab8876c70abb964581f51616c0cec63d83f236c2f3130
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2507c13555041b38460e977baf58f0396d26b9357458953b37b5599c0ee7222
 size 318646859

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:116f0b85bffdc97adeb264e8dbd65d6acc7d514e82a48ea5ea50bd5091784a48
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b172ef1a2b23540cb3d53eed9b6dcd9ee9e06553bb8c4f5a46142cb0fe60689
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.9428957594188208,
   "eval_steps": 500,
-  "global_step": 11500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8249,6 +8249,237 @@
       "eval_samples_per_second": 246.128,
       "eval_steps_per_second": 5.169,
       "step": 11500
     }
   ],
   "logging_steps": 10,
@@ -8263,12 +8494,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.846232536325816e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.0,
   "eval_steps": 500,
+  "global_step": 11838,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 246.128,
       "eval_steps_per_second": 5.169,
       "step": 11500
+    },
+    {
+      "epoch": 1.9445852339922283,
+      "grad_norm": 0.4352650046348572,
+      "learning_rate": 8.270650415620584e-07,
+      "loss": 4.2965538024902346,
+      "step": 11510
+    },
+    {
+      "epoch": 1.9462747085656362,
+      "grad_norm": 0.43585142493247986,
+      "learning_rate": 7.775944524542055e-07,
+      "loss": 4.270129776000976,
+      "step": 11520
+    },
+    {
+      "epoch": 1.9479641831390437,
+      "grad_norm": 0.4469541907310486,
+      "learning_rate": 7.296455308872406e-07,
+      "loss": 4.283909606933594,
+      "step": 11530
+    },
+    {
+      "epoch": 1.9496536577124515,
+      "grad_norm": 0.4361380636692047,
+      "learning_rate": 6.832187658113441e-07,
+      "loss": 4.296160125732422,
+      "step": 11540
+    },
+    {
+      "epoch": 1.9513431322858592,
+      "grad_norm": 0.44409504532814026,
+      "learning_rate": 6.383146306547626e-07,
+      "loss": 4.304541778564453,
+      "step": 11550
+    },
+    {
+      "epoch": 1.9530326068592667,
+      "grad_norm": 0.45062074065208435,
+      "learning_rate": 5.949335833189628e-07,
+      "loss": 4.3281913757324215,
+      "step": 11560
+    },
+    {
+      "epoch": 1.9547220814326745,
+      "grad_norm": 0.45208507776260376,
+      "learning_rate": 5.530760661741018e-07,
+      "loss": 4.3035846710205075,
+      "step": 11570
+    },
+    {
+      "epoch": 1.9564115560060822,
+      "grad_norm": 0.44333794713020325,
+      "learning_rate": 5.127425060543478e-07,
+      "loss": 4.278887939453125,
+      "step": 11580
+    },
+    {
+      "epoch": 1.9581010305794897,
+      "grad_norm": 0.44367748498916626,
+      "learning_rate": 4.7393331425364943e-07,
+      "loss": 4.281793594360352,
+      "step": 11590
+    },
+    {
+      "epoch": 1.9597905051528974,
+      "grad_norm": 0.4411092698574066,
+      "learning_rate": 4.3664888652144017e-07,
+      "loss": 4.278807067871094,
+      "step": 11600
+    },
+    {
+      "epoch": 1.9614799797263052,
+      "grad_norm": 0.44609910249710083,
+      "learning_rate": 4.008896030587072e-07,
+      "loss": 4.270274353027344,
+      "step": 11610
+    },
+    {
+      "epoch": 1.9631694542997127,
+      "grad_norm": 0.43740522861480713,
+      "learning_rate": 3.6665582851406195e-07,
+      "loss": 4.296014785766602,
+      "step": 11620
+    },
+    {
+      "epoch": 1.9648589288731204,
+      "grad_norm": 0.44448962807655334,
+      "learning_rate": 3.3394791198000927e-07,
+      "loss": 4.282284927368164,
+      "step": 11630
+    },
+    {
+      "epoch": 1.9665484034465281,
+      "grad_norm": 0.45065152645111084,
+      "learning_rate": 3.027661869893672e-07,
+      "loss": 4.2820892333984375,
+      "step": 11640
+    },
+    {
+      "epoch": 1.9682378780199357,
+      "grad_norm": 0.4398045539855957,
+      "learning_rate": 2.731109715119861e-07,
+      "loss": 4.281244277954102,
+      "step": 11650
+    },
+    {
+      "epoch": 1.9699273525933436,
+      "grad_norm": 0.4467960000038147,
+      "learning_rate": 2.4498256795135173e-07,
+      "loss": 4.307322311401367,
+      "step": 11660
+    },
+    {
+      "epoch": 1.9716168271667511,
+      "grad_norm": 0.4327242970466614,
+      "learning_rate": 2.183812631415871e-07,
+      "loss": 4.275672149658203,
+      "step": 11670
+    },
+    {
+      "epoch": 1.9733063017401589,
+      "grad_norm": 0.43306484818458557,
+      "learning_rate": 1.933073283445219e-07,
+      "loss": 4.291437149047852,
+      "step": 11680
+    },
+    {
+      "epoch": 1.9749957763135666,
+      "grad_norm": 0.4464097023010254,
+      "learning_rate": 1.697610192469112e-07,
+      "loss": 4.312542343139649,
+      "step": 11690
+    },
+    {
+      "epoch": 1.976685250886974,
+      "grad_norm": 0.4436480700969696,
+      "learning_rate": 1.4774257595783766e-07,
+      "loss": 4.300673294067383,
+      "step": 11700
+    },
+    {
+      "epoch": 1.9783747254603818,
+      "grad_norm": 0.44450485706329346,
+      "learning_rate": 1.272522230062467e-07,
+      "loss": 4.290340805053711,
+      "step": 11710
+    },
+    {
+      "epoch": 1.9800642000337896,
+      "grad_norm": 0.4362986981868744,
+      "learning_rate": 1.0829016933869838e-07,
+      "loss": 4.2894245147705075,
+      "step": 11720
+    },
+    {
+      "epoch": 1.981753674607197,
+      "grad_norm": 0.43450725078582764,
+      "learning_rate": 9.085660831715247e-08,
+      "loss": 4.298795700073242,
+      "step": 11730
+    },
+    {
+      "epoch": 1.9834431491806048,
+      "grad_norm": 0.44246765971183777,
+      "learning_rate": 7.495171771710328e-08,
+      "loss": 4.293585968017578,
+      "step": 11740
+    },
+    {
+      "epoch": 1.9851326237540126,
+      "grad_norm": 0.43929263949394226,
+      "learning_rate": 6.057565972568123e-08,
+      "loss": 4.293174743652344,
+      "step": 11750
+    },
+    {
+      "epoch": 1.98682209832742,
+      "grad_norm": 0.4450415372848511,
+      "learning_rate": 4.772858094005405e-08,
+      "loss": 4.3004913330078125,
+      "step": 11760
+    },
+    {
+      "epoch": 1.9885115729008278,
+      "grad_norm": 0.4472520053386688,
+      "learning_rate": 3.641061236591136e-08,
+      "loss": 4.2836250305175785,
+      "step": 11770
+    },
+    {
+      "epoch": 1.9902010474742355,
+      "grad_norm": 0.44302183389663696,
+      "learning_rate": 2.6621869416099118e-08,
+      "loss": 4.290175247192383,
+      "step": 11780
+    },
+    {
+      "epoch": 1.991890522047643,
+      "grad_norm": 0.4414844512939453,
+      "learning_rate": 1.8362451909520458e-08,
+      "loss": 4.286873245239258,
+      "step": 11790
+    },
+    {
+      "epoch": 1.993579996621051,
+      "grad_norm": 0.44598934054374695,
+      "learning_rate": 1.16324440700033e-08,
+      "loss": 4.297615051269531,
+      "step": 11800
+    },
+    {
+      "epoch": 1.9952694711944585,
+      "grad_norm": 0.4523853361606598,
+      "learning_rate": 6.431914525567572e-09,
+      "loss": 4.289733505249023,
+      "step": 11810
+    },
+    {
+      "epoch": 1.996958945767866,
+      "grad_norm": 0.4494129419326782,
+      "learning_rate": 2.760916307625871e-09,
+      "loss": 4.304800415039063,
+      "step": 11820
+    },
+    {
+      "epoch": 1.998648420341274,
+      "grad_norm": 0.4344528913497925,
+      "learning_rate": 6.194868504838524e-10,
+      "loss": 4.279055786132813,
+      "step": 11830
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.959258038224814e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null