Training in progress, step 8000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:152c34cc1ef8eea86d84f7b0351d9f983b40e24507e8054571349aacd4aba343
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a5b1264363800d835097d941071eaf668b648591456cb18035122aa338a30b9
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8012a529a81b3f92efa4c79d19d5460d546f7ff16907210ecdb6456891de9745
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfe3532ddb10671229c77a55f85cca973229a308c2faa98d60ea12da855a7153
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b50695bbf99bef39c4d13662a35b1f845a2b2c6b19490939ad9cc39127e32ab1
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:16ef5699c401ab357753367766bad7490c0997d4f3cbc8e6689c7f21d470f2f2
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1a5b64fb90c999b23793906d64020914f128f72d1523c4f0f8e8ea53ab2425c
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:578bef6269d270c9ba7be042609ff28604e2fee3538e234c365c9aa652e62f33
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.2671059300557528,
   "eval_steps": 500,
-  "global_step": 7500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5385,6 +5385,364 @@
       "eval_samples_per_second": 266.869,
       "eval_steps_per_second": 5.604,
       "step": 7500
     }
   ],
   "logging_steps": 10,
@@ -5404,7 +5762,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5084052528902963e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.3515796587261362,
   "eval_steps": 500,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 266.869,
       "eval_steps_per_second": 5.604,
       "step": 7500
+    },
+    {
+      "epoch": 1.2687954046291603,
+      "grad_norm": 0.48278650641441345,
+      "learning_rate": 0.00012190597732468595,
+      "loss": 4.407323837280273,
+      "step": 7510
+    },
+    {
+      "epoch": 1.270484879202568,
+      "grad_norm": 0.48528528213500977,
+      "learning_rate": 0.00012143559904956533,
+      "loss": 4.389751815795899,
+      "step": 7520
+    },
+    {
+      "epoch": 1.2721743537759758,
+      "grad_norm": 0.4944697320461273,
+      "learning_rate": 0.00012096551205457511,
+      "loss": 4.385165786743164,
+      "step": 7530
+    },
+    {
+      "epoch": 1.2738638283493833,
+      "grad_norm": 0.5002730488777161,
+      "learning_rate": 0.00012049572113333949,
+      "loss": 4.374062347412109,
+      "step": 7540
+    },
+    {
+      "epoch": 1.275553302922791,
+      "grad_norm": 0.46715047955513,
+      "learning_rate": 0.00012002623107646327,
+      "loss": 4.394298553466797,
+      "step": 7550
+    },
+    {
+      "epoch": 1.2772427774961987,
+      "grad_norm": 0.4903099834918976,
+      "learning_rate": 0.00011955704667148361,
+      "loss": 4.400055694580078,
+      "step": 7560
+    },
+    {
+      "epoch": 1.2789322520696063,
+      "grad_norm": 0.5333164930343628,
+      "learning_rate": 0.00011908817270282048,
+      "loss": 4.424139404296875,
+      "step": 7570
+    },
+    {
+      "epoch": 1.280621726643014,
+      "grad_norm": 0.47946473956108093,
+      "learning_rate": 0.00011861961395172844,
+      "loss": 4.419405746459961,
+      "step": 7580
+    },
+    {
+      "epoch": 1.2823112012164217,
+      "grad_norm": 0.4778226315975189,
+      "learning_rate": 0.00011815137519624767,
+      "loss": 4.414478302001953,
+      "step": 7590
+    },
+    {
+      "epoch": 1.2840006757898292,
+      "grad_norm": 0.4878886342048645,
+      "learning_rate": 0.0001176834612111551,
+      "loss": 4.384803009033203,
+      "step": 7600
+    },
+    {
+      "epoch": 1.285690150363237,
+      "grad_norm": 0.4819967746734619,
+      "learning_rate": 0.0001172158767679161,
+      "loss": 4.3730110168457035,
+      "step": 7610
+    },
+    {
+      "epoch": 1.2873796249366447,
+      "grad_norm": 0.4928823411464691,
+      "learning_rate": 0.00011674862663463538,
+      "loss": 4.3778236389160154,
+      "step": 7620
+    },
+    {
+      "epoch": 1.2890690995100524,
+      "grad_norm": 0.4724312722682953,
+      "learning_rate": 0.00011628171557600869,
+      "loss": 4.387655639648438,
+      "step": 7630
+    },
+    {
+      "epoch": 1.2907585740834602,
+      "grad_norm": 0.5023632049560547,
+      "learning_rate": 0.0001158151483532742,
+      "loss": 4.366682052612305,
+      "step": 7640
+    },
+    {
+      "epoch": 1.2924480486568677,
+      "grad_norm": 0.47042906284332275,
+      "learning_rate": 0.00011534892972416382,
+      "loss": 4.3992149353027346,
+      "step": 7650
+    },
+    {
+      "epoch": 1.2941375232302754,
+      "grad_norm": 0.5019961595535278,
+      "learning_rate": 0.00011488306444285465,
+      "loss": 4.408546829223633,
+      "step": 7660
+    },
+    {
+      "epoch": 1.2958269978036832,
+      "grad_norm": 0.4686186909675598,
+      "learning_rate": 0.0001144175572599207,
+      "loss": 4.392362976074219,
+      "step": 7670
+    },
+    {
+      "epoch": 1.2975164723770907,
+      "grad_norm": 0.5097217559814453,
+      "learning_rate": 0.00011395241292228435,
+      "loss": 4.350882339477539,
+      "step": 7680
+    },
+    {
+      "epoch": 1.2992059469504984,
+      "grad_norm": 0.5009888410568237,
+      "learning_rate": 0.00011348763617316781,
+      "loss": 4.407807159423828,
+      "step": 7690
+    },
+    {
+      "epoch": 1.3008954215239061,
+      "grad_norm": 0.4623536765575409,
+      "learning_rate": 0.00011302323175204497,
+      "loss": 4.383738708496094,
+      "step": 7700
+    },
+    {
+      "epoch": 1.3025848960973136,
+      "grad_norm": 0.49098923802375793,
+      "learning_rate": 0.00011255920439459302,
+      "loss": 4.3777015686035154,
+      "step": 7710
+    },
+    {
+      "epoch": 1.3042743706707214,
+      "grad_norm": 0.47158893942832947,
+      "learning_rate": 0.00011209555883264406,
+      "loss": 4.398603439331055,
+      "step": 7720
+    },
+    {
+      "epoch": 1.3059638452441291,
+      "grad_norm": 0.4723564684391022,
+      "learning_rate": 0.00011163229979413685,
+      "loss": 4.379953384399414,
+      "step": 7730
+    },
+    {
+      "epoch": 1.3076533198175366,
+      "grad_norm": 0.478575199842453,
+      "learning_rate": 0.00011116943200306871,
+      "loss": 4.369690322875977,
+      "step": 7740
+    },
+    {
+      "epoch": 1.3093427943909444,
+      "grad_norm": 0.4801791310310364,
+      "learning_rate": 0.00011070696017944728,
+      "loss": 4.421099853515625,
+      "step": 7750
+    },
+    {
+      "epoch": 1.311032268964352,
+      "grad_norm": 0.5147274732589722,
+      "learning_rate": 0.00011024488903924235,
+      "loss": 4.396934127807617,
+      "step": 7760
+    },
+    {
+      "epoch": 1.3127217435377598,
+      "grad_norm": 0.4905327558517456,
+      "learning_rate": 0.00010978322329433796,
+      "loss": 4.368836975097656,
+      "step": 7770
+    },
+    {
+      "epoch": 1.3144112181111673,
+      "grad_norm": 0.47583821415901184,
+      "learning_rate": 0.00010932196765248396,
+      "loss": 4.351024627685547,
+      "step": 7780
+    },
+    {
+      "epoch": 1.316100692684575,
+      "grad_norm": 0.4749636650085449,
+      "learning_rate": 0.0001088611268172485,
+      "loss": 4.381603622436524,
+      "step": 7790
+    },
+    {
+      "epoch": 1.3177901672579828,
+      "grad_norm": 0.47106119990348816,
+      "learning_rate": 0.00010840070548796967,
+      "loss": 4.386127471923828,
+      "step": 7800
+    },
+    {
+      "epoch": 1.3194796418313905,
+      "grad_norm": 0.49278977513313293,
+      "learning_rate": 0.00010794070835970782,
+      "loss": 4.393439865112304,
+      "step": 7810
+    },
+    {
+      "epoch": 1.321169116404798,
+      "grad_norm": 0.49596497416496277,
+      "learning_rate": 0.00010748114012319747,
+      "loss": 4.369705581665039,
+      "step": 7820
+    },
+    {
+      "epoch": 1.3228585909782058,
+      "grad_norm": 0.48959940671920776,
+      "learning_rate": 0.0001070220054647997,
+      "loss": 4.353339767456054,
+      "step": 7830
+    },
+    {
+      "epoch": 1.3245480655516135,
+      "grad_norm": 0.4975447952747345,
+      "learning_rate": 0.00010656330906645422,
+      "loss": 4.378279113769532,
+      "step": 7840
+    },
+    {
+      "epoch": 1.326237540125021,
+      "grad_norm": 0.48734408617019653,
+      "learning_rate": 0.00010610505560563163,
+      "loss": 4.365981674194336,
+      "step": 7850
+    },
+    {
+      "epoch": 1.3279270146984288,
+      "grad_norm": 0.4985700845718384,
+      "learning_rate": 0.00010564724975528584,
+      "loss": 4.384627151489258,
+      "step": 7860
+    },
+    {
+      "epoch": 1.3296164892718365,
+      "grad_norm": 0.48617759346961975,
+      "learning_rate": 0.00010518989618380632,
+      "loss": 4.387208938598633,
+      "step": 7870
+    },
+    {
+      "epoch": 1.331305963845244,
+      "grad_norm": 0.479184091091156,
+      "learning_rate": 0.00010473299955497044,
+      "loss": 4.39497184753418,
+      "step": 7880
+    },
+    {
+      "epoch": 1.3329954384186518,
+      "grad_norm": 0.5024631023406982,
+      "learning_rate": 0.000104276564527896,
+      "loss": 4.341180801391602,
+      "step": 7890
+    },
+    {
+      "epoch": 1.3346849129920595,
+      "grad_norm": 0.5147078633308411,
+      "learning_rate": 0.0001038205957569938,
+      "loss": 4.36151008605957,
+      "step": 7900
+    },
+    {
+      "epoch": 1.336374387565467,
+      "grad_norm": 0.4864480197429657,
+      "learning_rate": 0.00010336509789191994,
+      "loss": 4.3700817108154295,
+      "step": 7910
+    },
+    {
+      "epoch": 1.3380638621388747,
+      "grad_norm": 0.48009052872657776,
+      "learning_rate": 0.00010291007557752861,
+      "loss": 4.372967910766602,
+      "step": 7920
+    },
+    {
+      "epoch": 1.3397533367122825,
+      "grad_norm": 0.4770645499229431,
+      "learning_rate": 0.00010245553345382467,
+      "loss": 4.361065673828125,
+      "step": 7930
+    },
+    {
+      "epoch": 1.3414428112856902,
+      "grad_norm": 0.47222378849983215,
+      "learning_rate": 0.00010200147615591643,
+      "loss": 4.3356986999511715,
+      "step": 7940
+    },
+    {
+      "epoch": 1.343132285859098,
+      "grad_norm": 0.513080894947052,
+      "learning_rate": 0.00010154790831396805,
+      "loss": 4.402030181884766,
+      "step": 7950
+    },
+    {
+      "epoch": 1.3448217604325055,
+      "grad_norm": 0.48416030406951904,
+      "learning_rate": 0.00010109483455315269,
+      "loss": 4.381985855102539,
+      "step": 7960
+    },
+    {
+      "epoch": 1.3465112350059132,
+      "grad_norm": 0.46342408657073975,
+      "learning_rate": 0.00010064225949360525,
+      "loss": 4.364437103271484,
+      "step": 7970
+    },
+    {
+      "epoch": 1.348200709579321,
+      "grad_norm": 0.4690420925617218,
+      "learning_rate": 0.00010019018775037509,
+      "loss": 4.399689102172852,
+      "step": 7980
+    },
+    {
+      "epoch": 1.3498901841527284,
+      "grad_norm": 0.47876372933387756,
+      "learning_rate": 9.973862393337925e-05,
+      "loss": 4.388835144042969,
+      "step": 7990
+    },
+    {
+      "epoch": 1.3515796587261362,
+      "grad_norm": 0.48350629210472107,
+      "learning_rate": 9.928757264735506e-05,
+      "loss": 4.405188751220703,
+      "step": 8000
+    },
+    {
+      "epoch": 1.3515796587261362,
+      "eval_loss": 4.357097148895264,
+      "eval_runtime": 3.734,
+      "eval_samples_per_second": 267.812,
+      "eval_steps_per_second": 5.624,
+      "step": 8000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.6756336633197363e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null