Training in progress, step 6000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:850517b9cf5da4903168f8b9dbfcfcb01385d34bc0d5bd1c93041c99d5afbbab
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c74bfe809433060df3635ef406235f0717bc42781fff9acd5df0f855eb57b3f
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8eaed0cac576a8a9a03addbea043ecae521ca2a1d3d91c2f8f4543bcfc559783
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:70990f23441c3c0fadf8ff7b5b48864178e6a3f9dbc5c1184cb7c19ddf968c0f
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a9c47849ad44860f45019fca12bd8b47e7589be1317a01ad6705b924156a6be
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:11940f1313899a11d3e47a2d43f508134dd8e03ac7613f4eca32c754da2d1839
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bda4b56b57284b5d776cea834f86539fa062d5e046885e07dcb7516921ccd6ee
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:5732bb4fae95fda377427872ad7c4fed0c45a84922701b3143ffa39cf761f9db
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9292110153742186,
   "eval_steps": 500,
-  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3953,6 +3953,364 @@
       "eval_samples_per_second": 275.136,
       "eval_steps_per_second": 5.778,
       "step": 5500
     }
   ],
   "logging_steps": 10,
@@ -3972,7 +4330,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.83951251472384e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0136847440446022,
   "eval_steps": 500,
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 275.136,
       "eval_steps_per_second": 5.778,
       "step": 5500
+    },
+    {
+      "epoch": 0.9309004899476263,
+      "grad_norm": 0.4983241558074951,
+      "learning_rate": 0.00021527977734609537,
+      "loss": 4.547625732421875,
+      "step": 5510
+    },
+    {
+      "epoch": 0.9325899645210339,
+      "grad_norm": 0.5012770295143127,
+      "learning_rate": 0.00021484818619522722,
+      "loss": 4.557040023803711,
+      "step": 5520
+    },
+    {
+      "epoch": 0.9342794390944417,
+      "grad_norm": 0.5078200101852417,
+      "learning_rate": 0.00021441593376712224,
+      "loss": 4.553184890747071,
+      "step": 5530
+    },
+    {
+      "epoch": 0.9359689136678493,
+      "grad_norm": 0.48705384135246277,
+      "learning_rate": 0.0002139830244695935,
+      "loss": 4.5813232421875,
+      "step": 5540
+    },
+    {
+      "epoch": 0.937658388241257,
+      "grad_norm": 0.5023474097251892,
+      "learning_rate": 0.00021354946271715265,
+      "loss": 4.552815628051758,
+      "step": 5550
+    },
+    {
+      "epoch": 0.9393478628146646,
+      "grad_norm": 0.5058281421661377,
+      "learning_rate": 0.00021311525293096444,
+      "loss": 4.541165924072265,
+      "step": 5560
+    },
+    {
+      "epoch": 0.9410373373880723,
+      "grad_norm": 0.5129496455192566,
+      "learning_rate": 0.00021268039953880184,
+      "loss": 4.529154968261719,
+      "step": 5570
+    },
+    {
+      "epoch": 0.94272681196148,
+      "grad_norm": 0.5097109079360962,
+      "learning_rate": 0.00021224490697500088,
+      "loss": 4.535088348388672,
+      "step": 5580
+    },
+    {
+      "epoch": 0.9444162865348876,
+      "grad_norm": 0.5103420615196228,
+      "learning_rate": 0.00021180877968041552,
+      "loss": 4.553527069091797,
+      "step": 5590
+    },
+    {
+      "epoch": 0.9461057611082954,
+      "grad_norm": 0.4936409294605255,
+      "learning_rate": 0.00021137202210237213,
+      "loss": 4.54007568359375,
+      "step": 5600
+    },
+    {
+      "epoch": 0.947795235681703,
+      "grad_norm": 0.5701144933700562,
+      "learning_rate": 0.0002109346386946243,
+      "loss": 4.558887100219726,
+      "step": 5610
+    },
+    {
+      "epoch": 0.9494847102551106,
+      "grad_norm": 0.4890182912349701,
+      "learning_rate": 0.00021049663391730752,
+      "loss": 4.543179702758789,
+      "step": 5620
+    },
+    {
+      "epoch": 0.9511741848285183,
+      "grad_norm": 0.5074143409729004,
+      "learning_rate": 0.00021005801223689344,
+      "loss": 4.5704292297363285,
+      "step": 5630
+    },
+    {
+      "epoch": 0.952863659401926,
+      "grad_norm": 0.4767675995826721,
+      "learning_rate": 0.00020961877812614458,
+      "loss": 4.569948196411133,
+      "step": 5640
+    },
+    {
+      "epoch": 0.9545531339753337,
+      "grad_norm": 0.5034293532371521,
+      "learning_rate": 0.00020917893606406843,
+      "loss": 4.524322128295898,
+      "step": 5650
+    },
+    {
+      "epoch": 0.9562426085487413,
+      "grad_norm": 0.5619840621948242,
+      "learning_rate": 0.0002087384905358722,
+      "loss": 4.528865051269531,
+      "step": 5660
+    },
+    {
+      "epoch": 0.9579320831221491,
+      "grad_norm": 0.5692474842071533,
+      "learning_rate": 0.00020829744603291663,
+      "loss": 4.5155292510986325,
+      "step": 5670
+    },
+    {
+      "epoch": 0.9596215576955567,
+      "grad_norm": 0.504224419593811,
+      "learning_rate": 0.00020785580705267047,
+      "loss": 4.559905624389648,
+      "step": 5680
+    },
+    {
+      "epoch": 0.9613110322689643,
+      "grad_norm": 0.563014805316925,
+      "learning_rate": 0.00020741357809866447,
+      "loss": 4.556017303466797,
+      "step": 5690
+    },
+    {
+      "epoch": 0.963000506842372,
+      "grad_norm": 0.4872301518917084,
+      "learning_rate": 0.0002069707636804457,
+      "loss": 4.550839233398437,
+      "step": 5700
+    },
+    {
+      "epoch": 0.9646899814157797,
+      "grad_norm": 0.5135483145713806,
+      "learning_rate": 0.0002065273683135312,
+      "loss": 4.550697708129883,
+      "step": 5710
+    },
+    {
+      "epoch": 0.9663794559891874,
+      "grad_norm": 0.4852290451526642,
+      "learning_rate": 0.00020608339651936224,
+      "loss": 4.531842422485352,
+      "step": 5720
+    },
+    {
+      "epoch": 0.968068930562595,
+      "grad_norm": 0.5045028924942017,
+      "learning_rate": 0.00020563885282525802,
+      "loss": 4.532521057128906,
+      "step": 5730
+    },
+    {
+      "epoch": 0.9697584051360028,
+      "grad_norm": 0.530616044998169,
+      "learning_rate": 0.00020519374176436968,
+      "loss": 4.546891403198242,
+      "step": 5740
+    },
+    {
+      "epoch": 0.9714478797094104,
+      "grad_norm": 0.49565091729164124,
+      "learning_rate": 0.00020474806787563392,
+      "loss": 4.533766555786133,
+      "step": 5750
+    },
+    {
+      "epoch": 0.973137354282818,
+      "grad_norm": 0.5225724577903748,
+      "learning_rate": 0.0002043018357037267,
+      "loss": 4.542942810058594,
+      "step": 5760
+    },
+    {
+      "epoch": 0.9748268288562257,
+      "grad_norm": 0.49189162254333496,
+      "learning_rate": 0.00020385504979901712,
+      "loss": 4.545899200439453,
+      "step": 5770
+    },
+    {
+      "epoch": 0.9765163034296334,
+      "grad_norm": 0.5116291642189026,
+      "learning_rate": 0.00020340771471752078,
+      "loss": 4.532541656494141,
+      "step": 5780
+    },
+    {
+      "epoch": 0.9782057780030411,
+      "grad_norm": 0.5132644772529602,
+      "learning_rate": 0.0002029598350208534,
+      "loss": 4.524928283691406,
+      "step": 5790
+    },
+    {
+      "epoch": 0.9798952525764487,
+      "grad_norm": 0.4904372990131378,
+      "learning_rate": 0.00020251141527618434,
+      "loss": 4.532776641845703,
+      "step": 5800
+    },
+    {
+      "epoch": 0.9815847271498563,
+      "grad_norm": 0.48598089814186096,
+      "learning_rate": 0.00020206246005618998,
+      "loss": 4.519465637207031,
+      "step": 5810
+    },
+    {
+      "epoch": 0.9832742017232641,
+      "grad_norm": 0.5415476560592651,
+      "learning_rate": 0.00020161297393900713,
+      "loss": 4.512179565429688,
+      "step": 5820
+    },
+    {
+      "epoch": 0.9849636762966717,
+      "grad_norm": 0.5061231255531311,
+      "learning_rate": 0.00020116296150818623,
+      "loss": 4.534863662719727,
+      "step": 5830
+    },
+    {
+      "epoch": 0.9866531508700794,
+      "grad_norm": 0.5157834887504578,
+      "learning_rate": 0.0002007124273526449,
+      "loss": 4.50738639831543,
+      "step": 5840
+    },
+    {
+      "epoch": 0.988342625443487,
+      "grad_norm": 0.509292483329773,
+      "learning_rate": 0.00020026137606662077,
+      "loss": 4.5266845703125,
+      "step": 5850
+    },
+    {
+      "epoch": 0.9900321000168948,
+      "grad_norm": 0.5107020139694214,
+      "learning_rate": 0.0001998098122496249,
+      "loss": 4.533035659790039,
+      "step": 5860
+    },
+    {
+      "epoch": 0.9917215745903024,
+      "grad_norm": 0.5432437062263489,
+      "learning_rate": 0.00019935774050639472,
+      "loss": 4.518278884887695,
+      "step": 5870
+    },
+    {
+      "epoch": 0.99341104916371,
+      "grad_norm": 0.5360410213470459,
+      "learning_rate": 0.0001989051654468473,
+      "loss": 4.502675628662109,
+      "step": 5880
+    },
+    {
+      "epoch": 0.9951005237371178,
+      "grad_norm": 0.5418276786804199,
+      "learning_rate": 0.00019845209168603195,
+      "loss": 4.5235343933105465,
+      "step": 5890
+    },
+    {
+      "epoch": 0.9967899983105254,
+      "grad_norm": 0.5157185792922974,
+      "learning_rate": 0.00019799852384408355,
+      "loss": 4.524081420898438,
+      "step": 5900
+    },
+    {
+      "epoch": 0.9984794728839331,
+      "grad_norm": 0.5043293237686157,
+      "learning_rate": 0.00019754446654617527,
+      "loss": 4.508223342895508,
+      "step": 5910
+    },
+    {
+      "epoch": 1.0001689474573408,
+      "grad_norm": 0.5386601090431213,
+      "learning_rate": 0.00019708992442247136,
+      "loss": 4.5236083984375,
+      "step": 5920
+    },
+    {
+      "epoch": 1.0018584220307485,
+      "grad_norm": 0.5341511368751526,
+      "learning_rate": 0.0001966349021080801,
+      "loss": 4.459320068359375,
+      "step": 5930
+    },
+    {
+      "epoch": 1.003547896604156,
+      "grad_norm": 0.5038416981697083,
+      "learning_rate": 0.0001961794042430062,
+      "loss": 4.505880355834961,
+      "step": 5940
+    },
+    {
+      "epoch": 1.0052373711775637,
+      "grad_norm": 0.47585076093673706,
+      "learning_rate": 0.000195723435472104,
+      "loss": 4.477125930786133,
+      "step": 5950
+    },
+    {
+      "epoch": 1.0069268457509715,
+      "grad_norm": 0.49405696988105774,
+      "learning_rate": 0.00019526700044502956,
+      "loss": 4.483388137817383,
+      "step": 5960
+    },
+    {
+      "epoch": 1.0086163203243792,
+      "grad_norm": 0.47832658886909485,
+      "learning_rate": 0.0001948101038161937,
+      "loss": 4.474266052246094,
+      "step": 5970
+    },
+    {
+      "epoch": 1.0103057948977867,
+      "grad_norm": 0.470113068819046,
+      "learning_rate": 0.0001943527502447141,
+      "loss": 4.483303833007812,
+      "step": 5980
+    },
+    {
+      "epoch": 1.0119952694711944,
+      "grad_norm": 0.4839136004447937,
+      "learning_rate": 0.00019389494439436836,
+      "loss": 4.453615188598633,
+      "step": 5990
+    },
+    {
+      "epoch": 1.0136847440446022,
+      "grad_norm": 0.482327401638031,
+      "learning_rate": 0.0001934366909335458,
+      "loss": 4.491983413696289,
+      "step": 6000
+    },
+    {
+      "epoch": 1.0136847440446022,
+      "eval_loss": 4.487085819244385,
+      "eval_runtime": 4.7973,
+      "eval_samples_per_second": 208.452,
+      "eval_steps_per_second": 4.377,
+      "step": 6000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.0067200216019763e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null