Training in progress, step 5500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9bdb004cc12734dde986cb14fdf851cce0f063e2d6a2ac9c9566bb962bc0873
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:850517b9cf5da4903168f8b9dbfcfcb01385d34bc0d5bd1c93041c99d5afbbab
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68b278926ebe3e854059774715cf944c796b018b9ed04789c02ad5bd2ddb56db
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:8eaed0cac576a8a9a03addbea043ecae521ca2a1d3d91c2f8f4543bcfc559783
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5948a5161f7923aa0acf66b01adf35dc2196a8acf5bd2c21227561e5bff45666
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:4a9c47849ad44860f45019fca12bd8b47e7589be1317a01ad6705b924156a6be
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a29280eedf28bde93a8485de1b90963ca69c84125cea86695b5935449e18f453
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:bda4b56b57284b5d776cea834f86539fa062d5e046885e07dcb7516921ccd6ee
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8447372867038351,
   "eval_steps": 500,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3595,6 +3595,364 @@
       "eval_samples_per_second": 275.05,
       "eval_steps_per_second": 5.776,
       "step": 5000
     }
   ],
   "logging_steps": 10,
@@ -3614,7 +3972,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6722841042944e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9292110153742186,
   "eval_steps": 500,
+  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 275.05,
       "eval_steps_per_second": 5.776,
       "step": 5000
+    },
+    {
+      "epoch": 0.8464267612772428,
+      "grad_norm": 0.5504565238952637,
+      "learning_rate": 0.00023592089546305216,
+      "loss": 4.576148986816406,
+      "step": 5010
+    },
+    {
+      "epoch": 0.8481162358506504,
+      "grad_norm": 0.5207253098487854,
+      "learning_rate": 0.00023552782714923343,
+      "loss": 4.615359497070313,
+      "step": 5020
+    },
+    {
+      "epoch": 0.8498057104240582,
+      "grad_norm": 0.5456526875495911,
+      "learning_rate": 0.00023513388668141118,
+      "loss": 4.583608627319336,
+      "step": 5030
+    },
+    {
+      "epoch": 0.8514951849974658,
+      "grad_norm": 0.5371212959289551,
+      "learning_rate": 0.00023473907807671952,
+      "loss": 4.605810546875,
+      "step": 5040
+    },
+    {
+      "epoch": 0.8531846595708734,
+      "grad_norm": 0.5273321270942688,
+      "learning_rate": 0.00023434340536114531,
+      "loss": 4.596974945068359,
+      "step": 5050
+    },
+    {
+      "epoch": 0.8548741341442812,
+      "grad_norm": 0.5454714894294739,
+      "learning_rate": 0.00023394687256948697,
+      "loss": 4.595716094970703,
+      "step": 5060
+    },
+    {
+      "epoch": 0.8565636087176888,
+      "grad_norm": 0.6011702418327332,
+      "learning_rate": 0.00023354948374531344,
+      "loss": 4.590705108642578,
+      "step": 5070
+    },
+    {
+      "epoch": 0.8582530832910965,
+      "grad_norm": 0.5225823521614075,
+      "learning_rate": 0.00023315124294092277,
+      "loss": 4.578453063964844,
+      "step": 5080
+    },
+    {
+      "epoch": 0.8599425578645041,
+      "grad_norm": 0.5181743502616882,
+      "learning_rate": 0.000232752154217301,
+      "loss": 4.5722908020019535,
+      "step": 5090
+    },
+    {
+      "epoch": 0.8616320324379118,
+      "grad_norm": 0.5235112309455872,
+      "learning_rate": 0.00023235222164408076,
+      "loss": 4.600410461425781,
+      "step": 5100
+    },
+    {
+      "epoch": 0.8633215070113195,
+      "grad_norm": 0.5427247881889343,
+      "learning_rate": 0.00023195144929949953,
+      "loss": 4.576435089111328,
+      "step": 5110
+    },
+    {
+      "epoch": 0.8650109815847271,
+      "grad_norm": 0.5017905235290527,
+      "learning_rate": 0.00023154984127035823,
+      "loss": 4.6031841278076175,
+      "step": 5120
+    },
+    {
+      "epoch": 0.8667004561581348,
+      "grad_norm": 0.5279256105422974,
+      "learning_rate": 0.00023114740165197957,
+      "loss": 4.570458221435547,
+      "step": 5130
+    },
+    {
+      "epoch": 0.8683899307315425,
+      "grad_norm": 0.5026883482933044,
+      "learning_rate": 0.00023074413454816619,
+      "loss": 4.587477493286133,
+      "step": 5140
+    },
+    {
+      "epoch": 0.8700794053049502,
+      "grad_norm": 0.5021783709526062,
+      "learning_rate": 0.0002303400440711589,
+      "loss": 4.580776977539062,
+      "step": 5150
+    },
+    {
+      "epoch": 0.8717688798783578,
+      "grad_norm": 0.5208005309104919,
+      "learning_rate": 0.00022993513434159464,
+      "loss": 4.606272125244141,
+      "step": 5160
+    },
+    {
+      "epoch": 0.8734583544517655,
+      "grad_norm": 0.4933724105358124,
+      "learning_rate": 0.0002295294094884646,
+      "loss": 4.598735046386719,
+      "step": 5170
+    },
+    {
+      "epoch": 0.8751478290251732,
+      "grad_norm": 0.4844622015953064,
+      "learning_rate": 0.00022912287364907204,
+      "loss": 4.577612686157226,
+      "step": 5180
+    },
+    {
+      "epoch": 0.8768373035985808,
+      "grad_norm": 0.49681806564331055,
+      "learning_rate": 0.00022871553096899,
+      "loss": 4.6206306457519535,
+      "step": 5190
+    },
+    {
+      "epoch": 0.8785267781719885,
+      "grad_norm": 0.5069138407707214,
+      "learning_rate": 0.00022830738560201911,
+      "loss": 4.576866149902344,
+      "step": 5200
+    },
+    {
+      "epoch": 0.8802162527453962,
+      "grad_norm": 0.49277958273887634,
+      "learning_rate": 0.00022789844171014557,
+      "loss": 4.570761489868164,
+      "step": 5210
+    },
+    {
+      "epoch": 0.8819057273188039,
+      "grad_norm": 0.5152326822280884,
+      "learning_rate": 0.00022748870346349796,
+      "loss": 4.591669082641602,
+      "step": 5220
+    },
+    {
+      "epoch": 0.8835952018922115,
+      "grad_norm": 0.5280734896659851,
+      "learning_rate": 0.00022707817504030538,
+      "loss": 4.600007629394531,
+      "step": 5230
+    },
+    {
+      "epoch": 0.8852846764656191,
+      "grad_norm": 0.5109785795211792,
+      "learning_rate": 0.0002266668606268545,
+      "loss": 4.551007461547852,
+      "step": 5240
+    },
+    {
+      "epoch": 0.8869741510390269,
+      "grad_norm": 0.511035144329071,
+      "learning_rate": 0.00022625476441744715,
+      "loss": 4.596772766113281,
+      "step": 5250
+    },
+    {
+      "epoch": 0.8886636256124345,
+      "grad_norm": 0.5007238984107971,
+      "learning_rate": 0.00022584189061435725,
+      "loss": 4.5646717071533205,
+      "step": 5260
+    },
+    {
+      "epoch": 0.8903531001858422,
+      "grad_norm": 0.517419159412384,
+      "learning_rate": 0.00022542824342778806,
+      "loss": 4.561199188232422,
+      "step": 5270
+    },
+    {
+      "epoch": 0.8920425747592499,
+      "grad_norm": 0.5943387746810913,
+      "learning_rate": 0.0002250138270758293,
+      "loss": 4.576548385620117,
+      "step": 5280
+    },
+    {
+      "epoch": 0.8937320493326576,
+      "grad_norm": 0.5131561160087585,
+      "learning_rate": 0.00022459864578441415,
+      "loss": 4.587300109863281,
+      "step": 5290
+    },
+    {
+      "epoch": 0.8954215239060652,
+      "grad_norm": 0.5333006381988525,
+      "learning_rate": 0.0002241827037872761,
+      "loss": 4.5638988494873045,
+      "step": 5300
+    },
+    {
+      "epoch": 0.8971109984794728,
+      "grad_norm": 0.46661046147346497,
+      "learning_rate": 0.00022376600532590578,
+      "loss": 4.5343585968017575,
+      "step": 5310
+    },
+    {
+      "epoch": 0.8988004730528806,
+      "grad_norm": 0.4886866509914398,
+      "learning_rate": 0.00022334855464950775,
+      "loss": 4.5834095001220705,
+      "step": 5320
+    },
+    {
+      "epoch": 0.9004899476262882,
+      "grad_norm": 0.5262774229049683,
+      "learning_rate": 0.00022293035601495708,
+      "loss": 4.579534912109375,
+      "step": 5330
+    },
+    {
+      "epoch": 0.9021794221996959,
+      "grad_norm": 0.5163218975067139,
+      "learning_rate": 0.00022251141368675607,
+      "loss": 4.577048492431641,
+      "step": 5340
+    },
+    {
+      "epoch": 0.9038688967731036,
+      "grad_norm": 0.5345433950424194,
+      "learning_rate": 0.00022209173193699067,
+      "loss": 4.582790374755859,
+      "step": 5350
+    },
+    {
+      "epoch": 0.9055583713465112,
+      "grad_norm": 0.5151252150535583,
+      "learning_rate": 0.00022167131504528695,
+      "loss": 4.594097900390625,
+      "step": 5360
+    },
+    {
+      "epoch": 0.9072478459199189,
+      "grad_norm": 0.47062498331069946,
+      "learning_rate": 0.00022125016729876743,
+      "loss": 4.574803161621094,
+      "step": 5370
+    },
+    {
+      "epoch": 0.9089373204933265,
+      "grad_norm": 0.49667978286743164,
+      "learning_rate": 0.00022082829299200743,
+      "loss": 4.580567932128906,
+      "step": 5380
+    },
+    {
+      "epoch": 0.9106267950667343,
+      "grad_norm": 0.48394060134887695,
+      "learning_rate": 0.00022040569642699112,
+      "loss": 4.555598449707031,
+      "step": 5390
+    },
+    {
+      "epoch": 0.9123162696401419,
+      "grad_norm": 0.48837390542030334,
+      "learning_rate": 0.00021998238191306798,
+      "loss": 4.534821319580078,
+      "step": 5400
+    },
+    {
+      "epoch": 0.9140057442135496,
+      "grad_norm": 0.5261453986167908,
+      "learning_rate": 0.00021955835376690841,
+      "loss": 4.546956634521484,
+      "step": 5410
+    },
+    {
+      "epoch": 0.9156952187869573,
+      "grad_norm": 0.5199710130691528,
+      "learning_rate": 0.00021913361631246004,
+      "loss": 4.561407852172851,
+      "step": 5420
+    },
+    {
+      "epoch": 0.9173846933603649,
+      "grad_norm": 0.5369474291801453,
+      "learning_rate": 0.0002187081738809036,
+      "loss": 4.550098419189453,
+      "step": 5430
+    },
+    {
+      "epoch": 0.9190741679337726,
+      "grad_norm": 0.5480945110321045,
+      "learning_rate": 0.00021828203081060858,
+      "loss": 4.559786224365235,
+      "step": 5440
+    },
+    {
+      "epoch": 0.9207636425071802,
+      "grad_norm": 0.5149338245391846,
+      "learning_rate": 0.00021785519144708912,
+      "loss": 4.534018325805664,
+      "step": 5450
+    },
+    {
+      "epoch": 0.922453117080588,
+      "grad_norm": 0.5365586280822754,
+      "learning_rate": 0.00021742766014295976,
+      "loss": 4.546533584594727,
+      "step": 5460
+    },
+    {
+      "epoch": 0.9241425916539956,
+      "grad_norm": 0.5260055661201477,
+      "learning_rate": 0.00021699944125789096,
+      "loss": 4.534712600708008,
+      "step": 5470
+    },
+    {
+      "epoch": 0.9258320662274033,
+      "grad_norm": 0.4802268147468567,
+      "learning_rate": 0.00021657053915856455,
+      "loss": 4.560755920410156,
+      "step": 5480
+    },
+    {
+      "epoch": 0.927521540800811,
+      "grad_norm": 0.4982668459415436,
+      "learning_rate": 0.0002161409582186294,
+      "loss": 4.584963989257813,
+      "step": 5490
+    },
+    {
+      "epoch": 0.9292110153742186,
+      "grad_norm": 0.49544209241867065,
+      "learning_rate": 0.0002157107028186567,
+      "loss": 4.547665786743164,
+      "step": 5500
+    },
+    {
+      "epoch": 0.9292110153742186,
+      "eval_loss": 4.52970552444458,
+      "eval_runtime": 3.6346,
+      "eval_samples_per_second": 275.136,
+      "eval_steps_per_second": 5.778,
+      "step": 5500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.83951251472384e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null