Training in progress, step 5500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29ab3bcbd54c5e63c4e604ac4ad2f368ae42aa766977dc0340b7b8e0814fb858
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8ae4d439763bede675a7bb8407ca626ba1a1ca1d28d508145ff27990bcdfd60
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5104f05c76008a8cc4ebab2ab5f343ccdca71dafda81e126d612fe143dbfa54
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:8df4f8c8c0f93c7a4647906cc1e5f85c72386b1b581eb687df3d305abbdc44a7
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a04575953c998a8fd3197b1b8249c8e72c33f4bb7c27b036788a4d9e537cf3cd
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:8af3cc3f1560f815527e73bcdf0bbfb03998a87b5067ff9928ca94f46e638231
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a29280eedf28bde93a8485de1b90963ca69c84125cea86695b5935449e18f453
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:bda4b56b57284b5d776cea834f86539fa062d5e046885e07dcb7516921ccd6ee
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8447372867038351,
   "eval_steps": 500,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3595,6 +3595,364 @@
       "eval_samples_per_second": 279.83,
       "eval_steps_per_second": 5.876,
       "step": 5000
     }
   ],
   "logging_steps": 10,
@@ -3614,7 +3972,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6722841042944e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9292110153742186,
   "eval_steps": 500,
+  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 279.83,
       "eval_steps_per_second": 5.876,
       "step": 5000
+    },
+    {
+      "epoch": 0.8464267612772428,
+      "grad_norm": 0.559013307094574,
+      "learning_rate": 0.00023592089546305216,
+      "loss": 4.576456832885742,
+      "step": 5010
+    },
+    {
+      "epoch": 0.8481162358506504,
+      "grad_norm": 0.49753278493881226,
+      "learning_rate": 0.00023552782714923343,
+      "loss": 4.614764404296875,
+      "step": 5020
+    },
+    {
+      "epoch": 0.8498057104240582,
+      "grad_norm": 0.56475430727005,
+      "learning_rate": 0.00023513388668141118,
+      "loss": 4.58197250366211,
+      "step": 5030
+    },
+    {
+      "epoch": 0.8514951849974658,
+      "grad_norm": 0.4953176975250244,
+      "learning_rate": 0.00023473907807671952,
+      "loss": 4.605267333984375,
+      "step": 5040
+    },
+    {
+      "epoch": 0.8531846595708734,
+      "grad_norm": 0.5137485861778259,
+      "learning_rate": 0.00023434340536114531,
+      "loss": 4.596157836914062,
+      "step": 5050
+    },
+    {
+      "epoch": 0.8548741341442812,
+      "grad_norm": 0.5363683104515076,
+      "learning_rate": 0.00023394687256948697,
+      "loss": 4.594855499267578,
+      "step": 5060
+    },
+    {
+      "epoch": 0.8565636087176888,
+      "grad_norm": 0.5634586811065674,
+      "learning_rate": 0.00023354948374531344,
+      "loss": 4.589244842529297,
+      "step": 5070
+    },
+    {
+      "epoch": 0.8582530832910965,
+      "grad_norm": 0.5043785572052002,
+      "learning_rate": 0.00023315124294092277,
+      "loss": 4.576361083984375,
+      "step": 5080
+    },
+    {
+      "epoch": 0.8599425578645041,
+      "grad_norm": 0.5204640626907349,
+      "learning_rate": 0.000232752154217301,
+      "loss": 4.570015716552734,
+      "step": 5090
+    },
+    {
+      "epoch": 0.8616320324379118,
+      "grad_norm": 0.5251067280769348,
+      "learning_rate": 0.00023235222164408076,
+      "loss": 4.598841857910156,
+      "step": 5100
+    },
+    {
+      "epoch": 0.8633215070113195,
+      "grad_norm": 0.5268970131874084,
+      "learning_rate": 0.00023195144929949953,
+      "loss": 4.574850082397461,
+      "step": 5110
+    },
+    {
+      "epoch": 0.8650109815847271,
+      "grad_norm": 0.5099704265594482,
+      "learning_rate": 0.00023154984127035823,
+      "loss": 4.602288436889649,
+      "step": 5120
+    },
+    {
+      "epoch": 0.8667004561581348,
+      "grad_norm": 0.49661147594451904,
+      "learning_rate": 0.00023114740165197957,
+      "loss": 4.56927604675293,
+      "step": 5130
+    },
+    {
+      "epoch": 0.8683899307315425,
+      "grad_norm": 0.5453396439552307,
+      "learning_rate": 0.00023074413454816619,
+      "loss": 4.587471771240234,
+      "step": 5140
+    },
+    {
+      "epoch": 0.8700794053049502,
+      "grad_norm": 0.49157091975212097,
+      "learning_rate": 0.0002303400440711589,
+      "loss": 4.580040740966797,
+      "step": 5150
+    },
+    {
+      "epoch": 0.8717688798783578,
+      "grad_norm": 0.5203030705451965,
+      "learning_rate": 0.00022993513434159464,
+      "loss": 4.604449462890625,
+      "step": 5160
+    },
+    {
+      "epoch": 0.8734583544517655,
+      "grad_norm": 0.49596408009529114,
+      "learning_rate": 0.0002295294094884646,
+      "loss": 4.597255706787109,
+      "step": 5170
+    },
+    {
+      "epoch": 0.8751478290251732,
+      "grad_norm": 0.482197642326355,
+      "learning_rate": 0.00022912287364907204,
+      "loss": 4.575711822509765,
+      "step": 5180
+    },
+    {
+      "epoch": 0.8768373035985808,
+      "grad_norm": 0.49015575647354126,
+      "learning_rate": 0.00022871553096899,
+      "loss": 4.620565032958984,
+      "step": 5190
+    },
+    {
+      "epoch": 0.8785267781719885,
+      "grad_norm": 0.49912846088409424,
+      "learning_rate": 0.00022830738560201911,
+      "loss": 4.575767898559571,
+      "step": 5200
+    },
+    {
+      "epoch": 0.8802162527453962,
+      "grad_norm": 0.5013103485107422,
+      "learning_rate": 0.00022789844171014557,
+      "loss": 4.5688629150390625,
+      "step": 5210
+    },
+    {
+      "epoch": 0.8819057273188039,
+      "grad_norm": 0.5322986245155334,
+      "learning_rate": 0.00022748870346349796,
+      "loss": 4.590381622314453,
+      "step": 5220
+    },
+    {
+      "epoch": 0.8835952018922115,
+      "grad_norm": 0.5467557311058044,
+      "learning_rate": 0.00022707817504030538,
+      "loss": 4.598742294311523,
+      "step": 5230
+    },
+    {
+      "epoch": 0.8852846764656191,
+      "grad_norm": 0.5180667638778687,
+      "learning_rate": 0.0002266668606268545,
+      "loss": 4.550464630126953,
+      "step": 5240
+    },
+    {
+      "epoch": 0.8869741510390269,
+      "grad_norm": 0.5265566110610962,
+      "learning_rate": 0.00022625476441744715,
+      "loss": 4.595706176757813,
+      "step": 5250
+    },
+    {
+      "epoch": 0.8886636256124345,
+      "grad_norm": 0.5108802318572998,
+      "learning_rate": 0.00022584189061435725,
+      "loss": 4.564280700683594,
+      "step": 5260
+    },
+    {
+      "epoch": 0.8903531001858422,
+      "grad_norm": 0.5016060471534729,
+      "learning_rate": 0.00022542824342778806,
+      "loss": 4.561073303222656,
+      "step": 5270
+    },
+    {
+      "epoch": 0.8920425747592499,
+      "grad_norm": 0.531934916973114,
+      "learning_rate": 0.0002250138270758293,
+      "loss": 4.576354598999023,
+      "step": 5280
+    },
+    {
+      "epoch": 0.8937320493326576,
+      "grad_norm": 0.5116508603096008,
+      "learning_rate": 0.00022459864578441415,
+      "loss": 4.586645889282226,
+      "step": 5290
+    },
+    {
+      "epoch": 0.8954215239060652,
+      "grad_norm": 0.5074120163917542,
+      "learning_rate": 0.0002241827037872761,
+      "loss": 4.564454650878906,
+      "step": 5300
+    },
+    {
+      "epoch": 0.8971109984794728,
+      "grad_norm": 0.47892510890960693,
+      "learning_rate": 0.00022376600532590578,
+      "loss": 4.534092712402344,
+      "step": 5310
+    },
+    {
+      "epoch": 0.8988004730528806,
+      "grad_norm": 0.4657728970050812,
+      "learning_rate": 0.00022334855464950775,
+      "loss": 4.5831245422363285,
+      "step": 5320
+    },
+    {
+      "epoch": 0.9004899476262882,
+      "grad_norm": 0.4809263348579407,
+      "learning_rate": 0.00022293035601495708,
+      "loss": 4.578067398071289,
+      "step": 5330
+    },
+    {
+      "epoch": 0.9021794221996959,
+      "grad_norm": 0.5298095941543579,
+      "learning_rate": 0.00022251141368675607,
+      "loss": 4.576302719116211,
+      "step": 5340
+    },
+    {
+      "epoch": 0.9038688967731036,
+      "grad_norm": 0.5315806865692139,
+      "learning_rate": 0.00022209173193699067,
+      "loss": 4.582082748413086,
+      "step": 5350
+    },
+    {
+      "epoch": 0.9055583713465112,
+      "grad_norm": 0.5084795355796814,
+      "learning_rate": 0.00022167131504528695,
+      "loss": 4.5921672821044925,
+      "step": 5360
+    },
+    {
+      "epoch": 0.9072478459199189,
+      "grad_norm": 0.4921436011791229,
+      "learning_rate": 0.00022125016729876743,
+      "loss": 4.573263931274414,
+      "step": 5370
+    },
+    {
+      "epoch": 0.9089373204933265,
+      "grad_norm": 0.4985114336013794,
+      "learning_rate": 0.00022082829299200743,
+      "loss": 4.580036163330078,
+      "step": 5380
+    },
+    {
+      "epoch": 0.9106267950667343,
+      "grad_norm": 0.486751526594162,
+      "learning_rate": 0.00022040569642699112,
+      "loss": 4.554470062255859,
+      "step": 5390
+    },
+    {
+      "epoch": 0.9123162696401419,
+      "grad_norm": 0.49089571833610535,
+      "learning_rate": 0.00021998238191306798,
+      "loss": 4.53393783569336,
+      "step": 5400
+    },
+    {
+      "epoch": 0.9140057442135496,
+      "grad_norm": 0.5415358543395996,
+      "learning_rate": 0.00021955835376690841,
+      "loss": 4.545674514770508,
+      "step": 5410
+    },
+    {
+      "epoch": 0.9156952187869573,
+      "grad_norm": 0.5360416173934937,
+      "learning_rate": 0.00021913361631246004,
+      "loss": 4.559771728515625,
+      "step": 5420
+    },
+    {
+      "epoch": 0.9173846933603649,
+      "grad_norm": 0.5081850290298462,
+      "learning_rate": 0.0002187081738809036,
+      "loss": 4.550452804565429,
+      "step": 5430
+    },
+    {
+      "epoch": 0.9190741679337726,
+      "grad_norm": 0.5538184642791748,
+      "learning_rate": 0.00021828203081060858,
+      "loss": 4.558630752563476,
+      "step": 5440
+    },
+    {
+      "epoch": 0.9207636425071802,
+      "grad_norm": 0.4913816452026367,
+      "learning_rate": 0.00021785519144708912,
+      "loss": 4.532632446289062,
+      "step": 5450
+    },
+    {
+      "epoch": 0.922453117080588,
+      "grad_norm": 0.4864713251590729,
+      "learning_rate": 0.00021742766014295976,
+      "loss": 4.546042251586914,
+      "step": 5460
+    },
+    {
+      "epoch": 0.9241425916539956,
+      "grad_norm": 0.5309963822364807,
+      "learning_rate": 0.00021699944125789096,
+      "loss": 4.532254791259765,
+      "step": 5470
+    },
+    {
+      "epoch": 0.9258320662274033,
+      "grad_norm": 0.47752541303634644,
+      "learning_rate": 0.00021657053915856455,
+      "loss": 4.558838272094727,
+      "step": 5480
+    },
+    {
+      "epoch": 0.927521540800811,
+      "grad_norm": 0.47722741961479187,
+      "learning_rate": 0.0002161409582186294,
+      "loss": 4.583000183105469,
+      "step": 5490
+    },
+    {
+      "epoch": 0.9292110153742186,
+      "grad_norm": 0.5001320838928223,
+      "learning_rate": 0.0002157107028186567,
+      "loss": 4.5466560363769535,
+      "step": 5500
+    },
+    {
+      "epoch": 0.9292110153742186,
+      "eval_loss": 4.510837554931641,
+      "eval_runtime": 3.6228,
+      "eval_samples_per_second": 276.03,
+      "eval_steps_per_second": 5.797,
+      "step": 5500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.83951251472384e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null