Training in progress, step 5493, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +347 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f4ce5b1352e184a82241e863626c4a62ae31746f2d8177ffeaa8982c170f6186
 size 2718107304

 version https://git-lfs.github.com/spec/v1
+oid sha256:2807387c3f7c038eca212dca41a58ecfff1755585862e7e2318b6286dd29cb8f
 size 2718107304

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a213c86e42b52fb36eb4043f5ff5fb85cd6c856634144ab014cad96b1d38a5f1
 size 145486330

 version https://git-lfs.github.com/spec/v1
+oid sha256:71dd57ecc32f710a2531c2b41f6cbb162801c9ad0f9bb31b277daffb7fe2f9b4
 size 145486330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f8c6a28f5372544493b855a8a168867b253fd6506322bb138a55ab7d729a0e7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0ad39c2fd71a09f27709f37c0a489f4c2b0a997a89343f75cb61234192319689
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7b957bbd71ba88b1567b81dfc55fdf0d0c49eef2d758e0863ecbda488001df2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2455594c5b90eff022ef3ec1c714caddd8dcf4c8dacec82303fc8c5605b9f1d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9102494083378846,
   "eval_steps": 1000,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3547,6 +3547,349 @@
       "eval_samples_per_second": 9.646,
       "eval_steps_per_second": 1.206,
       "step": 5000
     }
   ],
   "logging_steps": 10,
@@ -3561,12 +3904,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.16907232985088e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 1000,
+  "global_step": 5493,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.646,
       "eval_steps_per_second": 1.206,
       "step": 5000
+    },
+    {
+      "epoch": 0.9120699071545604,
+      "grad_norm": 4.849244117736816,
+      "learning_rate": 2.291214703757982e-06,
+      "loss": 2.3958,
+      "step": 5010
+    },
+    {
+      "epoch": 0.9138904059712362,
+      "grad_norm": 4.8128204345703125,
+      "learning_rate": 2.1980167035280163e-06,
+      "loss": 2.4288,
+      "step": 5020
+    },
+    {
+      "epoch": 0.9157109047879118,
+      "grad_norm": 5.573403835296631,
+      "learning_rate": 2.1067111388414163e-06,
+      "loss": 2.4134,
+      "step": 5030
+    },
+    {
+      "epoch": 0.9175314036045876,
+      "grad_norm": 4.9487504959106445,
+      "learning_rate": 2.0173016243995866e-06,
+      "loss": 2.4095,
+      "step": 5040
+    },
+    {
+      "epoch": 0.9193519024212634,
+      "grad_norm": 4.933927536010742,
+      "learning_rate": 1.929791699841066e-06,
+      "loss": 2.4014,
+      "step": 5050
+    },
+    {
+      "epoch": 0.9211724012379392,
+      "grad_norm": 5.116062641143799,
+      "learning_rate": 1.844184829601453e-06,
+      "loss": 2.4196,
+      "step": 5060
+    },
+    {
+      "epoch": 0.922992900054615,
+      "grad_norm": 4.888516902923584,
+      "learning_rate": 1.7604844027761802e-06,
+      "loss": 2.4418,
+      "step": 5070
+    },
+    {
+      "epoch": 0.9248133988712908,
+      "grad_norm": 4.990447998046875,
+      "learning_rate": 1.6786937329864027e-06,
+      "loss": 2.4049,
+      "step": 5080
+    },
+    {
+      "epoch": 0.9266338976879666,
+      "grad_norm": 4.672518253326416,
+      "learning_rate": 1.5988160582477818e-06,
+      "loss": 2.3873,
+      "step": 5090
+    },
+    {
+      "epoch": 0.9284543965046422,
+      "grad_norm": 5.029353618621826,
+      "learning_rate": 1.5208545408423092e-06,
+      "loss": 2.4754,
+      "step": 5100
+    },
+    {
+      "epoch": 0.930274895321318,
+      "grad_norm": 4.660059928894043,
+      "learning_rate": 1.444812267193102e-06,
+      "loss": 2.4081,
+      "step": 5110
+    },
+    {
+      "epoch": 0.9320953941379938,
+      "grad_norm": 5.001034259796143,
+      "learning_rate": 1.3706922477422336e-06,
+      "loss": 2.4014,
+      "step": 5120
+    },
+    {
+      "epoch": 0.9339158929546696,
+      "grad_norm": 5.1275858879089355,
+      "learning_rate": 1.2984974168315234e-06,
+      "loss": 2.4251,
+      "step": 5130
+    },
+    {
+      "epoch": 0.9357363917713454,
+      "grad_norm": 4.893324375152588,
+      "learning_rate": 1.2282306325864135e-06,
+      "loss": 2.4196,
+      "step": 5140
+    },
+    {
+      "epoch": 0.9375568905880212,
+      "grad_norm": 4.734968662261963,
+      "learning_rate": 1.1598946768027863e-06,
+      "loss": 2.401,
+      "step": 5150
+    },
+    {
+      "epoch": 0.9393773894046968,
+      "grad_norm": 4.66255521774292,
+      "learning_rate": 1.0934922548368254e-06,
+      "loss": 2.3846,
+      "step": 5160
+    },
+    {
+      "epoch": 0.9411978882213726,
+      "grad_norm": 4.771427631378174,
+      "learning_rate": 1.0290259954979397e-06,
+      "loss": 2.3953,
+      "step": 5170
+    },
+    {
+      "epoch": 0.9430183870380484,
+      "grad_norm": 4.673166275024414,
+      "learning_rate": 9.664984509446917e-07,
+      "loss": 2.3694,
+      "step": 5180
+    },
+    {
+      "epoch": 0.9448388858547242,
+      "grad_norm": 4.778134346008301,
+      "learning_rate": 9.059120965837331e-07,
+      "loss": 2.3948,
+      "step": 5190
+    },
+    {
+      "epoch": 0.9466593846714,
+      "grad_norm": 4.706231594085693,
+      "learning_rate": 8.472693309718283e-07,
+      "loss": 2.4153,
+      "step": 5200
+    },
+    {
+      "epoch": 0.9484798834880758,
+      "grad_norm": 4.645259380340576,
+      "learning_rate": 7.905724757208965e-07,
+      "loss": 2.3806,
+      "step": 5210
+    },
+    {
+      "epoch": 0.9503003823047516,
+      "grad_norm": 5.04796838760376,
+      "learning_rate": 7.358237754060915e-07,
+      "loss": 2.454,
+      "step": 5220
+    },
+    {
+      "epoch": 0.9521208811214272,
+      "grad_norm": 4.7881646156311035,
+      "learning_rate": 6.830253974769496e-07,
+      "loss": 2.4161,
+      "step": 5230
+    },
+    {
+      "epoch": 0.953941379938103,
+      "grad_norm": 4.7254743576049805,
+      "learning_rate": 6.321794321715757e-07,
+      "loss": 2.4715,
+      "step": 5240
+    },
+    {
+      "epoch": 0.9557618787547788,
+      "grad_norm": 5.13754415512085,
+      "learning_rate": 5.832878924338869e-07,
+      "loss": 2.4191,
+      "step": 5250
+    },
+    {
+      "epoch": 0.9575823775714546,
+      "grad_norm": 4.781599998474121,
+      "learning_rate": 5.363527138339597e-07,
+      "loss": 2.4127,
+      "step": 5260
+    },
+    {
+      "epoch": 0.9594028763881304,
+      "grad_norm": 4.541421413421631,
+      "learning_rate": 4.913757544913355e-07,
+      "loss": 2.3908,
+      "step": 5270
+    },
+    {
+      "epoch": 0.9612233752048062,
+      "grad_norm": 5.078845500946045,
+      "learning_rate": 4.4835879500153556e-07,
+      "loss": 2.4303,
+      "step": 5280
+    },
+    {
+      "epoch": 0.9630438740214818,
+      "grad_norm": 4.745322227478027,
+      "learning_rate": 4.0730353836549993e-07,
+      "loss": 2.4046,
+      "step": 5290
+    },
+    {
+      "epoch": 0.9648643728381576,
+      "grad_norm": 4.688536643981934,
+      "learning_rate": 3.6821160992221993e-07,
+      "loss": 2.4456,
+      "step": 5300
+    },
+    {
+      "epoch": 0.9666848716548334,
+      "grad_norm": 4.9088592529296875,
+      "learning_rate": 3.310845572843557e-07,
+      "loss": 2.3846,
+      "step": 5310
+    },
+    {
+      "epoch": 0.9685053704715092,
+      "grad_norm": 5.126766681671143,
+      "learning_rate": 2.959238502769912e-07,
+      "loss": 2.4093,
+      "step": 5320
+    },
+    {
+      "epoch": 0.970325869288185,
+      "grad_norm": 4.49152946472168,
+      "learning_rate": 2.6273088087943597e-07,
+      "loss": 2.3837,
+      "step": 5330
+    },
+    {
+      "epoch": 0.9721463681048608,
+      "grad_norm": 4.944559097290039,
+      "learning_rate": 2.315069631701139e-07,
+      "loss": 2.3791,
+      "step": 5340
+    },
+    {
+      "epoch": 0.9739668669215366,
+      "grad_norm": 4.91040563583374,
+      "learning_rate": 2.022533332745602e-07,
+      "loss": 2.4035,
+      "step": 5350
+    },
+    {
+      "epoch": 0.9757873657382122,
+      "grad_norm": 4.91538143157959,
+      "learning_rate": 1.7497114931644965e-07,
+      "loss": 2.4057,
+      "step": 5360
+    },
+    {
+      "epoch": 0.977607864554888,
+      "grad_norm": 5.63076114654541,
+      "learning_rate": 1.496614913717831e-07,
+      "loss": 2.3627,
+      "step": 5370
+    },
+    {
+      "epoch": 0.9794283633715638,
+      "grad_norm": 4.944591045379639,
+      "learning_rate": 1.2632536142609397e-07,
+      "loss": 2.3662,
+      "step": 5380
+    },
+    {
+      "epoch": 0.9812488621882396,
+      "grad_norm": 4.864638328552246,
+      "learning_rate": 1.0496368333482442e-07,
+      "loss": 2.3704,
+      "step": 5390
+    },
+    {
+      "epoch": 0.9830693610049154,
+      "grad_norm": 4.991931438446045,
+      "learning_rate": 8.557730278669906e-08,
+      "loss": 2.3767,
+      "step": 5400
+    },
+    {
+      "epoch": 0.9848898598215912,
+      "grad_norm": 4.382468223571777,
+      "learning_rate": 6.816698727029614e-08,
+      "loss": 2.4112,
+      "step": 5410
+    },
+    {
+      "epoch": 0.9867103586382668,
+      "grad_norm": 44.841453552246094,
+      "learning_rate": 5.273342604361631e-08,
+      "loss": 2.4092,
+      "step": 5420
+    },
+    {
+      "epoch": 0.9885308574549426,
+      "grad_norm": 4.815988063812256,
+      "learning_rate": 3.9277230106832264e-08,
+      "loss": 2.4256,
+      "step": 5430
+    },
+    {
+      "epoch": 0.9903513562716184,
+      "grad_norm": 4.87392520904541,
+      "learning_rate": 2.7798932178080274e-08,
+      "loss": 2.3936,
+      "step": 5440
+    },
+    {
+      "epoch": 0.9921718550882942,
+      "grad_norm": 5.1465559005737305,
+      "learning_rate": 1.829898667237151e-08,
+      "loss": 2.3805,
+      "step": 5450
+    },
+    {
+      "epoch": 0.99399235390497,
+      "grad_norm": 4.486802101135254,
+      "learning_rate": 1.0777769683617544e-08,
+      "loss": 2.3492,
+      "step": 5460
+    },
+    {
+      "epoch": 0.9958128527216458,
+      "grad_norm": 5.0049614906311035,
+      "learning_rate": 5.2355789697144945e-09,
+      "loss": 2.4414,
+      "step": 5470
+    },
+    {
+      "epoch": 0.9976333515383216,
+      "grad_norm": 4.7070441246032715,
+      "learning_rate": 1.6726339407857616e-09,
+      "loss": 2.4294,
+      "step": 5480
+    },
+    {
+      "epoch": 0.9994538503549972,
+      "grad_norm": 4.9832539558410645,
+      "learning_rate": 8.907565046678557e-11,
+      "loss": 2.3724,
+      "step": 5490
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.2843428615741768e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null