Training in progress, step 56000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e053ce558edbe337e73306d7fbe26e01c3b3739e9654841d2ecf6ce0bd40f9e
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:5fe736c4aa2a01e7bdd450b3f5ad17d22bd6d998c21f3be88229c094c87c7e31
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dff8d75ea38f47739184ddb4f15d59881c040611b60a73f3dec49872f53b5d63
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:f13c5595cffc9acc4fba913e67571bbfa169120e968c56adede64d35dc4a9983
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d696ac112fd37df04d162968bc7673857914fcc279bf6c80c805e522e1b12140
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:eefd6eabe10776e158c26b037c833f0a538e87ecc5b41f3ec5b83db2ee085222
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:19cc7a4a8979e8130433cf8829e3378f90d0f833f68939500ebb226cc11a1bb7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:61b622224d0429fa788320c8e5bf7b4fa226b91d5779b03ff807c7a77c5801ff
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f437dbdcbf6f4690950881f29edcf36177f37c3662f7c0fbeeb5b9a736d2d9b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:13eaed908712a1c285ee1e1812b438bbabf64c8443377b65c97ba88f1f1659c5
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34e8b215e64db84cfc27164b1a2e2325c8274d4b1e175ee122414e5295198a22
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:511643e6249f4ea9212a1dfdf8dd72a9148b63815d4ef9de03948ab4598161f3
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1887d8c6d2dc250cfb0b7c57e61e4fa0abc40fda0dbe8977a6841b90daceb70
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0989f631c4201212ca348622ae2d095f9b6b69c39f42732c5c97cef21592c5a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08147230830306514,
   "eval_steps": 500,
-  "global_step": 55000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19258,6 +19258,356 @@
       "learning_rate": 0.0004865416490511318,
       "loss": 17.1327,
       "step": 55000
     }
   ],
   "logging_steps": 20,
@@ -19277,7 +19627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.043591890870731e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0829536229994845,
   "eval_steps": 500,
+  "global_step": 56000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004865416490511318,
       "loss": 17.1327,
       "step": 55000
+    },
+    {
+      "epoch": 0.08150193459699352,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048653671011612425,
+      "loss": 17.1478,
+      "step": 55020
+    },
+    {
+      "epoch": 0.08153156089092191,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004865317711811167,
+      "loss": 17.1158,
+      "step": 55040
+    },
+    {
+      "epoch": 0.0815611871848503,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004865268322461091,
+      "loss": 17.1082,
+      "step": 55060
+    },
+    {
+      "epoch": 0.08159081347877868,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048652189331110154,
+      "loss": 17.1178,
+      "step": 55080
+    },
+    {
+      "epoch": 0.08162043977270707,
+      "grad_norm": 7.125,
+      "learning_rate": 0.000486516954376094,
+      "loss": 17.0626,
+      "step": 55100
+    },
+    {
+      "epoch": 0.08165006606663545,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048651201544108643,
+      "loss": 17.1654,
+      "step": 55120
+    },
+    {
+      "epoch": 0.08167969236056384,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004865070765060788,
+      "loss": 17.1095,
+      "step": 55140
+    },
+    {
+      "epoch": 0.08170931865449224,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004865021375710713,
+      "loss": 17.1073,
+      "step": 55160
+    },
+    {
+      "epoch": 0.08173894494842063,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004864971986360637,
+      "loss": 17.1573,
+      "step": 55180
+    },
+    {
+      "epoch": 0.08176857124234901,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048649225970105617,
+      "loss": 17.1434,
+      "step": 55200
+    },
+    {
+      "epoch": 0.0817981975362774,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048648732076604856,
+      "loss": 17.0566,
+      "step": 55220
+    },
+    {
+      "epoch": 0.08182782383020579,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048648238183104106,
+      "loss": 17.1302,
+      "step": 55240
+    },
+    {
+      "epoch": 0.08185745012413417,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048647744289603346,
+      "loss": 17.1324,
+      "step": 55260
+    },
+    {
+      "epoch": 0.08188707641806256,
+      "grad_norm": 12.125,
+      "learning_rate": 0.0004864725039610259,
+      "loss": 17.0792,
+      "step": 55280
+    },
+    {
+      "epoch": 0.08191670271199095,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004864675650260183,
+      "loss": 17.1382,
+      "step": 55300
+    },
+    {
+      "epoch": 0.08194632900591933,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048646262609101075,
+      "loss": 17.1347,
+      "step": 55320
+    },
+    {
+      "epoch": 0.08197595529984772,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004864576871560032,
+      "loss": 17.0667,
+      "step": 55340
+    },
+    {
+      "epoch": 0.0820055815937761,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004864527482209956,
+      "loss": 17.0842,
+      "step": 55360
+    },
+    {
+      "epoch": 0.08203520788770449,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048644780928598804,
+      "loss": 17.1114,
+      "step": 55380
+    },
+    {
+      "epoch": 0.08206483418163288,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004864428703509805,
+      "loss": 17.089,
+      "step": 55400
+    },
+    {
+      "epoch": 0.08209446047556127,
+      "grad_norm": 7.875,
+      "learning_rate": 0.00048643793141597293,
+      "loss": 17.1292,
+      "step": 55420
+    },
+    {
+      "epoch": 0.08212408676948965,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004864329924809653,
+      "loss": 17.1258,
+      "step": 55440
+    },
+    {
+      "epoch": 0.08215371306341804,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004864280535459578,
+      "loss": 17.1261,
+      "step": 55460
+    },
+    {
+      "epoch": 0.08218333935734644,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004864231146109502,
+      "loss": 17.0684,
+      "step": 55480
+    },
+    {
+      "epoch": 0.08221296565127482,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048641817567594267,
+      "loss": 17.0862,
+      "step": 55500
+    },
+    {
+      "epoch": 0.08224259194520321,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048641323674093506,
+      "loss": 17.1307,
+      "step": 55520
+    },
+    {
+      "epoch": 0.0822722182391316,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048640829780592756,
+      "loss": 17.0831,
+      "step": 55540
+    },
+    {
+      "epoch": 0.08230184453305998,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048640335887091996,
+      "loss": 17.0549,
+      "step": 55560
+    },
+    {
+      "epoch": 0.08233147082698837,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004863984199359124,
+      "loss": 17.1213,
+      "step": 55580
+    },
+    {
+      "epoch": 0.08236109712091676,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004863934810009048,
+      "loss": 17.1498,
+      "step": 55600
+    },
+    {
+      "epoch": 0.08239072341484514,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004863885420658973,
+      "loss": 17.0782,
+      "step": 55620
+    },
+    {
+      "epoch": 0.08242034970877353,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004863836031308897,
+      "loss": 17.1037,
+      "step": 55640
+    },
+    {
+      "epoch": 0.08244997600270192,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004863786641958821,
+      "loss": 17.1475,
+      "step": 55660
+    },
+    {
+      "epoch": 0.0824796022966303,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048637372526087454,
+      "loss": 17.1295,
+      "step": 55680
+    },
+    {
+      "epoch": 0.08250922859055869,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.000486368786325867,
+      "loss": 17.0528,
+      "step": 55700
+    },
+    {
+      "epoch": 0.08253885488448708,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048636384739085943,
+      "loss": 17.0928,
+      "step": 55720
+    },
+    {
+      "epoch": 0.08256848117841546,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004863589084558518,
+      "loss": 17.0391,
+      "step": 55740
+    },
+    {
+      "epoch": 0.08259810747234385,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004863539695208443,
+      "loss": 17.1124,
+      "step": 55760
+    },
+    {
+      "epoch": 0.08262773376627225,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004863490305858367,
+      "loss": 17.1316,
+      "step": 55780
+    },
+    {
+      "epoch": 0.08265736006020064,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048634409165082917,
+      "loss": 17.081,
+      "step": 55800
+    },
+    {
+      "epoch": 0.08268698635412902,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048633915271582156,
+      "loss": 17.064,
+      "step": 55820
+    },
+    {
+      "epoch": 0.08271661264805741,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00048633421378081406,
+      "loss": 17.0937,
+      "step": 55840
+    },
+    {
+      "epoch": 0.0827462389419858,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048632927484580646,
+      "loss": 17.0401,
+      "step": 55860
+    },
+    {
+      "epoch": 0.08277586523591418,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004863243359107989,
+      "loss": 17.0646,
+      "step": 55880
+    },
+    {
+      "epoch": 0.08280549152984257,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004863193969757913,
+      "loss": 17.0864,
+      "step": 55900
+    },
+    {
+      "epoch": 0.08283511782377095,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004863144580407838,
+      "loss": 17.0813,
+      "step": 55920
+    },
+    {
+      "epoch": 0.08286474411769934,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004863095191057762,
+      "loss": 17.0242,
+      "step": 55940
+    },
+    {
+      "epoch": 0.08289437041162773,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048630458017076864,
+      "loss": 17.1233,
+      "step": 55960
+    },
+    {
+      "epoch": 0.08292399670555611,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048629964123576104,
+      "loss": 17.0682,
+      "step": 55980
+    },
+    {
+      "epoch": 0.0829536229994845,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004862947023007535,
+      "loss": 17.0628,
+      "step": 56000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 4.117128426295394e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null