Training in progress, step 57000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fe736c4aa2a01e7bdd450b3f5ad17d22bd6d998c21f3be88229c094c87c7e31
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:972aa91ec388a1f2f04b57475bbe0ef1d7a488751339adb89aa78c0871d0f22b
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f13c5595cffc9acc4fba913e67571bbfa169120e968c56adede64d35dc4a9983
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:1f588ba0d0b39a0c0daf2cb6afacca8a7aef1f4bc72fe4409ce0b2281d2e356a
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eefd6eabe10776e158c26b037c833f0a538e87ecc5b41f3ec5b83db2ee085222
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:93c5029373839975c8e2ce486239c3c93c8bcc84856a9726f25e6b39e80d4bdb
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61b622224d0429fa788320c8e5bf7b4fa226b91d5779b03ff807c7a77c5801ff
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba86940b99fa7512a6bd263e7bdaf7ba94fc8e695324bdfda4c03882f64aa78d
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13eaed908712a1c285ee1e1812b438bbabf64c8443377b65c97ba88f1f1659c5
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf8627c515e0a9fd4095a16f3cf6f960eebbddd06bd5667ffafe332a0150e802
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:511643e6249f4ea9212a1dfdf8dd72a9148b63815d4ef9de03948ab4598161f3
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:55937ecae83bb1b9ebb2721682f64ea1aca1aefba9e61d245b7d516977f878f9
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0989f631c4201212ca348622ae2d095f9b6b69c39f42732c5c97cef21592c5a6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:039c09879ba9a48ef7918776fd751a67234de8e6a37518ae707982e7427ed8c9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0829536229994845,
   "eval_steps": 500,
-  "global_step": 56000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19608,6 +19608,356 @@
       "learning_rate": 0.0004862947023007535,
       "loss": 17.0628,
       "step": 56000
     }
   ],
   "logging_steps": 20,
@@ -19627,7 +19977,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.117128426295394e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08443493769590386,
   "eval_steps": 500,
+  "global_step": 57000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004862947023007535,
       "loss": 17.0628,
       "step": 56000
+    },
+    {
+      "epoch": 0.08298324929341289,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048628976336574593,
+      "loss": 17.0877,
+      "step": 56020
+    },
+    {
+      "epoch": 0.08301287558734127,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004862848244307383,
+      "loss": 17.0776,
+      "step": 56040
+    },
+    {
+      "epoch": 0.08304250188126966,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004862798854957308,
+      "loss": 17.0322,
+      "step": 56060
+    },
+    {
+      "epoch": 0.08307212817519805,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004862749465607232,
+      "loss": 17.0593,
+      "step": 56080
+    },
+    {
+      "epoch": 0.08310175446912645,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048627000762571567,
+      "loss": 17.0977,
+      "step": 56100
+    },
+    {
+      "epoch": 0.08313138076305483,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048626506869070806,
+      "loss": 17.0193,
+      "step": 56120
+    },
+    {
+      "epoch": 0.08316100705698322,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048626012975570057,
+      "loss": 17.0925,
+      "step": 56140
+    },
+    {
+      "epoch": 0.0831906333509116,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048625519082069296,
+      "loss": 17.04,
+      "step": 56160
+    },
+    {
+      "epoch": 0.08322025964483999,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004862502518856854,
+      "loss": 17.1382,
+      "step": 56180
+    },
+    {
+      "epoch": 0.08324988593876838,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004862453129506778,
+      "loss": 17.0622,
+      "step": 56200
+    },
+    {
+      "epoch": 0.08327951223269676,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004862403740156703,
+      "loss": 17.0424,
+      "step": 56220
+    },
+    {
+      "epoch": 0.08330913852662515,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004862354350806627,
+      "loss": 17.0722,
+      "step": 56240
+    },
+    {
+      "epoch": 0.08333876482055354,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048623049614565514,
+      "loss": 17.036,
+      "step": 56260
+    },
+    {
+      "epoch": 0.08336839111448192,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048622555721064754,
+      "loss": 17.0758,
+      "step": 56280
+    },
+    {
+      "epoch": 0.08339801740841031,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048622061827564004,
+      "loss": 17.1354,
+      "step": 56300
+    },
+    {
+      "epoch": 0.0834276437023387,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048621567934063243,
+      "loss": 17.1577,
+      "step": 56320
+    },
+    {
+      "epoch": 0.08345726999626708,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004862107404056248,
+      "loss": 17.0566,
+      "step": 56340
+    },
+    {
+      "epoch": 0.08348689629019547,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004862058014706173,
+      "loss": 17.0633,
+      "step": 56360
+    },
+    {
+      "epoch": 0.08351652258412386,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004862008625356097,
+      "loss": 17.0427,
+      "step": 56380
+    },
+    {
+      "epoch": 0.08354614887805224,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048619592360060217,
+      "loss": 17.068,
+      "step": 56400
+    },
+    {
+      "epoch": 0.08357577517198064,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048619098466559456,
+      "loss": 17.0289,
+      "step": 56420
+    },
+    {
+      "epoch": 0.08360540146590903,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048618604573058707,
+      "loss": 17.0595,
+      "step": 56440
+    },
+    {
+      "epoch": 0.08363502775983742,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048618110679557946,
+      "loss": 17.0464,
+      "step": 56460
+    },
+    {
+      "epoch": 0.0836646540537658,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004861761678605719,
+      "loss": 17.0377,
+      "step": 56480
+    },
+    {
+      "epoch": 0.08369428034769419,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004861712289255643,
+      "loss": 17.1083,
+      "step": 56500
+    },
+    {
+      "epoch": 0.08372390664162258,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004861662899905568,
+      "loss": 17.0156,
+      "step": 56520
+    },
+    {
+      "epoch": 0.08375353293555096,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004861613510555492,
+      "loss": 17.1009,
+      "step": 56540
+    },
+    {
+      "epoch": 0.08378315922947935,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048615641212054164,
+      "loss": 17.0541,
+      "step": 56560
+    },
+    {
+      "epoch": 0.08381278552340773,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048615147318553404,
+      "loss": 17.0052,
+      "step": 56580
+    },
+    {
+      "epoch": 0.08384241181733612,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048614653425052654,
+      "loss": 17.0689,
+      "step": 56600
+    },
+    {
+      "epoch": 0.08387203811126451,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048614159531551893,
+      "loss": 17.0825,
+      "step": 56620
+    },
+    {
+      "epoch": 0.0839016644051929,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004861366563805114,
+      "loss": 17.0745,
+      "step": 56640
+    },
+    {
+      "epoch": 0.08393129069912128,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004861317174455038,
+      "loss": 17.0943,
+      "step": 56660
+    },
+    {
+      "epoch": 0.08396091699304967,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004861267785104962,
+      "loss": 16.9957,
+      "step": 56680
+    },
+    {
+      "epoch": 0.08399054328697805,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048612183957548867,
+      "loss": 17.0411,
+      "step": 56700
+    },
+    {
+      "epoch": 0.08402016958090644,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048611690064048106,
+      "loss": 17.0705,
+      "step": 56720
+    },
+    {
+      "epoch": 0.08404979587483484,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048611196170547357,
+      "loss": 17.0302,
+      "step": 56740
+    },
+    {
+      "epoch": 0.08407942216876323,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048610702277046596,
+      "loss": 17.0697,
+      "step": 56760
+    },
+    {
+      "epoch": 0.08410904846269161,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004861020838354584,
+      "loss": 16.9995,
+      "step": 56780
+    },
+    {
+      "epoch": 0.08413867475662,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004860971449004508,
+      "loss": 17.0243,
+      "step": 56800
+    },
+    {
+      "epoch": 0.08416830105054839,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004860922059654433,
+      "loss": 17.0269,
+      "step": 56820
+    },
+    {
+      "epoch": 0.08419792734447677,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004860872670304357,
+      "loss": 17.095,
+      "step": 56840
+    },
+    {
+      "epoch": 0.08422755363840516,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048608232809542814,
+      "loss": 16.9837,
+      "step": 56860
+    },
+    {
+      "epoch": 0.08425717993233355,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048607738916042054,
+      "loss": 17.0325,
+      "step": 56880
+    },
+    {
+      "epoch": 0.08428680622626193,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048607245022541304,
+      "loss": 17.0513,
+      "step": 56900
+    },
+    {
+      "epoch": 0.08431643252019032,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048606751129040543,
+      "loss": 17.0624,
+      "step": 56920
+    },
+    {
+      "epoch": 0.0843460588141187,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004860625723553979,
+      "loss": 17.0052,
+      "step": 56940
+    },
+    {
+      "epoch": 0.08437568510804709,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004860576334203903,
+      "loss": 17.0705,
+      "step": 56960
+    },
+    {
+      "epoch": 0.08440531140197548,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004860526944853828,
+      "loss": 16.9932,
+      "step": 56980
+    },
+    {
+      "epoch": 0.08443493769590386,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048604775555037517,
+      "loss": 16.9712,
+      "step": 57000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 4.190665024641329e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null