Training in progress, step 3000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:283d5276f6c4df703f732f168423b8fd8e7cd8727b10c58aea9987cee9cff3c3
 size 448472762

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b93ceca8e88ff460f8ccb50f4380d6798124eb004993f41675d14510f8c47b7
 size 448472762

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15a028a8560b407920cd062f27de45149caa035962dd5aa0d563d68cc54d245d
 size 151589028

 version https://git-lfs.github.com/spec/v1
+oid sha256:5791df358e1c8a02bbb41e3d1e52d823a2a78d0ff48fd6f7de4f19e14e0bb520
 size 151589028

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5286b6772872b246ec7aba38755e4e29fdb152f506f0b8ad4b7accb9f2790bc1
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cccb7abf0f8614f3fc64c31710fad6c824dca1edbb4986a5b9fb1ad1d2d802cb
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4da1f09a29f3dd222ad957789d5f90eb8fe01dcb86f4982648291ad5d61d7102
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:655ec14a75109d5e8c18da96c3a0f554fd551816773411140b362973eb5b2691
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0db29c17cd52fafd23b298fd61944bf267910041db4c417490e7a0ffd0ca7a3f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:697c100484a8888e919d71fa6c0aefff1702c654a32d364f7623997e3c0d9e2d
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76f91f6d25e2803893956b2bd4e8c56a3cc36b10bac74766bcd0ed3ea01b8d59
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ffb6ed56578248732f2cb9d5be51bee1d41b9fd8c2fcf9ccf47064ba796dd60
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21aaf28acccb9c3e0cc31ead108b163bffa2f4c4cf7745a201b283b65c5b5d34
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8dea3a8122c383e315053a97f608c6689c05237886892101fcacb12765eef233
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.005270897792020915,
   "eval_steps": 500,
-  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1758,6 +1758,356 @@
       "learning_rate": 0.0004992973173622721,
       "loss": 2.8356,
       "step": 2500
     }
   ],
   "logging_steps": 10,
@@ -1777,7 +2127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.174730659936338e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.006325077350425098,
   "eval_steps": 500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004992973173622721,
       "loss": 2.8356,
       "step": 2500
+    },
+    {
+      "epoch": 0.005291981383188998,
+      "grad_norm": 0.921875,
+      "learning_rate": 0.0004992938021914981,
+      "loss": 2.8107,
+      "step": 2510
+    },
+    {
+      "epoch": 0.005313064974357082,
+      "grad_norm": 0.8203125,
+      "learning_rate": 0.000499290287020724,
+      "loss": 2.8029,
+      "step": 2520
+    },
+    {
+      "epoch": 0.005334148565525166,
+      "grad_norm": 0.69140625,
+      "learning_rate": 0.00049928677184995,
+      "loss": 2.8216,
+      "step": 2530
+    },
+    {
+      "epoch": 0.005355232156693249,
+      "grad_norm": 0.8203125,
+      "learning_rate": 0.0004992832566791761,
+      "loss": 2.8112,
+      "step": 2540
+    },
+    {
+      "epoch": 0.005376315747861333,
+      "grad_norm": 0.67578125,
+      "learning_rate": 0.000499279741508402,
+      "loss": 2.8168,
+      "step": 2550
+    },
+    {
+      "epoch": 0.005397399339029417,
+      "grad_norm": 0.734375,
+      "learning_rate": 0.0004992762263376279,
+      "loss": 2.8118,
+      "step": 2560
+    },
+    {
+      "epoch": 0.0054184829301975,
+      "grad_norm": 0.75390625,
+      "learning_rate": 0.000499272711166854,
+      "loss": 2.8016,
+      "step": 2570
+    },
+    {
+      "epoch": 0.005439566521365584,
+      "grad_norm": 0.72265625,
+      "learning_rate": 0.0004992691959960799,
+      "loss": 2.8125,
+      "step": 2580
+    },
+    {
+      "epoch": 0.005460650112533668,
+      "grad_norm": 0.76171875,
+      "learning_rate": 0.0004992656808253059,
+      "loss": 2.8082,
+      "step": 2590
+    },
+    {
+      "epoch": 0.005481733703701751,
+      "grad_norm": 0.77734375,
+      "learning_rate": 0.0004992621656545319,
+      "loss": 2.7916,
+      "step": 2600
+    },
+    {
+      "epoch": 0.005502817294869835,
+      "grad_norm": 0.765625,
+      "learning_rate": 0.0004992586504837578,
+      "loss": 2.8011,
+      "step": 2610
+    },
+    {
+      "epoch": 0.005523900886037919,
+      "grad_norm": 0.7734375,
+      "learning_rate": 0.0004992551353129838,
+      "loss": 2.7912,
+      "step": 2620
+    },
+    {
+      "epoch": 0.005544984477206002,
+      "grad_norm": 0.71484375,
+      "learning_rate": 0.0004992516201422098,
+      "loss": 2.7906,
+      "step": 2630
+    },
+    {
+      "epoch": 0.005566068068374086,
+      "grad_norm": 0.75390625,
+      "learning_rate": 0.0004992481049714357,
+      "loss": 2.7852,
+      "step": 2640
+    },
+    {
+      "epoch": 0.005587151659542169,
+      "grad_norm": 0.71875,
+      "learning_rate": 0.0004992445898006617,
+      "loss": 2.7901,
+      "step": 2650
+    },
+    {
+      "epoch": 0.005608235250710253,
+      "grad_norm": 0.7890625,
+      "learning_rate": 0.0004992410746298877,
+      "loss": 2.777,
+      "step": 2660
+    },
+    {
+      "epoch": 0.005629318841878337,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.0004992375594591136,
+      "loss": 2.7892,
+      "step": 2670
+    },
+    {
+      "epoch": 0.0056504024330464205,
+      "grad_norm": 0.83203125,
+      "learning_rate": 0.0004992340442883396,
+      "loss": 2.7684,
+      "step": 2680
+    },
+    {
+      "epoch": 0.005671486024214504,
+      "grad_norm": 0.671875,
+      "learning_rate": 0.0004992305291175657,
+      "loss": 2.7866,
+      "step": 2690
+    },
+    {
+      "epoch": 0.005692569615382588,
+      "grad_norm": 0.8203125,
+      "learning_rate": 0.0004992270139467916,
+      "loss": 2.7855,
+      "step": 2700
+    },
+    {
+      "epoch": 0.0057136532065506715,
+      "grad_norm": 0.73046875,
+      "learning_rate": 0.0004992234987760175,
+      "loss": 2.7744,
+      "step": 2710
+    },
+    {
+      "epoch": 0.0057347367977187555,
+      "grad_norm": 0.66796875,
+      "learning_rate": 0.0004992199836052436,
+      "loss": 2.7805,
+      "step": 2720
+    },
+    {
+      "epoch": 0.005755820388886839,
+      "grad_norm": 0.7734375,
+      "learning_rate": 0.0004992164684344695,
+      "loss": 2.768,
+      "step": 2730
+    },
+    {
+      "epoch": 0.0057769039800549225,
+      "grad_norm": 0.6796875,
+      "learning_rate": 0.0004992129532636955,
+      "loss": 2.7693,
+      "step": 2740
+    },
+    {
+      "epoch": 0.0057979875712230065,
+      "grad_norm": 0.81640625,
+      "learning_rate": 0.0004992094380929214,
+      "loss": 2.7611,
+      "step": 2750
+    },
+    {
+      "epoch": 0.0058190711623910905,
+      "grad_norm": 0.8125,
+      "learning_rate": 0.0004992059229221474,
+      "loss": 2.7758,
+      "step": 2760
+    },
+    {
+      "epoch": 0.005840154753559174,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.0004992024077513734,
+      "loss": 2.7578,
+      "step": 2770
+    },
+    {
+      "epoch": 0.0058612383447272575,
+      "grad_norm": 0.7421875,
+      "learning_rate": 0.0004991988925805993,
+      "loss": 2.7572,
+      "step": 2780
+    },
+    {
+      "epoch": 0.005882321935895341,
+      "grad_norm": 0.75,
+      "learning_rate": 0.0004991953774098253,
+      "loss": 2.7452,
+      "step": 2790
+    },
+    {
+      "epoch": 0.005903405527063425,
+      "grad_norm": 0.68359375,
+      "learning_rate": 0.0004991918622390514,
+      "loss": 2.7658,
+      "step": 2800
+    },
+    {
+      "epoch": 0.005924489118231509,
+      "grad_norm": 0.69140625,
+      "learning_rate": 0.0004991883470682772,
+      "loss": 2.77,
+      "step": 2810
+    },
+    {
+      "epoch": 0.005945572709399592,
+      "grad_norm": 0.7421875,
+      "learning_rate": 0.0004991848318975032,
+      "loss": 2.7502,
+      "step": 2820
+    },
+    {
+      "epoch": 0.005966656300567676,
+      "grad_norm": 0.70703125,
+      "learning_rate": 0.0004991813167267293,
+      "loss": 2.7627,
+      "step": 2830
+    },
+    {
+      "epoch": 0.00598773989173576,
+      "grad_norm": 0.671875,
+      "learning_rate": 0.0004991778015559552,
+      "loss": 2.7606,
+      "step": 2840
+    },
+    {
+      "epoch": 0.006008823482903843,
+      "grad_norm": 0.81640625,
+      "learning_rate": 0.0004991742863851812,
+      "loss": 2.7442,
+      "step": 2850
+    },
+    {
+      "epoch": 0.006029907074071927,
+      "grad_norm": 0.87109375,
+      "learning_rate": 0.0004991707712144072,
+      "loss": 2.7492,
+      "step": 2860
+    },
+    {
+      "epoch": 0.006050990665240011,
+      "grad_norm": 0.83984375,
+      "learning_rate": 0.0004991672560436331,
+      "loss": 2.734,
+      "step": 2870
+    },
+    {
+      "epoch": 0.006072074256408094,
+      "grad_norm": 0.7265625,
+      "learning_rate": 0.0004991637408728591,
+      "loss": 2.7395,
+      "step": 2880
+    },
+    {
+      "epoch": 0.006093157847576178,
+      "grad_norm": 0.65625,
+      "learning_rate": 0.0004991602257020851,
+      "loss": 2.7505,
+      "step": 2890
+    },
+    {
+      "epoch": 0.006114241438744262,
+      "grad_norm": 0.69140625,
+      "learning_rate": 0.000499156710531311,
+      "loss": 2.7593,
+      "step": 2900
+    },
+    {
+      "epoch": 0.006135325029912345,
+      "grad_norm": 0.76171875,
+      "learning_rate": 0.000499153195360537,
+      "loss": 2.734,
+      "step": 2910
+    },
+    {
+      "epoch": 0.006156408621080429,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.000499149680189763,
+      "loss": 2.763,
+      "step": 2920
+    },
+    {
+      "epoch": 0.006177492212248512,
+      "grad_norm": 0.71484375,
+      "learning_rate": 0.0004991461650189889,
+      "loss": 2.7495,
+      "step": 2930
+    },
+    {
+      "epoch": 0.006198575803416596,
+      "grad_norm": 0.7421875,
+      "learning_rate": 0.0004991426498482149,
+      "loss": 2.7354,
+      "step": 2940
+    },
+    {
+      "epoch": 0.00621965939458468,
+      "grad_norm": 0.75,
+      "learning_rate": 0.000499139134677441,
+      "loss": 2.7663,
+      "step": 2950
+    },
+    {
+      "epoch": 0.006240742985752763,
+      "grad_norm": 0.73046875,
+      "learning_rate": 0.0004991356195066668,
+      "loss": 2.7337,
+      "step": 2960
+    },
+    {
+      "epoch": 0.006261826576920847,
+      "grad_norm": 0.70703125,
+      "learning_rate": 0.0004991321043358928,
+      "loss": 2.7446,
+      "step": 2970
+    },
+    {
+      "epoch": 0.006282910168088931,
+      "grad_norm": 0.8203125,
+      "learning_rate": 0.0004991285891651189,
+      "loss": 2.7338,
+      "step": 2980
+    },
+    {
+      "epoch": 0.006303993759257014,
+      "grad_norm": 0.7734375,
+      "learning_rate": 0.0004991250739943448,
+      "loss": 2.7538,
+      "step": 2990
+    },
+    {
+      "epoch": 0.006325077350425098,
+      "grad_norm": 0.76171875,
+      "learning_rate": 0.0004991215588235708,
+      "loss": 2.716,
+      "step": 3000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 9.80967272673706e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null