Training in progress, step 16000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1a26dcf49a9adec930252211cc2ebdb39eacfeed22076666661d9caed214679
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:42301bc164cb007a8e9ffaaebd3b674826efaacc96f02799ea8c54ebdf5beff1
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c979f073b59bd49cc8c35d18515eb7ddd7a81fbd717bb34c90967a1f381c67c5
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:e166c3997353d811bb7375dab7e17cf88064b52029e8056c729ba4ae8d2e8f22
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ba6101cd4bf8f7ce3d46d2382a668adc549af4a1a22b84d941c0306451bad54
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8653c4f16bb3c4531444bd438e2a397c259c928e9f5a96f450fc3aa43ef0f5c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c787066085cf3612c776a9db920df8b50e21936babcea06d25d76b00b6f6481
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:91185d0e7a47d1f7979000c680b3a146a800c2ff31f983b75b24ceb331884072
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d923993877b77c02936b0de7d9cac94e6fa202dfaca7291842d5e19deb2cdb37
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:be0be34d9684d804e2f3030fceca4c7b93603e6596a44aaf270c97cb1740b1da
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b72344aa5e79ea1dbe308108cb74bb1087ed0d7f8ad9e397ccfa76e1dcdde76c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e539799e7e99b66c33c364546118319f901c9765aa17eaf7cf8b17906c00c95a
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51acafd4471f7843be9ffc9528db012939ee248bf40f7127aa783c5f97813694
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccc2a52ae0327def30cc40f7f273a4a1537961b9b580753fe57ec7ecdab69b35
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.029249601474179914,
   "eval_steps": 500,
-  "global_step": 15000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5258,6 +5258,356 @@
       "learning_rate": 0.0004952863526922635,
       "loss": 20.6348,
       "step": 15000
     }
   ],
   "logging_steps": 20,
@@ -5277,7 +5627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.102761022435256e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.031199574905791908,
   "eval_steps": 500,
+  "global_step": 16000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004952863526922635,
       "loss": 20.6348,
       "step": 15000
+    },
+    {
+      "epoch": 0.029288600942812153,
+      "grad_norm": 11.0,
+      "learning_rate": 0.000495279850661354,
+      "loss": 20.6052,
+      "step": 15020
+    },
+    {
+      "epoch": 0.029327600411444395,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004952733486304447,
+      "loss": 20.6334,
+      "step": 15040
+    },
+    {
+      "epoch": 0.029366599880076633,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004952668465995353,
+      "loss": 20.6751,
+      "step": 15060
+    },
+    {
+      "epoch": 0.029405599348708875,
+      "grad_norm": 14.75,
+      "learning_rate": 0.000495260344568626,
+      "loss": 20.5701,
+      "step": 15080
+    },
+    {
+      "epoch": 0.029444598817341114,
+      "grad_norm": 12.0,
+      "learning_rate": 0.0004952538425377166,
+      "loss": 20.5664,
+      "step": 15100
+    },
+    {
+      "epoch": 0.029483598285973352,
+      "grad_norm": 12.625,
+      "learning_rate": 0.0004952473405068073,
+      "loss": 20.5255,
+      "step": 15120
+    },
+    {
+      "epoch": 0.029522597754605594,
+      "grad_norm": 11.0,
+      "learning_rate": 0.000495240838475898,
+      "loss": 20.5584,
+      "step": 15140
+    },
+    {
+      "epoch": 0.029561597223237833,
+      "grad_norm": 13.0625,
+      "learning_rate": 0.0004952343364449886,
+      "loss": 20.6137,
+      "step": 15160
+    },
+    {
+      "epoch": 0.029600596691870075,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004952278344140793,
+      "loss": 20.5632,
+      "step": 15180
+    },
+    {
+      "epoch": 0.029639596160502314,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.0004952213323831699,
+      "loss": 20.5356,
+      "step": 15200
+    },
+    {
+      "epoch": 0.029678595629134552,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004952148303522606,
+      "loss": 20.5935,
+      "step": 15220
+    },
+    {
+      "epoch": 0.029717595097766794,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004952083283213512,
+      "loss": 20.6133,
+      "step": 15240
+    },
+    {
+      "epoch": 0.029756594566399033,
+      "grad_norm": 12.75,
+      "learning_rate": 0.0004952018262904418,
+      "loss": 20.6443,
+      "step": 15260
+    },
+    {
+      "epoch": 0.02979559403503127,
+      "grad_norm": 13.4375,
+      "learning_rate": 0.0004951953242595325,
+      "loss": 20.6207,
+      "step": 15280
+    },
+    {
+      "epoch": 0.029834593503663513,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004951888222286231,
+      "loss": 20.5027,
+      "step": 15300
+    },
+    {
+      "epoch": 0.029873592972295752,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004951823201977138,
+      "loss": 20.534,
+      "step": 15320
+    },
+    {
+      "epoch": 0.029912592440927994,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004951758181668044,
+      "loss": 20.5885,
+      "step": 15340
+    },
+    {
+      "epoch": 0.029951591909560232,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004951693161358951,
+      "loss": 20.6046,
+      "step": 15360
+    },
+    {
+      "epoch": 0.02999059137819247,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.0004951628141049857,
+      "loss": 20.4537,
+      "step": 15380
+    },
+    {
+      "epoch": 0.030029590846824713,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004951563120740764,
+      "loss": 20.4366,
+      "step": 15400
+    },
+    {
+      "epoch": 0.03006859031545695,
+      "grad_norm": 12.3125,
+      "learning_rate": 0.000495149810043167,
+      "loss": 20.5313,
+      "step": 15420
+    },
+    {
+      "epoch": 0.030107589784089193,
+      "grad_norm": 12.5,
+      "learning_rate": 0.0004951433080122577,
+      "loss": 20.5595,
+      "step": 15440
+    },
+    {
+      "epoch": 0.030146589252721432,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004951368059813483,
+      "loss": 20.4318,
+      "step": 15460
+    },
+    {
+      "epoch": 0.03018558872135367,
+      "grad_norm": 12.8125,
+      "learning_rate": 0.0004951303039504389,
+      "loss": 20.5356,
+      "step": 15480
+    },
+    {
+      "epoch": 0.030224588189985913,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004951238019195296,
+      "loss": 20.594,
+      "step": 15500
+    },
+    {
+      "epoch": 0.03026358765861815,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004951172998886202,
+      "loss": 20.5289,
+      "step": 15520
+    },
+    {
+      "epoch": 0.03030258712725039,
+      "grad_norm": 12.4375,
+      "learning_rate": 0.0004951107978577109,
+      "loss": 20.4482,
+      "step": 15540
+    },
+    {
+      "epoch": 0.03034158659588263,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004951042958268015,
+      "loss": 20.4001,
+      "step": 15560
+    },
+    {
+      "epoch": 0.03038058606451487,
+      "grad_norm": 13.0625,
+      "learning_rate": 0.0004950977937958922,
+      "loss": 20.3405,
+      "step": 15580
+    },
+    {
+      "epoch": 0.030419585533147112,
+      "grad_norm": 13.5625,
+      "learning_rate": 0.0004950912917649827,
+      "loss": 20.435,
+      "step": 15600
+    },
+    {
+      "epoch": 0.03045858500177935,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004950847897340734,
+      "loss": 20.4817,
+      "step": 15620
+    },
+    {
+      "epoch": 0.03049758447041159,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004950782877031641,
+      "loss": 20.4889,
+      "step": 15640
+    },
+    {
+      "epoch": 0.03053658393904383,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004950717856722547,
+      "loss": 20.4209,
+      "step": 15660
+    },
+    {
+      "epoch": 0.03057558340767607,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004950652836413454,
+      "loss": 20.401,
+      "step": 15680
+    },
+    {
+      "epoch": 0.030614582876308312,
+      "grad_norm": 12.0,
+      "learning_rate": 0.000495058781610436,
+      "loss": 20.4579,
+      "step": 15700
+    },
+    {
+      "epoch": 0.03065358234494055,
+      "grad_norm": 10.9375,
+      "learning_rate": 0.0004950522795795267,
+      "loss": 20.4935,
+      "step": 15720
+    },
+    {
+      "epoch": 0.03069258181357279,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004950457775486173,
+      "loss": 20.4301,
+      "step": 15740
+    },
+    {
+      "epoch": 0.03073158128220503,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.0004950392755177079,
+      "loss": 20.3754,
+      "step": 15760
+    },
+    {
+      "epoch": 0.03077058075083727,
+      "grad_norm": 10.625,
+      "learning_rate": 0.0004950327734867985,
+      "loss": 20.4608,
+      "step": 15780
+    },
+    {
+      "epoch": 0.030809580219469508,
+      "grad_norm": 11.875,
+      "learning_rate": 0.0004950262714558892,
+      "loss": 20.5408,
+      "step": 15800
+    },
+    {
+      "epoch": 0.03084857968810175,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004950197694249799,
+      "loss": 20.3624,
+      "step": 15820
+    },
+    {
+      "epoch": 0.03088757915673399,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004950132673940705,
+      "loss": 20.3549,
+      "step": 15840
+    },
+    {
+      "epoch": 0.03092657862536623,
+      "grad_norm": 13.6875,
+      "learning_rate": 0.0004950067653631612,
+      "loss": 20.3933,
+      "step": 15860
+    },
+    {
+      "epoch": 0.03096557809399847,
+      "grad_norm": 12.6875,
+      "learning_rate": 0.0004950002633322518,
+      "loss": 20.3452,
+      "step": 15880
+    },
+    {
+      "epoch": 0.031004577562630708,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004949937613013425,
+      "loss": 20.4437,
+      "step": 15900
+    },
+    {
+      "epoch": 0.03104357703126295,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004949872592704331,
+      "loss": 20.3318,
+      "step": 15920
+    },
+    {
+      "epoch": 0.03108257649989519,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004949807572395238,
+      "loss": 20.3704,
+      "step": 15940
+    },
+    {
+      "epoch": 0.03112157596852743,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004949742552086145,
+      "loss": 20.3497,
+      "step": 15960
+    },
+    {
+      "epoch": 0.03116057543715967,
+      "grad_norm": 11.9375,
+      "learning_rate": 0.0004949677531777051,
+      "loss": 20.4226,
+      "step": 15980
+    },
+    {
+      "epoch": 0.031199574905791908,
+      "grad_norm": 14.125,
+      "learning_rate": 0.0004949612511467957,
+      "loss": 20.3333,
+      "step": 16000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.176271382718605e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null