Training in progress, step 11000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8529ab26bf1cb621752bfdcf39645b857532d7119197ba48911192305222533
 size 244223098

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f8dddf6de75a2669e45bc92f6a4ca08a65509177a3732a367cbfa1c80daacbe
 size 244223098

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb1396f9891f526d75f646a5f9f0af98ffb3ccba3b7c95bab2b3cfb0d0873dcf
 size 381944306

 version https://git-lfs.github.com/spec/v1
+oid sha256:30dee1c1faa1fa9cca0bbdc3497512922f4906f4ab49d60e46fb24c934bb150d
 size 381944306

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce9029627a508f9b85fc87ef7d6b828a2c09a14ff0ca8cde1de843bdd1497dca
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfe43fa1be8fc23eebf6d0265c9e86d27dbe1a7183ee9ff8d290496f67f7920b
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f14204032a244f46d27ef9476a586602eebf2284e673a724ccecf10784c3b30
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:02838e3dd99a981aed96c1e46abb129b6636bb9bdc4bb3b9d32692ead8821881
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:468952d6cc10cfee225ed76168c326b4807a4cfb6b22f7910877aeca614c9cad
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:aed6a0b83373d2ca2f6ea1f1ac78752c4b8eb48d2f34a0bffe9748140ee5f947
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7238505da0e2ba8c785fd2f6d8ef0414b8ad9ceebe196b76819975bc121ba9d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0de41f811c47a09044e5ad93b32d48fbc2e808eb9859cb07a66f7923677574e
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1943118567ffe73158246550d682bf973f1c84396959cfa696de01aefce43288
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8fd8d6850b7427eafc7ded0e60d1d7d6419f9660dea8de7c7cbb8cd0dbd9818
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.014813146964193662,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3508,6 +3508,356 @@
       "learning_rate": 0.0004976542528181563,
       "loss": 24.1171,
       "step": 10000
     }
   ],
   "logging_steps": 20,
@@ -3527,7 +3877,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.87992380367831e+18,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.016294461660613026,
   "eval_steps": 500,
+  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004976542528181563,
       "loss": 24.1171,
       "step": 10000
+    },
+    {
+      "epoch": 0.014842773258122048,
+      "grad_norm": 19.625,
+      "learning_rate": 0.0004976493138831488,
+      "loss": 24.1736,
+      "step": 10020
+    },
+    {
+      "epoch": 0.014872399552050436,
+      "grad_norm": 18.625,
+      "learning_rate": 0.0004976443749481412,
+      "loss": 24.2905,
+      "step": 10040
+    },
+    {
+      "epoch": 0.014902025845978823,
+      "grad_norm": 18.375,
+      "learning_rate": 0.0004976394360131336,
+      "loss": 24.1324,
+      "step": 10060
+    },
+    {
+      "epoch": 0.014931652139907211,
+      "grad_norm": 20.375,
+      "learning_rate": 0.0004976344970781261,
+      "loss": 24.1785,
+      "step": 10080
+    },
+    {
+      "epoch": 0.014961278433835597,
+      "grad_norm": 17.625,
+      "learning_rate": 0.0004976295581431185,
+      "loss": 24.1872,
+      "step": 10100
+    },
+    {
+      "epoch": 0.014990904727763986,
+      "grad_norm": 18.0,
+      "learning_rate": 0.0004976246192081109,
+      "loss": 24.1813,
+      "step": 10120
+    },
+    {
+      "epoch": 0.015020531021692372,
+      "grad_norm": 21.75,
+      "learning_rate": 0.0004976196802731033,
+      "loss": 24.1438,
+      "step": 10140
+    },
+    {
+      "epoch": 0.01505015731562076,
+      "grad_norm": 22.375,
+      "learning_rate": 0.0004976147413380958,
+      "loss": 24.1436,
+      "step": 10160
+    },
+    {
+      "epoch": 0.015079783609549147,
+      "grad_norm": 19.5,
+      "learning_rate": 0.0004976098024030882,
+      "loss": 24.1394,
+      "step": 10180
+    },
+    {
+      "epoch": 0.015109409903477535,
+      "grad_norm": 18.25,
+      "learning_rate": 0.0004976048634680807,
+      "loss": 24.0992,
+      "step": 10200
+    },
+    {
+      "epoch": 0.015139036197405921,
+      "grad_norm": 15.6875,
+      "learning_rate": 0.0004975999245330731,
+      "loss": 24.0464,
+      "step": 10220
+    },
+    {
+      "epoch": 0.01516866249133431,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004975949855980656,
+      "loss": 24.0805,
+      "step": 10240
+    },
+    {
+      "epoch": 0.015198288785262696,
+      "grad_norm": 16.875,
+      "learning_rate": 0.000497590046663058,
+      "loss": 24.0985,
+      "step": 10260
+    },
+    {
+      "epoch": 0.015227915079191084,
+      "grad_norm": 23.25,
+      "learning_rate": 0.0004975851077280504,
+      "loss": 24.042,
+      "step": 10280
+    },
+    {
+      "epoch": 0.01525754137311947,
+      "grad_norm": 19.375,
+      "learning_rate": 0.0004975801687930429,
+      "loss": 24.0048,
+      "step": 10300
+    },
+    {
+      "epoch": 0.015287167667047859,
+      "grad_norm": 19.75,
+      "learning_rate": 0.0004975752298580353,
+      "loss": 24.0158,
+      "step": 10320
+    },
+    {
+      "epoch": 0.015316793960976245,
+      "grad_norm": 20.875,
+      "learning_rate": 0.0004975702909230277,
+      "loss": 23.9875,
+      "step": 10340
+    },
+    {
+      "epoch": 0.015346420254904633,
+      "grad_norm": 20.75,
+      "learning_rate": 0.0004975653519880202,
+      "loss": 24.055,
+      "step": 10360
+    },
+    {
+      "epoch": 0.01537604654883302,
+      "grad_norm": 16.125,
+      "learning_rate": 0.0004975604130530126,
+      "loss": 23.9887,
+      "step": 10380
+    },
+    {
+      "epoch": 0.015405672842761408,
+      "grad_norm": 17.25,
+      "learning_rate": 0.000497555474118005,
+      "loss": 24.0268,
+      "step": 10400
+    },
+    {
+      "epoch": 0.015435299136689794,
+      "grad_norm": 18.125,
+      "learning_rate": 0.0004975505351829975,
+      "loss": 24.0453,
+      "step": 10420
+    },
+    {
+      "epoch": 0.015464925430618183,
+      "grad_norm": 21.625,
+      "learning_rate": 0.0004975455962479898,
+      "loss": 24.0189,
+      "step": 10440
+    },
+    {
+      "epoch": 0.015494551724546569,
+      "grad_norm": 15.625,
+      "learning_rate": 0.0004975406573129823,
+      "loss": 23.9409,
+      "step": 10460
+    },
+    {
+      "epoch": 0.015524178018474957,
+      "grad_norm": 18.0,
+      "learning_rate": 0.0004975357183779747,
+      "loss": 23.8996,
+      "step": 10480
+    },
+    {
+      "epoch": 0.015553804312403344,
+      "grad_norm": 21.875,
+      "learning_rate": 0.0004975307794429671,
+      "loss": 24.0183,
+      "step": 10500
+    },
+    {
+      "epoch": 0.015583430606331732,
+      "grad_norm": 19.0,
+      "learning_rate": 0.0004975258405079595,
+      "loss": 23.9392,
+      "step": 10520
+    },
+    {
+      "epoch": 0.015613056900260118,
+      "grad_norm": 16.375,
+      "learning_rate": 0.000497520901572952,
+      "loss": 23.9131,
+      "step": 10540
+    },
+    {
+      "epoch": 0.015642683194188507,
+      "grad_norm": 17.5,
+      "learning_rate": 0.0004975159626379444,
+      "loss": 23.8963,
+      "step": 10560
+    },
+    {
+      "epoch": 0.015672309488116893,
+      "grad_norm": 16.625,
+      "learning_rate": 0.0004975110237029369,
+      "loss": 23.9361,
+      "step": 10580
+    },
+    {
+      "epoch": 0.01570193578204528,
+      "grad_norm": 18.25,
+      "learning_rate": 0.0004975060847679293,
+      "loss": 23.9129,
+      "step": 10600
+    },
+    {
+      "epoch": 0.01573156207597367,
+      "grad_norm": 19.75,
+      "learning_rate": 0.0004975011458329218,
+      "loss": 23.8795,
+      "step": 10620
+    },
+    {
+      "epoch": 0.015761188369902056,
+      "grad_norm": 14.8125,
+      "learning_rate": 0.0004974962068979142,
+      "loss": 23.8412,
+      "step": 10640
+    },
+    {
+      "epoch": 0.015790814663830442,
+      "grad_norm": 19.625,
+      "learning_rate": 0.0004974912679629066,
+      "loss": 23.8545,
+      "step": 10660
+    },
+    {
+      "epoch": 0.01582044095775883,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004974863290278991,
+      "loss": 23.8848,
+      "step": 10680
+    },
+    {
+      "epoch": 0.01585006725168722,
+      "grad_norm": 18.125,
+      "learning_rate": 0.0004974813900928915,
+      "loss": 23.7463,
+      "step": 10700
+    },
+    {
+      "epoch": 0.015879693545615605,
+      "grad_norm": 17.25,
+      "learning_rate": 0.0004974764511578839,
+      "loss": 23.8657,
+      "step": 10720
+    },
+    {
+      "epoch": 0.01590931983954399,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004974715122228763,
+      "loss": 23.7865,
+      "step": 10740
+    },
+    {
+      "epoch": 0.015938946133472378,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004974665732878688,
+      "loss": 23.7971,
+      "step": 10760
+    },
+    {
+      "epoch": 0.015968572427400768,
+      "grad_norm": 19.125,
+      "learning_rate": 0.0004974616343528612,
+      "loss": 23.8342,
+      "step": 10780
+    },
+    {
+      "epoch": 0.015998198721329154,
+      "grad_norm": 16.75,
+      "learning_rate": 0.0004974566954178537,
+      "loss": 23.7571,
+      "step": 10800
+    },
+    {
+      "epoch": 0.01602782501525754,
+      "grad_norm": 16.75,
+      "learning_rate": 0.0004974517564828461,
+      "loss": 23.8034,
+      "step": 10820
+    },
+    {
+      "epoch": 0.016057451309185927,
+      "grad_norm": 17.25,
+      "learning_rate": 0.0004974468175478386,
+      "loss": 23.7763,
+      "step": 10840
+    },
+    {
+      "epoch": 0.016087077603114317,
+      "grad_norm": 18.25,
+      "learning_rate": 0.000497441878612831,
+      "loss": 23.803,
+      "step": 10860
+    },
+    {
+      "epoch": 0.016116703897042704,
+      "grad_norm": 20.875,
+      "learning_rate": 0.0004974369396778234,
+      "loss": 23.7222,
+      "step": 10880
+    },
+    {
+      "epoch": 0.01614633019097109,
+      "grad_norm": 20.5,
+      "learning_rate": 0.0004974320007428159,
+      "loss": 23.6994,
+      "step": 10900
+    },
+    {
+      "epoch": 0.016175956484899476,
+      "grad_norm": 15.3125,
+      "learning_rate": 0.0004974270618078083,
+      "loss": 23.6471,
+      "step": 10920
+    },
+    {
+      "epoch": 0.016205582778827866,
+      "grad_norm": 15.5,
+      "learning_rate": 0.0004974221228728007,
+      "loss": 23.7271,
+      "step": 10940
+    },
+    {
+      "epoch": 0.016235209072756253,
+      "grad_norm": 17.5,
+      "learning_rate": 0.0004974171839377932,
+      "loss": 23.6869,
+      "step": 10960
+    },
+    {
+      "epoch": 0.01626483536668464,
+      "grad_norm": 16.75,
+      "learning_rate": 0.0004974122450027856,
+      "loss": 23.6976,
+      "step": 10980
+    },
+    {
+      "epoch": 0.016294461660613026,
+      "grad_norm": 19.0,
+      "learning_rate": 0.0004974073060677781,
+      "loss": 23.6657,
+      "step": 11000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 7.567919072411648e+18,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null