Training in progress, step 12000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f8dddf6de75a2669e45bc92f6a4ca08a65509177a3732a367cbfa1c80daacbe
 size 244223098

 version https://git-lfs.github.com/spec/v1
+oid sha256:53a76ac9077fe4281340d991c88d870e9612c562f18291ef2fff16717ae5fa8e
 size 244223098

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30dee1c1faa1fa9cca0bbdc3497512922f4906f4ab49d60e46fb24c934bb150d
 size 381944306

 version https://git-lfs.github.com/spec/v1
+oid sha256:91d1f43ad07d5d2506105a4a30a104f55de4d303f90118c0693725602ca12996
 size 381944306

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cfe43fa1be8fc23eebf6d0265c9e86d27dbe1a7183ee9ff8d290496f67f7920b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ad09e3903383f043ca34c37e900cc005c86f2fe664b64f1c40d71b64081bc5f
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02838e3dd99a981aed96c1e46abb129b6636bb9bdc4bb3b9d32692ead8821881
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:394ea9776e43b9e34e0f31ed80a4339ee1363ca5d97e2a122bc469ea88e3a051
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aed6a0b83373d2ca2f6ea1f1ac78752c4b8eb48d2f34a0bffe9748140ee5f947
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:185d9bf5dd99d2fb4439e3538848f269528414d649a4a521e708b152f704c300
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0de41f811c47a09044e5ad93b32d48fbc2e808eb9859cb07a66f7923677574e
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f8af41d2465acba77e73ddefdb8cd3bcfad2e25c1dc9b4e154733802fd9eb85a
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8fd8d6850b7427eafc7ded0e60d1d7d6419f9660dea8de7c7cbb8cd0dbd9818
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c91c39eca7bff6b168c2d221fc52c0c253070df5bf1f21ee503aeddc471ee587
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.016294461660613026,
   "eval_steps": 500,
-  "global_step": 11000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3858,6 +3858,356 @@
       "learning_rate": 0.0004974073060677781,
       "loss": 23.6657,
       "step": 11000
     }
   ],
   "logging_steps": 20,
@@ -3877,7 +4227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.567919072411648e+18,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.017775776357032393,
   "eval_steps": 500,
+  "global_step": 12000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004974073060677781,
       "loss": 23.6657,
       "step": 11000
+    },
+    {
+      "epoch": 0.016324087954541416,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004974023671327705,
+      "loss": 23.6059,
+      "step": 11020
+    },
+    {
+      "epoch": 0.016353714248469802,
+      "grad_norm": 16.875,
+      "learning_rate": 0.0004973974281977628,
+      "loss": 23.6203,
+      "step": 11040
+    },
+    {
+      "epoch": 0.01638334054239819,
+      "grad_norm": 26.0,
+      "learning_rate": 0.0004973924892627553,
+      "loss": 23.5207,
+      "step": 11060
+    },
+    {
+      "epoch": 0.016412966836326575,
+      "grad_norm": 18.25,
+      "learning_rate": 0.0004973875503277477,
+      "loss": 23.711,
+      "step": 11080
+    },
+    {
+      "epoch": 0.016442593130254965,
+      "grad_norm": 17.125,
+      "learning_rate": 0.0004973826113927401,
+      "loss": 23.5764,
+      "step": 11100
+    },
+    {
+      "epoch": 0.01647221942418335,
+      "grad_norm": 18.125,
+      "learning_rate": 0.0004973776724577325,
+      "loss": 23.6693,
+      "step": 11120
+    },
+    {
+      "epoch": 0.016501845718111738,
+      "grad_norm": 20.875,
+      "learning_rate": 0.000497372733522725,
+      "loss": 23.5375,
+      "step": 11140
+    },
+    {
+      "epoch": 0.016531472012040124,
+      "grad_norm": 14.75,
+      "learning_rate": 0.0004973677945877174,
+      "loss": 23.5473,
+      "step": 11160
+    },
+    {
+      "epoch": 0.016561098305968514,
+      "grad_norm": 15.625,
+      "learning_rate": 0.0004973628556527099,
+      "loss": 23.5889,
+      "step": 11180
+    },
+    {
+      "epoch": 0.0165907245998969,
+      "grad_norm": 16.875,
+      "learning_rate": 0.0004973579167177023,
+      "loss": 23.5879,
+      "step": 11200
+    },
+    {
+      "epoch": 0.016620350893825287,
+      "grad_norm": 18.375,
+      "learning_rate": 0.0004973529777826948,
+      "loss": 23.4974,
+      "step": 11220
+    },
+    {
+      "epoch": 0.016649977187753674,
+      "grad_norm": 15.625,
+      "learning_rate": 0.0004973480388476872,
+      "loss": 23.4771,
+      "step": 11240
+    },
+    {
+      "epoch": 0.016679603481682063,
+      "grad_norm": 17.5,
+      "learning_rate": 0.0004973430999126796,
+      "loss": 23.4806,
+      "step": 11260
+    },
+    {
+      "epoch": 0.01670922977561045,
+      "grad_norm": 19.75,
+      "learning_rate": 0.0004973381609776721,
+      "loss": 23.651,
+      "step": 11280
+    },
+    {
+      "epoch": 0.016738856069538836,
+      "grad_norm": 16.625,
+      "learning_rate": 0.0004973332220426645,
+      "loss": 23.5367,
+      "step": 11300
+    },
+    {
+      "epoch": 0.016768482363467223,
+      "grad_norm": 19.875,
+      "learning_rate": 0.0004973282831076569,
+      "loss": 23.5171,
+      "step": 11320
+    },
+    {
+      "epoch": 0.016798108657395613,
+      "grad_norm": 17.125,
+      "learning_rate": 0.0004973233441726494,
+      "loss": 23.4766,
+      "step": 11340
+    },
+    {
+      "epoch": 0.016827734951324,
+      "grad_norm": 15.3125,
+      "learning_rate": 0.0004973184052376418,
+      "loss": 23.4622,
+      "step": 11360
+    },
+    {
+      "epoch": 0.016857361245252386,
+      "grad_norm": 19.375,
+      "learning_rate": 0.0004973134663026343,
+      "loss": 23.5135,
+      "step": 11380
+    },
+    {
+      "epoch": 0.016886987539180772,
+      "grad_norm": 17.625,
+      "learning_rate": 0.0004973085273676267,
+      "loss": 23.485,
+      "step": 11400
+    },
+    {
+      "epoch": 0.016916613833109162,
+      "grad_norm": 17.5,
+      "learning_rate": 0.0004973035884326191,
+      "loss": 23.4218,
+      "step": 11420
+    },
+    {
+      "epoch": 0.01694624012703755,
+      "grad_norm": 16.375,
+      "learning_rate": 0.0004972986494976116,
+      "loss": 23.4405,
+      "step": 11440
+    },
+    {
+      "epoch": 0.016975866420965935,
+      "grad_norm": 21.125,
+      "learning_rate": 0.000497293710562604,
+      "loss": 23.4308,
+      "step": 11460
+    },
+    {
+      "epoch": 0.01700549271489432,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004972887716275964,
+      "loss": 23.4076,
+      "step": 11480
+    },
+    {
+      "epoch": 0.01703511900882271,
+      "grad_norm": 16.25,
+      "learning_rate": 0.0004972838326925889,
+      "loss": 23.4027,
+      "step": 11500
+    },
+    {
+      "epoch": 0.017064745302751098,
+      "grad_norm": 15.9375,
+      "learning_rate": 0.0004972788937575813,
+      "loss": 23.3797,
+      "step": 11520
+    },
+    {
+      "epoch": 0.017094371596679484,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004972739548225737,
+      "loss": 23.403,
+      "step": 11540
+    },
+    {
+      "epoch": 0.01712399789060787,
+      "grad_norm": 20.25,
+      "learning_rate": 0.0004972690158875662,
+      "loss": 23.435,
+      "step": 11560
+    },
+    {
+      "epoch": 0.01715362418453626,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004972640769525586,
+      "loss": 23.3429,
+      "step": 11580
+    },
+    {
+      "epoch": 0.017183250478464647,
+      "grad_norm": 19.0,
+      "learning_rate": 0.0004972591380175511,
+      "loss": 23.3458,
+      "step": 11600
+    },
+    {
+      "epoch": 0.017212876772393033,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.0004972541990825435,
+      "loss": 23.3765,
+      "step": 11620
+    },
+    {
+      "epoch": 0.01724250306632142,
+      "grad_norm": 17.375,
+      "learning_rate": 0.0004972492601475358,
+      "loss": 23.3812,
+      "step": 11640
+    },
+    {
+      "epoch": 0.01727212936024981,
+      "grad_norm": 17.375,
+      "learning_rate": 0.0004972443212125283,
+      "loss": 23.3419,
+      "step": 11660
+    },
+    {
+      "epoch": 0.017301755654178196,
+      "grad_norm": 15.9375,
+      "learning_rate": 0.0004972393822775207,
+      "loss": 23.1804,
+      "step": 11680
+    },
+    {
+      "epoch": 0.017331381948106583,
+      "grad_norm": 15.125,
+      "learning_rate": 0.0004972344433425131,
+      "loss": 23.2947,
+      "step": 11700
+    },
+    {
+      "epoch": 0.01736100824203497,
+      "grad_norm": 17.0,
+      "learning_rate": 0.0004972295044075056,
+      "loss": 23.402,
+      "step": 11720
+    },
+    {
+      "epoch": 0.01739063453596336,
+      "grad_norm": 18.5,
+      "learning_rate": 0.000497224565472498,
+      "loss": 23.2933,
+      "step": 11740
+    },
+    {
+      "epoch": 0.017420260829891746,
+      "grad_norm": 17.125,
+      "learning_rate": 0.0004972196265374904,
+      "loss": 23.296,
+      "step": 11760
+    },
+    {
+      "epoch": 0.017449887123820132,
+      "grad_norm": 15.5625,
+      "learning_rate": 0.0004972146876024829,
+      "loss": 23.2247,
+      "step": 11780
+    },
+    {
+      "epoch": 0.017479513417748522,
+      "grad_norm": 15.375,
+      "learning_rate": 0.0004972097486674753,
+      "loss": 23.1945,
+      "step": 11800
+    },
+    {
+      "epoch": 0.01750913971167691,
+      "grad_norm": 17.25,
+      "learning_rate": 0.0004972048097324678,
+      "loss": 23.2879,
+      "step": 11820
+    },
+    {
+      "epoch": 0.017538766005605295,
+      "grad_norm": 16.5,
+      "learning_rate": 0.0004971998707974602,
+      "loss": 23.2503,
+      "step": 11840
+    },
+    {
+      "epoch": 0.01756839229953368,
+      "grad_norm": 16.875,
+      "learning_rate": 0.0004971949318624526,
+      "loss": 23.2298,
+      "step": 11860
+    },
+    {
+      "epoch": 0.01759801859346207,
+      "grad_norm": 16.5,
+      "learning_rate": 0.0004971899929274451,
+      "loss": 23.2478,
+      "step": 11880
+    },
+    {
+      "epoch": 0.017627644887390458,
+      "grad_norm": 16.875,
+      "learning_rate": 0.0004971850539924375,
+      "loss": 23.2439,
+      "step": 11900
+    },
+    {
+      "epoch": 0.017657271181318844,
+      "grad_norm": 16.75,
+      "learning_rate": 0.0004971801150574299,
+      "loss": 23.2426,
+      "step": 11920
+    },
+    {
+      "epoch": 0.01768689747524723,
+      "grad_norm": 15.5,
+      "learning_rate": 0.0004971751761224224,
+      "loss": 23.2587,
+      "step": 11940
+    },
+    {
+      "epoch": 0.01771652376917562,
+      "grad_norm": 15.6875,
+      "learning_rate": 0.0004971702371874148,
+      "loss": 23.2458,
+      "step": 11960
+    },
+    {
+      "epoch": 0.017746150063104007,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004971652982524073,
+      "loss": 23.1944,
+      "step": 11980
+    },
+    {
+      "epoch": 0.017775776357032393,
+      "grad_norm": 15.5,
+      "learning_rate": 0.0004971603593173997,
+      "loss": 23.2169,
+      "step": 12000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 8.255909218322743e+18,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null