Training in progress, step 73000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbefc43fcc2f8bf8bb8522016041f2a9a7a1389e937a0c7f9efe740c9281e923
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:82e96b382e85cf4f91a0957df390eab642f1a5b90594b054112e585987e922fb
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e10ee0c90a6cc09cdc24b1085749ee192ca52841ac52349ee023c635a106f71
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:86831135ab2a33d7609f755ab5e685a1ac6602cf0ed6e3f717ff3cd6a64064f2
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a2f1706dfc950df47249e8d65d6df596c2f98887c24dba54cde743e4804d2cf
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbf5a8e94cdeb9d71543994044a1496c0b99dc653812727d1f2b5879319264c4
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:488c74f8a1dc2a7148ae3d9f18c7e9fcbb141512e2f149cd1d29674d054be2f3
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e585ae00a418f8315b98a87df365e3f31023ec6747db05d48bdc24ed26af3666
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77448ddbc0e5f35d8ef3a4b1063eb25209d701957cc23b3671796af1520e431c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c65df296955d0ea7a8b7df67d30426101d0bc72ddcf4935d0366aeb81991dd30
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3acb48030fde17938d59bf929c695a9b6dbd4fe2687e2cce76096a6e14351d6
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:75f3a690a6b3c19beeba0982e2eceaedb3e05582e018ecc3f8710afa643876ad
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:514d743b09cdf67b5f7ccba0c67283da3d20aa73a759bcf5ebfccf66234e08c8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1971585f96833288fec52d3fdc773fe9f57b50e9c45dc3d75ed2e10f5ab3dca7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10665465814219437,
   "eval_steps": 500,
-  "global_step": 72000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -25208,6 +25208,356 @@
       "learning_rate": 0.00048234355429470035,
       "loss": 16.5261,
       "step": 72000
     }
   ],
   "logging_steps": 20,
@@ -25227,7 +25577,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.293707639198528e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10813597283861373,
   "eval_steps": 500,
+  "global_step": 73000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048234355429470035,
       "loss": 16.5261,
       "step": 72000
+    },
+    {
+      "epoch": 0.10668428443612275,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048233861535969274,
+      "loss": 16.4916,
+      "step": 72020
+    },
+    {
+      "epoch": 0.10671391073005114,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004823336764246852,
+      "loss": 16.4841,
+      "step": 72040
+    },
+    {
+      "epoch": 0.10674353702397953,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.00048232873748967764,
+      "loss": 16.485,
+      "step": 72060
+    },
+    {
+      "epoch": 0.10677316331790791,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004823237985546701,
+      "loss": 16.4513,
+      "step": 72080
+    },
+    {
+      "epoch": 0.1068027896118363,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004823188596196625,
+      "loss": 16.4513,
+      "step": 72100
+    },
+    {
+      "epoch": 0.10683241590576469,
+      "grad_norm": 7.0,
+      "learning_rate": 0.000482313920684655,
+      "loss": 16.4768,
+      "step": 72120
+    },
+    {
+      "epoch": 0.10686204219969307,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004823089817496474,
+      "loss": 16.4426,
+      "step": 72140
+    },
+    {
+      "epoch": 0.10689166849362146,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004823040428146398,
+      "loss": 16.4733,
+      "step": 72160
+    },
+    {
+      "epoch": 0.10692129478754984,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004822991038796322,
+      "loss": 16.459,
+      "step": 72180
+    },
+    {
+      "epoch": 0.10695092108147823,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004822941649446247,
+      "loss": 16.4675,
+      "step": 72200
+    },
+    {
+      "epoch": 0.10698054737540662,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004822892260096171,
+      "loss": 16.517,
+      "step": 72220
+    },
+    {
+      "epoch": 0.107010173669335,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004822842870746095,
+      "loss": 16.4864,
+      "step": 72240
+    },
+    {
+      "epoch": 0.10703979996326339,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048227934813960195,
+      "loss": 16.4141,
+      "step": 72260
+    },
+    {
+      "epoch": 0.10706942625719178,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004822744092045944,
+      "loss": 16.5031,
+      "step": 72280
+    },
+    {
+      "epoch": 0.10709905255112016,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048226947026958685,
+      "loss": 16.4786,
+      "step": 72300
+    },
+    {
+      "epoch": 0.10712867884504856,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048226453133457924,
+      "loss": 16.4936,
+      "step": 72320
+    },
+    {
+      "epoch": 0.10715830513897695,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004822595923995717,
+      "loss": 16.4618,
+      "step": 72340
+    },
+    {
+      "epoch": 0.10718793143290534,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.00048225465346456414,
+      "loss": 16.4663,
+      "step": 72360
+    },
+    {
+      "epoch": 0.10721755772683372,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004822497145295566,
+      "loss": 16.4996,
+      "step": 72380
+    },
+    {
+      "epoch": 0.10724718402076211,
+      "grad_norm": 6.25,
+      "learning_rate": 0.000482244775594549,
+      "loss": 16.4604,
+      "step": 72400
+    },
+    {
+      "epoch": 0.1072768103146905,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004822398366595415,
+      "loss": 16.4964,
+      "step": 72420
+    },
+    {
+      "epoch": 0.10730643660861888,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004822348977245339,
+      "loss": 16.4576,
+      "step": 72440
+    },
+    {
+      "epoch": 0.10733606290254727,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004822299587895263,
+      "loss": 16.4561,
+      "step": 72460
+    },
+    {
+      "epoch": 0.10736568919647566,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004822250198545187,
+      "loss": 16.4686,
+      "step": 72480
+    },
+    {
+      "epoch": 0.10739531549040404,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004822200809195112,
+      "loss": 16.477,
+      "step": 72500
+    },
+    {
+      "epoch": 0.10742494178433243,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004822151419845036,
+      "loss": 16.4681,
+      "step": 72520
+    },
+    {
+      "epoch": 0.10745456807826081,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.000482210203049496,
+      "loss": 16.4467,
+      "step": 72540
+    },
+    {
+      "epoch": 0.1074841943721892,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048220526411448845,
+      "loss": 16.4432,
+      "step": 72560
+    },
+    {
+      "epoch": 0.10751382066611759,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004822003251794809,
+      "loss": 16.4936,
+      "step": 72580
+    },
+    {
+      "epoch": 0.10754344696004597,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048219538624447335,
+      "loss": 16.4488,
+      "step": 72600
+    },
+    {
+      "epoch": 0.10757307325397436,
+      "grad_norm": 6.0,
+      "learning_rate": 0.00048219044730946574,
+      "loss": 16.4461,
+      "step": 72620
+    },
+    {
+      "epoch": 0.10760269954790276,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004821855083744582,
+      "loss": 16.5009,
+      "step": 72640
+    },
+    {
+      "epoch": 0.10763232584183115,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048218056943945064,
+      "loss": 16.4108,
+      "step": 72660
+    },
+    {
+      "epoch": 0.10766195213575953,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004821756305044431,
+      "loss": 16.4139,
+      "step": 72680
+    },
+    {
+      "epoch": 0.10769157842968792,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004821706915694355,
+      "loss": 16.4898,
+      "step": 72700
+    },
+    {
+      "epoch": 0.1077212047236163,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.000482165752634428,
+      "loss": 16.5149,
+      "step": 72720
+    },
+    {
+      "epoch": 0.1077508310175447,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004821608136994204,
+      "loss": 16.4915,
+      "step": 72740
+    },
+    {
+      "epoch": 0.10778045731147308,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004821558747644128,
+      "loss": 16.428,
+      "step": 72760
+    },
+    {
+      "epoch": 0.10781008360540147,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004821509358294052,
+      "loss": 16.4355,
+      "step": 72780
+    },
+    {
+      "epoch": 0.10783970989932985,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004821459968943977,
+      "loss": 16.4379,
+      "step": 72800
+    },
+    {
+      "epoch": 0.10786933619325824,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004821410579593901,
+      "loss": 16.4117,
+      "step": 72820
+    },
+    {
+      "epoch": 0.10789896248718663,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048213611902438256,
+      "loss": 16.4863,
+      "step": 72840
+    },
+    {
+      "epoch": 0.10792858878111501,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048213118008937495,
+      "loss": 16.4081,
+      "step": 72860
+    },
+    {
+      "epoch": 0.1079582150750434,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004821262411543674,
+      "loss": 16.4631,
+      "step": 72880
+    },
+    {
+      "epoch": 0.10798784136897178,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048212130221935985,
+      "loss": 16.4837,
+      "step": 72900
+    },
+    {
+      "epoch": 0.10801746766290017,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048211636328435224,
+      "loss": 16.4245,
+      "step": 72920
+    },
+    {
+      "epoch": 0.10804709395682856,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004821114243493447,
+      "loss": 16.4369,
+      "step": 72940
+    },
+    {
+      "epoch": 0.10807672025075696,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048210648541433714,
+      "loss": 16.4274,
+      "step": 72960
+    },
+    {
+      "epoch": 0.10810634654468534,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004821015464793296,
+      "loss": 16.4141,
+      "step": 72980
+    },
+    {
+      "epoch": 0.10813597283861373,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.000482096607544322,
+      "loss": 16.4235,
+      "step": 73000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.367243712484711e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null