Training in progress, step 28000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b43a60d737a785fc2e56f66e99945c6d2d2f51be29e80eb910c67c9e8fb0975a
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:2057e4bc4ccb7266894aa681fe099f5645555d35372ed2c2f53abaad870b8285
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9da49bf65896c36ca8776e94461400c308773518a8deaa78527ddff5ace4792
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:49e7c91022600e2317a6a9b8ec33d6b3225250425e275f6eed0bdadc714f7fa6
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bffe796412ae78a2f757b5f5f3b8aa6b56b0ed93c3906172f7ae4955983ac47b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f92647ded7f1a6725e7ffd2310a8d2fbafb5da62cf15755b5f3e6fb2fdf499f
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3caeaaccf9ee6ca738d7fdd6554578ee61615cb827b3fead7c68315af599605e
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:5badfec76e553ebbd712f8d9135dd4df979bf9196652df1ae9ad27ae709e59c4
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:419f112add610d2e512a87d40faad38cd56b3d9f2a22af4f17fcb3ffd5123429
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:8fdddb3d61ba5e574c0c975793584282bdce7b095bac6bf2d58912967ca7933b
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba4be6c148df8273fbbe24b956dde2c97bc7fec43be39c92565ead32dfcb5f69
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:3be341579a31269cdfe494164e23b8a4ba61b71f1f432b36a2c0aef7d49c9b92
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5d49c78930b4027fe8523bcb3c8aa6b7792a022fa04f4582d3171dc3e35af06
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:18c359f46f82e1c9ecfbab9a4532bc57a1a730dfa02c76c631eb621b98761e8a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.052649282653523845,
   "eval_steps": 500,
-  "global_step": 27000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -9458,6 +9458,356 @@
       "learning_rate": 0.0004913851341466507,
       "loss": 18.4303,
       "step": 27000
     }
   ],
   "logging_steps": 20,
@@ -9477,7 +9827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.984944183236677e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.05459925608513584,
   "eval_steps": 500,
+  "global_step": 28000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004913851341466507,
       "loss": 18.4303,
       "step": 27000
+    },
+    {
+      "epoch": 0.05268828212215609,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004913786321157414,
+      "loss": 18.495,
+      "step": 27020
+    },
+    {
+      "epoch": 0.05272728159078833,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.000491372130084832,
+      "loss": 18.491,
+      "step": 27040
+    },
+    {
+      "epoch": 0.052766281059420564,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004913656280539227,
+      "loss": 18.4938,
+      "step": 27060
+    },
+    {
+      "epoch": 0.052805280528052806,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004913591260230133,
+      "loss": 18.514,
+      "step": 27080
+    },
+    {
+      "epoch": 0.05284427999668505,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.000491352623992104,
+      "loss": 18.4142,
+      "step": 27100
+    },
+    {
+      "epoch": 0.05288327946531728,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004913461219611947,
+      "loss": 18.5517,
+      "step": 27120
+    },
+    {
+      "epoch": 0.052922278933949525,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004913396199302852,
+      "loss": 18.4506,
+      "step": 27140
+    },
+    {
+      "epoch": 0.05296127840258177,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004913331178993759,
+      "loss": 18.4626,
+      "step": 27160
+    },
+    {
+      "epoch": 0.053000277871214,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004913266158684665,
+      "loss": 18.4563,
+      "step": 27180
+    },
+    {
+      "epoch": 0.053039277339846244,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004913201138375572,
+      "loss": 18.5159,
+      "step": 27200
+    },
+    {
+      "epoch": 0.053078276808478486,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004913136118066478,
+      "loss": 18.4415,
+      "step": 27220
+    },
+    {
+      "epoch": 0.05311727627711072,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004913071097757385,
+      "loss": 18.4588,
+      "step": 27240
+    },
+    {
+      "epoch": 0.05315627574574296,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004913006077448291,
+      "loss": 18.508,
+      "step": 27260
+    },
+    {
+      "epoch": 0.053195275214375205,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004912941057139198,
+      "loss": 18.419,
+      "step": 27280
+    },
+    {
+      "epoch": 0.05323427468300745,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004912876036830103,
+      "loss": 18.3969,
+      "step": 27300
+    },
+    {
+      "epoch": 0.05327327415163968,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000491281101652101,
+      "loss": 18.4816,
+      "step": 27320
+    },
+    {
+      "epoch": 0.053312273620271924,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004912745996211917,
+      "loss": 18.4631,
+      "step": 27340
+    },
+    {
+      "epoch": 0.053351273088904166,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004912680975902823,
+      "loss": 18.3853,
+      "step": 27360
+    },
+    {
+      "epoch": 0.0533902725575364,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.000491261595559373,
+      "loss": 18.4279,
+      "step": 27380
+    },
+    {
+      "epoch": 0.053429272026168644,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004912550935284636,
+      "loss": 18.415,
+      "step": 27400
+    },
+    {
+      "epoch": 0.053468271494800886,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004912485914975543,
+      "loss": 18.4297,
+      "step": 27420
+    },
+    {
+      "epoch": 0.05350727096343312,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004912420894666449,
+      "loss": 18.4647,
+      "step": 27440
+    },
+    {
+      "epoch": 0.05354627043206536,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004912355874357355,
+      "loss": 18.4773,
+      "step": 27460
+    },
+    {
+      "epoch": 0.053585269900697605,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004912290854048262,
+      "loss": 18.4229,
+      "step": 27480
+    },
+    {
+      "epoch": 0.05362426936932984,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004912225833739168,
+      "loss": 18.4303,
+      "step": 27500
+    },
+    {
+      "epoch": 0.05366326883796208,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004912160813430075,
+      "loss": 18.5098,
+      "step": 27520
+    },
+    {
+      "epoch": 0.053702268306594324,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004912095793120981,
+      "loss": 18.4295,
+      "step": 27540
+    },
+    {
+      "epoch": 0.053741267775226566,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004912030772811888,
+      "loss": 18.3608,
+      "step": 27560
+    },
+    {
+      "epoch": 0.0537802672438588,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004911965752502794,
+      "loss": 18.4168,
+      "step": 27580
+    },
+    {
+      "epoch": 0.05381926671249104,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004911900732193701,
+      "loss": 18.3512,
+      "step": 27600
+    },
+    {
+      "epoch": 0.053858266181123285,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004911835711884607,
+      "loss": 18.3994,
+      "step": 27620
+    },
+    {
+      "epoch": 0.05389726564975552,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004911770691575514,
+      "loss": 18.3586,
+      "step": 27640
+    },
+    {
+      "epoch": 0.05393626511838776,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004911705671266421,
+      "loss": 18.3836,
+      "step": 27660
+    },
+    {
+      "epoch": 0.053975264587020004,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004911640650957326,
+      "loss": 18.366,
+      "step": 27680
+    },
+    {
+      "epoch": 0.05401426405565224,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004911575630648233,
+      "loss": 18.4281,
+      "step": 27700
+    },
+    {
+      "epoch": 0.05405326352428448,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004911510610339139,
+      "loss": 18.3837,
+      "step": 27720
+    },
+    {
+      "epoch": 0.05409226299291672,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004911445590030046,
+      "loss": 18.4365,
+      "step": 27740
+    },
+    {
+      "epoch": 0.05413126246154896,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004911380569720952,
+      "loss": 18.357,
+      "step": 27760
+    },
+    {
+      "epoch": 0.0541702619301812,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004911315549411859,
+      "loss": 18.3518,
+      "step": 27780
+    },
+    {
+      "epoch": 0.05420926139881344,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004911250529102766,
+      "loss": 18.421,
+      "step": 27800
+    },
+    {
+      "epoch": 0.054248260867445684,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004911185508793672,
+      "loss": 18.3555,
+      "step": 27820
+    },
+    {
+      "epoch": 0.05428726033607792,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004911120488484579,
+      "loss": 18.3645,
+      "step": 27840
+    },
+    {
+      "epoch": 0.05432625980471016,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004911055468175485,
+      "loss": 18.3514,
+      "step": 27860
+    },
+    {
+      "epoch": 0.0543652592733424,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004910990447866392,
+      "loss": 18.4433,
+      "step": 27880
+    },
+    {
+      "epoch": 0.05440425874197464,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004910925427557298,
+      "loss": 18.3955,
+      "step": 27900
+    },
+    {
+      "epoch": 0.05444325821060688,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004910860407248204,
+      "loss": 18.3803,
+      "step": 27920
+    },
+    {
+      "epoch": 0.05448225767923912,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.000491079538693911,
+      "loss": 18.4133,
+      "step": 27940
+    },
+    {
+      "epoch": 0.05452125714787136,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004910730366630017,
+      "loss": 18.3317,
+      "step": 27960
+    },
+    {
+      "epoch": 0.0545602566165036,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004910665346320924,
+      "loss": 18.3971,
+      "step": 27980
+    },
+    {
+      "epoch": 0.05459925608513584,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.000491060032601183,
+      "loss": 18.3958,
+      "step": 28000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 2.058460925948802e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null