Training in progress, step 4000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d307d63e2bd5721810a84f6c0aded4fa80bef46ee1e253293ea2ea57f13b2b35
 size 1520630616

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ed7f5192373055df50388d1e8a342b0008cc7f264c290f2f40d0816847f2899
 size 1520630616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec9a01e5ca2a705c9e40bcd141b1b30c981cdb1ecde544437140b15a37cec10c
 size 3041448587

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b8be61aa4b411ba072b5dd099697cc18dd1215103eeea9cd79dbfb70d181d7a
 size 3041448587

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:181e60f1f01e165fe1ea237e5ea1bc5e876c0b1a74e9355ac894a44ab5895cb2
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:82f11385365889b74991a13277667854d4ee120983e8addb357d466767c0b9ff
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e91a2aad683dbb34b2ed1315719473de36da58f4dbb5b8ab53f09c5f23b65cac
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ac42a4d50be277865df4f8c22478009406dfd138fc6ebe8a41f41d644b86db8
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02185792349726776,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -288,6 +288,286 @@
       "learning_rate": 0.000249875,
       "loss": 3.3105,
       "step": 2000
     }
   ],
   "logging_steps": 50,
@@ -307,7 +587,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.069483900796928e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0060874316939892,
   "eval_steps": 500,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000249875,
       "loss": 3.3105,
       "step": 2000
+    },
+    {
+      "epoch": 0.022404371584699455,
+      "grad_norm": 0.86328125,
+      "learning_rate": 0.000256125,
+      "loss": 3.25,
+      "step": 2050
+    },
+    {
+      "epoch": 0.022950819672131147,
+      "grad_norm": 0.9375,
+      "learning_rate": 0.00026237499999999997,
+      "loss": 3.1414,
+      "step": 2100
+    },
+    {
+      "epoch": 0.023497267759562842,
+      "grad_norm": 0.86328125,
+      "learning_rate": 0.000268625,
+      "loss": 3.1565,
+      "step": 2150
+    },
+    {
+      "epoch": 0.024043715846994537,
+      "grad_norm": 0.80859375,
+      "learning_rate": 0.000274875,
+      "loss": 3.1131,
+      "step": 2200
+    },
+    {
+      "epoch": 0.02459016393442623,
+      "grad_norm": 0.91015625,
+      "learning_rate": 0.00028112499999999996,
+      "loss": 3.0784,
+      "step": 2250
+    },
+    {
+      "epoch": 0.025136612021857924,
+      "grad_norm": 0.80859375,
+      "learning_rate": 0.000287375,
+      "loss": 3.0332,
+      "step": 2300
+    },
+    {
+      "epoch": 0.025683060109289616,
+      "grad_norm": 0.85546875,
+      "learning_rate": 0.000293625,
+      "loss": 3.0955,
+      "step": 2350
+    },
+    {
+      "epoch": 0.02622950819672131,
+      "grad_norm": 0.734375,
+      "learning_rate": 0.000299875,
+      "loss": 3.045,
+      "step": 2400
+    },
+    {
+      "epoch": 0.026775956284153007,
+      "grad_norm": 0.8359375,
+      "learning_rate": 0.0002999997761290961,
+      "loss": 2.995,
+      "step": 2450
+    },
+    {
+      "epoch": 0.0273224043715847,
+      "grad_norm": 0.75390625,
+      "learning_rate": 0.0002999990861486685,
+      "loss": 2.9428,
+      "step": 2500
+    },
+    {
+      "epoch": 0.027868852459016394,
+      "grad_norm": 0.71484375,
+      "learning_rate": 0.00029999792996762107,
+      "loss": 2.9131,
+      "step": 2550
+    },
+    {
+      "epoch": 0.02841530054644809,
+      "grad_norm": 0.88671875,
+      "learning_rate": 0.00029999630758954706,
+      "loss": 2.896,
+      "step": 2600
+    },
+    {
+      "epoch": 0.02896174863387978,
+      "grad_norm": 0.73828125,
+      "learning_rate": 0.000299994219019489,
+      "loss": 2.8605,
+      "step": 2650
+    },
+    {
+      "epoch": 0.029508196721311476,
+      "grad_norm": 0.66796875,
+      "learning_rate": 0.0002999916642639382,
+      "loss": 2.8407,
+      "step": 2700
+    },
+    {
+      "epoch": 0.030054644808743168,
+      "grad_norm": 0.68359375,
+      "learning_rate": 0.0002999886433308348,
+      "loss": 2.8313,
+      "step": 2750
+    },
+    {
+      "epoch": 0.030601092896174863,
+      "grad_norm": 0.69140625,
+      "learning_rate": 0.00029998515622956803,
+      "loss": 2.8194,
+      "step": 2800
+    },
+    {
+      "epoch": 0.03114754098360656,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.00029998120297097586,
+      "loss": 2.7874,
+      "step": 2850
+    },
+    {
+      "epoch": 0.03169398907103825,
+      "grad_norm": 0.67578125,
+      "learning_rate": 0.00029997678356734504,
+      "loss": 2.7631,
+      "step": 2900
+    },
+    {
+      "epoch": 0.03224043715846994,
+      "grad_norm": 0.6875,
+      "learning_rate": 0.0002999718980324113,
+      "loss": 2.7603,
+      "step": 2950
+    },
+    {
+      "epoch": 0.03278688524590164,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.0002999665463813589,
+      "loss": 2.7229,
+      "step": 3000
+    },
+    {
+      "epoch": 0.03333333333333333,
+      "grad_norm": 0.671875,
+      "learning_rate": 0.00029996072863082093,
+      "loss": 2.7895,
+      "step": 3050
+    },
+    {
+      "epoch": 0.033879781420765025,
+      "grad_norm": 0.91796875,
+      "learning_rate": 0.0002999544447988791,
+      "loss": 2.6505,
+      "step": 3100
+    },
+    {
+      "epoch": 0.03442622950819672,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.0002999476949050637,
+      "loss": 2.6744,
+      "step": 3150
+    },
+    {
+      "epoch": 0.034972677595628415,
+      "grad_norm": 0.59375,
+      "learning_rate": 0.0002999404789703535,
+      "loss": 2.6869,
+      "step": 3200
+    },
+    {
+      "epoch": 0.03551912568306011,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.0002999327970171759,
+      "loss": 2.6726,
+      "step": 3250
+    },
+    {
+      "epoch": 0.036065573770491806,
+      "grad_norm": 0.66015625,
+      "learning_rate": 0.0002999246490694065,
+      "loss": 2.6444,
+      "step": 3300
+    },
+    {
+      "epoch": 0.0366120218579235,
+      "grad_norm": 0.69921875,
+      "learning_rate": 0.0002999160351523693,
+      "loss": 2.6568,
+      "step": 3350
+    },
+    {
+      "epoch": 0.03715846994535519,
+      "grad_norm": 0.625,
+      "learning_rate": 0.00029990695529283665,
+      "loss": 2.6436,
+      "step": 3400
+    },
+    {
+      "epoch": 1.0000765027322405,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00029989740951902885,
+      "loss": 2.6468,
+      "step": 3450
+    },
+    {
+      "epoch": 1.0006229508196722,
+      "grad_norm": 0.578125,
+      "learning_rate": 0.0002998873978606145,
+      "loss": 2.5703,
+      "step": 3500
+    },
+    {
+      "epoch": 1.0011693989071038,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.0002998769203487099,
+      "loss": 2.6321,
+      "step": 3550
+    },
+    {
+      "epoch": 1.0017158469945355,
+      "grad_norm": 0.62109375,
+      "learning_rate": 0.0002998659770158796,
+      "loss": 2.5518,
+      "step": 3600
+    },
+    {
+      "epoch": 1.0022622950819673,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.0002998545678961356,
+      "loss": 2.5255,
+      "step": 3650
+    },
+    {
+      "epoch": 1.0028087431693988,
+      "grad_norm": 0.66015625,
+      "learning_rate": 0.00029984269302493776,
+      "loss": 2.4976,
+      "step": 3700
+    },
+    {
+      "epoch": 1.0033551912568306,
+      "grad_norm": 0.58984375,
+      "learning_rate": 0.0002998303524391934,
+      "loss": 2.532,
+      "step": 3750
+    },
+    {
+      "epoch": 1.0039016393442624,
+      "grad_norm": 0.6484375,
+      "learning_rate": 0.00029981754617725747,
+      "loss": 2.5321,
+      "step": 3800
+    },
+    {
+      "epoch": 1.004448087431694,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.0002998042742789319,
+      "loss": 2.4924,
+      "step": 3850
+    },
+    {
+      "epoch": 1.0049945355191257,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.0002997905367854663,
+      "loss": 2.492,
+      "step": 3900
+    },
+    {
+      "epoch": 1.0055409836065574,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.00029977633373955696,
+      "loss": 2.5266,
+      "step": 3950
+    },
+    {
+      "epoch": 1.0060874316939892,
+      "grad_norm": 0.58984375,
+      "learning_rate": 0.00029976166518534735,
+      "loss": 2.4739,
+      "step": 4000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 2.1391181977674056e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null