Training in progress, step 22000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:375e4b9cfa9de09d8057f42e98dbc192a0866e06789a8ec7b0e9091572c996e2
 size 1520630616

 version https://git-lfs.github.com/spec/v1
+oid sha256:299b24fe69c89f19141b9f985a9ac826c3a53ad4e1b08b8aba5729be39c93c43
 size 1520630616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbebb029cca703c9435c8129fccc4b3f8d45e60881ec4b04f2d6acf25bec8c42
 size 3041448587

 version https://git-lfs.github.com/spec/v1
+oid sha256:2578fa210b28417d8f969fa905bceff91b35a10909b4f603355ac6d743992a10
 size 3041448587

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d849fd0ed2b4c55b2499c77003ae5987968969429cd3a8cafdd43ae46b463c3e
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:59dbdf3564f71a619277fad1d7b29f944b0a8aee767f1ee531e2a42c249a6709
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e17b5806922786f9c39beaa8475b0a348452b2fd43fba768c2f31b4cb13e074a
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5b97fc3e9888373aed6e862ae95add028b1c9773804bea656915decaab6270d
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.030437158469946,
   "eval_steps": 500,
-  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2808,6 +2808,286 @@
       "learning_rate": 0.0002720358859033514,
       "loss": 1.6249,
       "step": 20000
     }
   ],
   "logging_steps": 50,
@@ -2827,7 +3107,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0695590988837028e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.014666666666667,
   "eval_steps": 500,
+  "global_step": 22000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0002720358859033514,
       "loss": 1.6249,
       "step": 20000
+    },
+    {
+      "epoch": 5.030983606557377,
+      "grad_norm": 0.59375,
+      "learning_rate": 0.00027188193193538625,
+      "loss": 1.6114,
+      "step": 20050
+    },
+    {
+      "epoch": 5.031530054644809,
+      "grad_norm": 0.671875,
+      "learning_rate": 0.00027172759915556504,
+      "loss": 1.6081,
+      "step": 20100
+    },
+    {
+      "epoch": 5.032076502732241,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.0002715728880435577,
+      "loss": 1.627,
+      "step": 20150
+    },
+    {
+      "epoch": 5.032622950819672,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.00027141779908020986,
+      "loss": 1.5912,
+      "step": 20200
+    },
+    {
+      "epoch": 5.033169398907104,
+      "grad_norm": 0.65234375,
+      "learning_rate": 0.00027126233274754163,
+      "loss": 1.6476,
+      "step": 20250
+    },
+    {
+      "epoch": 5.033715846994536,
+      "grad_norm": 0.59375,
+      "learning_rate": 0.00027110648952874595,
+      "loss": 1.6051,
+      "step": 20300
+    },
+    {
+      "epoch": 5.034262295081967,
+      "grad_norm": 0.65234375,
+      "learning_rate": 0.0002709502699081871,
+      "loss": 1.5705,
+      "step": 20350
+    },
+    {
+      "epoch": 5.034808743169399,
+      "grad_norm": 0.578125,
+      "learning_rate": 0.00027079367437139935,
+      "loss": 1.6533,
+      "step": 20400
+    },
+    {
+      "epoch": 5.035355191256831,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.00027063670340508514,
+      "loss": 1.6099,
+      "step": 20450
+    },
+    {
+      "epoch": 5.035901639344262,
+      "grad_norm": 0.5703125,
+      "learning_rate": 0.00027047935749711395,
+      "loss": 1.6018,
+      "step": 20500
+    },
+    {
+      "epoch": 5.036448087431694,
+      "grad_norm": 0.59375,
+      "learning_rate": 0.0002703216371365204,
+      "loss": 1.637,
+      "step": 20550
+    },
+    {
+      "epoch": 5.036994535519126,
+      "grad_norm": 0.56640625,
+      "learning_rate": 0.00027016354281350315,
+      "loss": 1.6394,
+      "step": 20600
+    },
+    {
+      "epoch": 5.037540983606557,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.00027000507501942283,
+      "loss": 1.6253,
+      "step": 20650
+    },
+    {
+      "epoch": 6.000459016393442,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.0002698462342468011,
+      "loss": 1.5897,
+      "step": 20700
+    },
+    {
+      "epoch": 6.001005464480874,
+      "grad_norm": 0.56640625,
+      "learning_rate": 0.0002696870209893187,
+      "loss": 1.569,
+      "step": 20750
+    },
+    {
+      "epoch": 6.001551912568306,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.00026952743574181414,
+      "loss": 1.5482,
+      "step": 20800
+    },
+    {
+      "epoch": 6.002098360655737,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.00026936747900028205,
+      "loss": 1.5458,
+      "step": 20850
+    },
+    {
+      "epoch": 6.002644808743169,
+      "grad_norm": 0.734375,
+      "learning_rate": 0.00026920715126187167,
+      "loss": 1.4787,
+      "step": 20900
+    },
+    {
+      "epoch": 6.003191256830601,
+      "grad_norm": 0.7109375,
+      "learning_rate": 0.0002690464530248853,
+      "loss": 1.5565,
+      "step": 20950
+    },
+    {
+      "epoch": 6.0037377049180325,
+      "grad_norm": 0.64453125,
+      "learning_rate": 0.00026888538478877675,
+      "loss": 1.5588,
+      "step": 21000
+    },
+    {
+      "epoch": 6.0042841530054645,
+      "grad_norm": 0.6953125,
+      "learning_rate": 0.0002687239470541498,
+      "loss": 1.5347,
+      "step": 21050
+    },
+    {
+      "epoch": 6.0048306010928965,
+      "grad_norm": 0.58984375,
+      "learning_rate": 0.00026856214032275675,
+      "loss": 1.5341,
+      "step": 21100
+    },
+    {
+      "epoch": 6.0053770491803276,
+      "grad_norm": 0.73046875,
+      "learning_rate": 0.00026839996509749655,
+      "loss": 1.5441,
+      "step": 21150
+    },
+    {
+      "epoch": 6.0059234972677595,
+      "grad_norm": 0.58984375,
+      "learning_rate": 0.00026823742188241366,
+      "loss": 1.5405,
+      "step": 21200
+    },
+    {
+      "epoch": 6.0064699453551915,
+      "grad_norm": 0.64453125,
+      "learning_rate": 0.000268074511182696,
+      "loss": 1.5327,
+      "step": 21250
+    },
+    {
+      "epoch": 6.007016393442623,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.00026791123350467384,
+      "loss": 1.5338,
+      "step": 21300
+    },
+    {
+      "epoch": 6.007562841530055,
+      "grad_norm": 0.58984375,
+      "learning_rate": 0.000267747589355818,
+      "loss": 1.5663,
+      "step": 21350
+    },
+    {
+      "epoch": 6.008109289617487,
+      "grad_norm": 0.67578125,
+      "learning_rate": 0.0002675835792447382,
+      "loss": 1.5519,
+      "step": 21400
+    },
+    {
+      "epoch": 6.008655737704918,
+      "grad_norm": 0.66015625,
+      "learning_rate": 0.0002674192036811818,
+      "loss": 1.5256,
+      "step": 21450
+    },
+    {
+      "epoch": 6.00920218579235,
+      "grad_norm": 0.578125,
+      "learning_rate": 0.0002672544631760317,
+      "loss": 1.5428,
+      "step": 21500
+    },
+    {
+      "epoch": 6.009748633879782,
+      "grad_norm": 0.75,
+      "learning_rate": 0.00026708935824130514,
+      "loss": 1.5806,
+      "step": 21550
+    },
+    {
+      "epoch": 6.010295081967213,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.00026692388939015226,
+      "loss": 1.5628,
+      "step": 21600
+    },
+    {
+      "epoch": 6.010841530054645,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.00026675805713685387,
+      "loss": 1.5275,
+      "step": 21650
+    },
+    {
+      "epoch": 6.011387978142077,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.0002665918619968206,
+      "loss": 1.5547,
+      "step": 21700
+    },
+    {
+      "epoch": 6.011934426229508,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.0002664253044865907,
+      "loss": 1.575,
+      "step": 21750
+    },
+    {
+      "epoch": 6.01248087431694,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.0002662583851238287,
+      "loss": 1.5386,
+      "step": 21800
+    },
+    {
+      "epoch": 6.013027322404372,
+      "grad_norm": 0.6484375,
+      "learning_rate": 0.000266091104427324,
+      "loss": 1.5107,
+      "step": 21850
+    },
+    {
+      "epoch": 6.013573770491803,
+      "grad_norm": 0.625,
+      "learning_rate": 0.00026592346291698864,
+      "loss": 1.5516,
+      "step": 21900
+    },
+    {
+      "epoch": 6.014120218579235,
+      "grad_norm": 0.5703125,
+      "learning_rate": 0.00026575546111385647,
+      "loss": 1.5431,
+      "step": 21950
+    },
+    {
+      "epoch": 6.014666666666667,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.00026558709954008095,
+      "loss": 1.566,
+      "step": 22000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1.1765225285807505e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null