Training in progress, step 550, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.bin +1 -1
last-checkpoint/pytorch_model_fsdp.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +157 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1b3e41169f1e067e1d0fc5b433e28209599b88ec7a1f300f0d620bd5f1fea11
 size 1039483968

 version https://git-lfs.github.com/spec/v1
+oid sha256:098e670d3e5545c295765450469b25f278e85309584bd5521b048fbfd0738e33
 size 1039483968

last-checkpoint/optimizer.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b38c5653577dce8d43daffaa1d997a170a0534000c5adda1ce07181b3ae0588
 size 2079349513

 version https://git-lfs.github.com/spec/v1
+oid sha256:db8c189ebc2276dbc5a8793affc7b015267813ce7ff1c1cecdb692a76ff78107
 size 2079349513

last-checkpoint/pytorch_model_fsdp.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3084904f376c934f6e1074299150e1257b4859fec0f98e9e5fc02fd4fd0c9969
 size 1039629811

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a1777f717103649abb1d286d70ad5ded7dcba9897f65a6565e80193aecd999f
 size 1039629811

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:441648197c9c043681f2314411742b150ffff3ca10b3a89fc2e21d86709ea05c
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccea8299800024b78e5d1ec043b4c36fae734e757f1a43a7a2255a29b414dc4f
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c4728894dd7fa940d14c3112093f8d6d68941114d14e3f2945f3d934cb2228f
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:42978a3ff9e161a48be56df954484177012b3d081b4eedca7e2148205852aca3
 size 14917

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43af7aa6b99f5741e0bd6f3de2d6923856f8afd0580427a79e4f0846abad72ae
 size 1529

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8993fe857a67f14a1c28a148c1bef4b188f7d1af8900ff5c015c57364dd79df
 size 1529

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.2025604551920341,
   "eval_steps": 88,
-  "global_step": 528,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3760,6 +3760,160 @@
       "eval_samples_per_second": 3.275,
       "eval_steps_per_second": 0.819,
       "step": 528
     }
   ],
   "logging_steps": 1,
@@ -3779,7 +3933,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4403370387041681e+19,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2526315789473683,
   "eval_steps": 88,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.275,
       "eval_steps_per_second": 0.819,
       "step": 528
+    },
+    {
+      "epoch": 1.2048364153627311,
+      "grad_norm": 0.4168463349342346,
+      "learning_rate": 8.964330256921927e-06,
+      "loss": 0.4631,
+      "step": 529
+    },
+    {
+      "epoch": 1.207112375533428,
+      "grad_norm": 0.8588109612464905,
+      "learning_rate": 8.960000000000002e-06,
+      "loss": 0.5207,
+      "step": 530
+    },
+    {
+      "epoch": 1.209388335704125,
+      "grad_norm": 0.5915808081626892,
+      "learning_rate": 8.955650212979206e-06,
+      "loss": 0.5108,
+      "step": 531
+    },
+    {
+      "epoch": 1.2116642958748223,
+      "grad_norm": 0.47360390424728394,
+      "learning_rate": 8.951280763435461e-06,
+      "loss": 0.5034,
+      "step": 532
+    },
+    {
+      "epoch": 1.2139402560455193,
+      "grad_norm": 0.39106452465057373,
+      "learning_rate": 8.946891517744779e-06,
+      "loss": 0.5013,
+      "step": 533
+    },
+    {
+      "epoch": 1.2162162162162162,
+      "grad_norm": 0.9861881136894226,
+      "learning_rate": 8.942482341069628e-06,
+      "loss": 0.4678,
+      "step": 534
+    },
+    {
+      "epoch": 1.2184921763869132,
+      "grad_norm": 0.4086526036262512,
+      "learning_rate": 8.938053097345134e-06,
+      "loss": 0.4737,
+      "step": 535
+    },
+    {
+      "epoch": 1.2207681365576102,
+      "grad_norm": 0.7135105729103088,
+      "learning_rate": 8.93360364926508e-06,
+      "loss": 0.4806,
+      "step": 536
+    },
+    {
+      "epoch": 1.2230440967283072,
+      "grad_norm": 0.46862491965293884,
+      "learning_rate": 8.929133858267717e-06,
+      "loss": 0.5002,
+      "step": 537
+    },
+    {
+      "epoch": 1.2253200568990044,
+      "grad_norm": 0.41207781434059143,
+      "learning_rate": 8.924643584521386e-06,
+      "loss": 0.4633,
+      "step": 538
+    },
+    {
+      "epoch": 1.2275960170697013,
+      "grad_norm": 0.5379470586776733,
+      "learning_rate": 8.920132686909926e-06,
+      "loss": 0.5418,
+      "step": 539
+    },
+    {
+      "epoch": 1.2298719772403983,
+      "grad_norm": 0.6593378782272339,
+      "learning_rate": 8.915601023017903e-06,
+      "loss": 0.5227,
+      "step": 540
+    },
+    {
+      "epoch": 1.2321479374110953,
+      "grad_norm": 0.5798733234405518,
+      "learning_rate": 8.911048449115613e-06,
+      "loss": 0.4734,
+      "step": 541
+    },
+    {
+      "epoch": 1.2344238975817923,
+      "grad_norm": 0.3299521803855896,
+      "learning_rate": 8.906474820143887e-06,
+      "loss": 0.4953,
+      "step": 542
+    },
+    {
+      "epoch": 1.2366998577524893,
+      "grad_norm": 0.7939162850379944,
+      "learning_rate": 8.901879989698689e-06,
+      "loss": 0.5127,
+      "step": 543
+    },
+    {
+      "epoch": 1.2389758179231865,
+      "grad_norm": 0.4086650311946869,
+      "learning_rate": 8.897263810015489e-06,
+      "loss": 0.5214,
+      "step": 544
+    },
+    {
+      "epoch": 1.2412517780938834,
+      "grad_norm": 0.589647650718689,
+      "learning_rate": 8.892626131953428e-06,
+      "loss": 0.5008,
+      "step": 545
+    },
+    {
+      "epoch": 1.2435277382645804,
+      "grad_norm": 0.3771437704563141,
+      "learning_rate": 8.887966804979256e-06,
+      "loss": 0.5281,
+      "step": 546
+    },
+    {
+      "epoch": 1.2458036984352774,
+      "grad_norm": 1.033705472946167,
+      "learning_rate": 8.883285677151027e-06,
+      "loss": 0.5148,
+      "step": 547
+    },
+    {
+      "epoch": 1.2480796586059744,
+      "grad_norm": 0.5085345506668091,
+      "learning_rate": 8.878582595101617e-06,
+      "loss": 0.5196,
+      "step": 548
+    },
+    {
+      "epoch": 1.2503556187766713,
+      "grad_norm": 0.2777687609195709,
+      "learning_rate": 8.873857404021936e-06,
+      "loss": 0.509,
+      "step": 549
+    },
+    {
+      "epoch": 1.2526315789473683,
+      "grad_norm": 0.5570908784866333,
+      "learning_rate": 8.869109947643979e-06,
+      "loss": 0.4756,
+      "step": 550
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.5003226796869026e+19,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null