Training in progress, step 24000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:299b24fe69c89f19141b9f985a9ac826c3a53ad4e1b08b8aba5729be39c93c43
 size 1520630616

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a5e5a99de9ebd5a605eb747b364712a2371d3faff976fb42a4e2c4eff124586
 size 1520630616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2578fa210b28417d8f969fa905bceff91b35a10909b4f603355ac6d743992a10
 size 3041448587

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b90f9e040c4d2727932a8fb5449e7e222d2b34c261ceee7f00e20f78d73acb5
 size 3041448587

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:59dbdf3564f71a619277fad1d7b29f944b0a8aee767f1ee531e2a42c249a6709
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf31faff1a59206513a6140313f60a81b0b7bbfaceaf131da05eee348e2a75b6
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5b97fc3e9888373aed6e862ae95add028b1c9773804bea656915decaab6270d
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a5b896146d5d8a1fece26c83d1cdd06bac435f33fada598258a6302b90095e53
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 6.014666666666667,
   "eval_steps": 500,
-  "global_step": 22000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3088,6 +3088,286 @@
       "learning_rate": 0.00026558709954008095,
       "loss": 1.566,
       "step": 22000
     }
   ],
   "logging_steps": 50,
@@ -3107,7 +3387,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1765225285807505e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.036524590163935,
   "eval_steps": 500,
+  "global_step": 24000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00026558709954008095,
       "loss": 1.566,
       "step": 22000
+    },
+    {
+      "epoch": 6.015213114754098,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.00026541837871893367,
+      "loss": 1.5595,
+      "step": 22050
+    },
+    {
+      "epoch": 6.01575956284153,
+      "grad_norm": 0.5625,
+      "learning_rate": 0.0002652492991748029,
+      "loss": 1.5206,
+      "step": 22100
+    },
+    {
+      "epoch": 6.016306010928962,
+      "grad_norm": 0.640625,
+      "learning_rate": 0.00026507986143319164,
+      "loss": 1.5374,
+      "step": 22150
+    },
+    {
+      "epoch": 6.016852459016394,
+      "grad_norm": 0.703125,
+      "learning_rate": 0.0002649100660207164,
+      "loss": 1.522,
+      "step": 22200
+    },
+    {
+      "epoch": 6.017398907103825,
+      "grad_norm": 0.65625,
+      "learning_rate": 0.0002647399134651053,
+      "loss": 1.5532,
+      "step": 22250
+    },
+    {
+      "epoch": 6.017945355191257,
+      "grad_norm": 0.63671875,
+      "learning_rate": 0.0002645694042951963,
+      "loss": 1.5274,
+      "step": 22300
+    },
+    {
+      "epoch": 6.018491803278689,
+      "grad_norm": 0.65625,
+      "learning_rate": 0.00026439853904093586,
+      "loss": 1.517,
+      "step": 22350
+    },
+    {
+      "epoch": 6.01903825136612,
+      "grad_norm": 0.6875,
+      "learning_rate": 0.00026422731823337717,
+      "loss": 1.5197,
+      "step": 22400
+    },
+    {
+      "epoch": 6.019584699453552,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.0002640557424046784,
+      "loss": 1.5081,
+      "step": 22450
+    },
+    {
+      "epoch": 6.020131147540984,
+      "grad_norm": 0.65234375,
+      "learning_rate": 0.0002638838120881012,
+      "loss": 1.5526,
+      "step": 22500
+    },
+    {
+      "epoch": 6.020677595628415,
+      "grad_norm": 0.62109375,
+      "learning_rate": 0.000263711527818009,
+      "loss": 1.534,
+      "step": 22550
+    },
+    {
+      "epoch": 6.021224043715847,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.0002635388901298652,
+      "loss": 1.5156,
+      "step": 22600
+    },
+    {
+      "epoch": 6.021770491803279,
+      "grad_norm": 0.71875,
+      "learning_rate": 0.0002633658995602318,
+      "loss": 1.5402,
+      "step": 22650
+    },
+    {
+      "epoch": 6.02231693989071,
+      "grad_norm": 0.68359375,
+      "learning_rate": 0.0002631925566467674,
+      "loss": 1.5367,
+      "step": 22700
+    },
+    {
+      "epoch": 6.022863387978142,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.00026301886192822585,
+      "loss": 1.5126,
+      "step": 22750
+    },
+    {
+      "epoch": 6.023409836065574,
+      "grad_norm": 0.625,
+      "learning_rate": 0.00026284481594445434,
+      "loss": 1.5097,
+      "step": 22800
+    },
+    {
+      "epoch": 6.023956284153005,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.00026267041923639175,
+      "loss": 1.5058,
+      "step": 22850
+    },
+    {
+      "epoch": 6.024502732240437,
+      "grad_norm": 0.703125,
+      "learning_rate": 0.00026249567234606707,
+      "loss": 1.5004,
+      "step": 22900
+    },
+    {
+      "epoch": 6.025049180327869,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.00026232057581659777,
+      "loss": 1.4884,
+      "step": 22950
+    },
+    {
+      "epoch": 6.0255956284153,
+      "grad_norm": 0.65234375,
+      "learning_rate": 0.0002621451301921878,
+      "loss": 1.5884,
+      "step": 23000
+    },
+    {
+      "epoch": 6.026142076502732,
+      "grad_norm": 0.6484375,
+      "learning_rate": 0.00026196933601812616,
+      "loss": 1.565,
+      "step": 23050
+    },
+    {
+      "epoch": 6.026688524590164,
+      "grad_norm": 0.69140625,
+      "learning_rate": 0.00026179319384078535,
+      "loss": 1.5399,
+      "step": 23100
+    },
+    {
+      "epoch": 6.027234972677595,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.0002616167042076192,
+      "loss": 1.5319,
+      "step": 23150
+    },
+    {
+      "epoch": 6.027781420765027,
+      "grad_norm": 0.62890625,
+      "learning_rate": 0.0002614398676671616,
+      "loss": 1.5379,
+      "step": 23200
+    },
+    {
+      "epoch": 6.028327868852459,
+      "grad_norm": 0.69921875,
+      "learning_rate": 0.0002612626847690247,
+      "loss": 1.5344,
+      "step": 23250
+    },
+    {
+      "epoch": 6.02887431693989,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.0002610851560638968,
+      "loss": 1.5054,
+      "step": 23300
+    },
+    {
+      "epoch": 6.029420765027322,
+      "grad_norm": 0.6484375,
+      "learning_rate": 0.0002609072821035415,
+      "loss": 1.5421,
+      "step": 23350
+    },
+    {
+      "epoch": 6.029967213114754,
+      "grad_norm": 0.69921875,
+      "learning_rate": 0.00026072906344079484,
+      "loss": 1.5625,
+      "step": 23400
+    },
+    {
+      "epoch": 6.0305136612021855,
+      "grad_norm": 0.59765625,
+      "learning_rate": 0.0002605505006295648,
+      "loss": 1.5476,
+      "step": 23450
+    },
+    {
+      "epoch": 6.031060109289617,
+      "grad_norm": 0.609375,
+      "learning_rate": 0.00026037159422482865,
+      "loss": 1.537,
+      "step": 23500
+    },
+    {
+      "epoch": 6.031606557377049,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.00026019234478263155,
+      "loss": 1.5204,
+      "step": 23550
+    },
+    {
+      "epoch": 6.0321530054644805,
+      "grad_norm": 0.69921875,
+      "learning_rate": 0.000260012752860085,
+      "loss": 1.5479,
+      "step": 23600
+    },
+    {
+      "epoch": 6.0326994535519125,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.00025983281901536474,
+      "loss": 1.5304,
+      "step": 23650
+    },
+    {
+      "epoch": 6.0332459016393445,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.00025965254380770945,
+      "loss": 1.5738,
+      "step": 23700
+    },
+    {
+      "epoch": 6.033792349726776,
+      "grad_norm": 0.62109375,
+      "learning_rate": 0.0002594719277974185,
+      "loss": 1.5168,
+      "step": 23750
+    },
+    {
+      "epoch": 6.034338797814208,
+      "grad_norm": 0.6328125,
+      "learning_rate": 0.0002592909715458506,
+      "loss": 1.4984,
+      "step": 23800
+    },
+    {
+      "epoch": 6.0348852459016395,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.0002591096756154221,
+      "loss": 1.5721,
+      "step": 23850
+    },
+    {
+      "epoch": 6.035431693989071,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.0002589280405696048,
+      "loss": 1.5369,
+      "step": 23900
+    },
+    {
+      "epoch": 6.035978142076503,
+      "grad_norm": 0.61328125,
+      "learning_rate": 0.00025874606697292473,
+      "loss": 1.5236,
+      "step": 23950
+    },
+    {
+      "epoch": 6.036524590163935,
+      "grad_norm": 0.6953125,
+      "learning_rate": 0.00025856375539095986,
+      "loss": 1.577,
+      "step": 24000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1.2834709186604433e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null