Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d98e97b81b6c1fe48cad320f2a890e7e61e83dfff7d26d334198e8a74f852b74
 size 194563400

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0ddf70e3b717c4942eb29e5cb233383ecd1afc20c58cc985636fa0e06d5b136
 size 194563400

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d6595f37c512d7abd41b810217cfc99365ebf48ebe28d646eced18519db33b8
 size 99240837

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b1d2da135690fff7c0e120e786351cf9750b8ea31f819ca37b532af0ea60ef3
 size 99240837

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f374b74741f6041ab3fbc6fa2f1ad297bd742776ae9f7be70ea73be713dbbd9f
-size 14581

 version https://git-lfs.github.com/spec/v1
+oid sha256:11ed94202eb6e3eaeb8f032cfd9fd46e9b4657a59638d69479f047c360d252a9
+size 14709

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ef0a7b786a9a7cce22464cb50d85b3bb30f4b7314d78a4eeb6d98db65c58909
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:95f3a67d47428d0f6084a0c2e68b54a6b89dcabf900532b585b519c3b42aa7fc
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2382015780854548,
   "eval_steps": 500,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,76 @@
       "learning_rate": 8.90915741234015e-05,
       "loss": 1.0931,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -96,7 +166,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.45623326670848e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4764031561709096,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.90915741234015e-05,
       "loss": 1.0931,
       "step": 100
+    },
+    {
+      "epoch": 0.2620217358940003,
+      "grad_norm": 0.09928414970636368,
+      "learning_rate": 8.656475314362148e-05,
+      "loss": 1.0993,
+      "step": 110
+    },
+    {
+      "epoch": 0.28584189370254576,
+      "grad_norm": 0.1041741743683815,
+      "learning_rate": 8.3819108836604e-05,
+      "loss": 1.0937,
+      "step": 120
+    },
+    {
+      "epoch": 0.3096620515110913,
+      "grad_norm": 0.10851814597845078,
+      "learning_rate": 8.087107262799855e-05,
+      "loss": 1.0663,
+      "step": 130
+    },
+    {
+      "epoch": 0.33348220931963674,
+      "grad_norm": 0.10271850228309631,
+      "learning_rate": 7.773828716643591e-05,
+      "loss": 1.0555,
+      "step": 140
+    },
+    {
+      "epoch": 0.3573023671281822,
+      "grad_norm": 0.11661435663700104,
+      "learning_rate": 7.443950074034368e-05,
+      "loss": 1.0421,
+      "step": 150
+    },
+    {
+      "epoch": 0.3811225249367277,
+      "grad_norm": 0.1197165921330452,
+      "learning_rate": 7.099445507801323e-05,
+      "loss": 1.0214,
+      "step": 160
+    },
+    {
+      "epoch": 0.4049426827452732,
+      "grad_norm": 0.1095174252986908,
+      "learning_rate": 6.742376720238346e-05,
+      "loss": 1.0386,
+      "step": 170
+    },
+    {
+      "epoch": 0.4287628405538187,
+      "grad_norm": 0.10334830731153488,
+      "learning_rate": 6.374880604758615e-05,
+      "loss": 1.0204,
+      "step": 180
+    },
+    {
+      "epoch": 0.45258299836236415,
+      "grad_norm": 0.10270854085683823,
+      "learning_rate": 5.9991564575646855e-05,
+      "loss": 1.0138,
+      "step": 190
+    },
+    {
+      "epoch": 0.4764031561709096,
+      "grad_norm": 0.10575564205646515,
+      "learning_rate": 5.6174528158664096e-05,
+      "loss": 1.0359,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.691246653341696e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null