Training in progress, step 29560, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +158 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:198cf9e2f7b9b672052b35384c96e423fbbed0604b744e435fcce6f796ded998
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:44d8b9c5f0b247c388d0afbe8907171a0d81b141a933b1e9361f9bbf2d35e204
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:595036a57cc0d3ee525f8dc47ce9b2dac7c500c9adb6353185e143c6de89824a
 size 168150738

 version https://git-lfs.github.com/spec/v1
+oid sha256:f82ac87fc2dd1538493c584b6bfaa34aacaf4f8dd89f4d5c843f7d1407dde93d
 size 168150738

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e3c64eefa3f7dd16ade383bee02b051776ec0d884de60d623c9f3616c93f904
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef6ee5c960556f49ab0201cef6ec598647c83cf5eebbbd5fbf9582e268f90127
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72573ab543ab6543a4c72300875bab9524df97382858e2308190d2fdd5358e35
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7c7fd32361ba3725f77c3361e30c891765755fcbe92da53fa4ea9fbbad5a7c8
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.99702300405954,
   "eval_steps": 3282,
-  "global_step": 29538,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -206953,6 +206953,160 @@
       "eval_test_samples_per_second": 12.934,
       "eval_test_steps_per_second": 0.809,
       "step": 29538
     }
   ],
   "logging_steps": 1,
@@ -206967,12 +207121,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.209966655963202e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.0,
   "eval_steps": 3282,
+  "global_step": 29560,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_test_samples_per_second": 12.934,
       "eval_test_steps_per_second": 0.809,
       "step": 29538
+    },
+    {
+      "epoch": 3.9971583220568334,
+      "grad_norm": 1.4479291439056396,
+      "learning_rate": 3.0004566050305867e-06,
+      "loss": 1.7956,
+      "step": 29539
+    },
+    {
+      "epoch": 3.997293640054127,
+      "grad_norm": 1.3905441761016846,
+      "learning_rate": 3.000414154242476e-06,
+      "loss": 1.8403,
+      "step": 29540
+    },
+    {
+      "epoch": 3.9974289580514206,
+      "grad_norm": 1.563751220703125,
+      "learning_rate": 3.0003737742207762e-06,
+      "loss": 1.9486,
+      "step": 29541
+    },
+    {
+      "epoch": 3.9975642760487142,
+      "grad_norm": 1.3970868587493896,
+      "learning_rate": 3.000335464966031e-06,
+      "loss": 1.9456,
+      "step": 29542
+    },
+    {
+      "epoch": 3.9976995940460083,
+      "grad_norm": 1.3398752212524414,
+      "learning_rate": 3.000299226478785e-06,
+      "loss": 1.8753,
+      "step": 29543
+    },
+    {
+      "epoch": 3.997834912043302,
+      "grad_norm": 1.3150476217269897,
+      "learning_rate": 3.0002650587595492e-06,
+      "loss": 1.9388,
+      "step": 29544
+    },
+    {
+      "epoch": 3.9979702300405955,
+      "grad_norm": 1.5918689966201782,
+      "learning_rate": 3.0002329618087684e-06,
+      "loss": 1.917,
+      "step": 29545
+    },
+    {
+      "epoch": 3.998105548037889,
+      "grad_norm": 1.2473838329315186,
+      "learning_rate": 3.0002029356269215e-06,
+      "loss": 1.9181,
+      "step": 29546
+    },
+    {
+      "epoch": 3.9982408660351827,
+      "grad_norm": 1.546493649482727,
+      "learning_rate": 3.000174980214419e-06,
+      "loss": 2.154,
+      "step": 29547
+    },
+    {
+      "epoch": 3.9983761840324763,
+      "grad_norm": 1.1951613426208496,
+      "learning_rate": 3.0001490955716424e-06,
+      "loss": 1.7965,
+      "step": 29548
+    },
+    {
+      "epoch": 3.99851150202977,
+      "grad_norm": 1.4026210308074951,
+      "learning_rate": 3.000125281698969e-06,
+      "loss": 1.9001,
+      "step": 29549
+    },
+    {
+      "epoch": 3.9986468200270635,
+      "grad_norm": 1.2616246938705444,
+      "learning_rate": 3.000103538596713e-06,
+      "loss": 1.9108,
+      "step": 29550
+    },
+    {
+      "epoch": 3.998782138024357,
+      "grad_norm": 1.3047972917556763,
+      "learning_rate": 3.000083866265187e-06,
+      "loss": 1.9664,
+      "step": 29551
+    },
+    {
+      "epoch": 3.9989174560216507,
+      "grad_norm": 1.5460106134414673,
+      "learning_rate": 3.0000662647046716e-06,
+      "loss": 2.0249,
+      "step": 29552
+    },
+    {
+      "epoch": 3.9990527740189448,
+      "grad_norm": 1.267030119895935,
+      "learning_rate": 3.0000507339153984e-06,
+      "loss": 1.9507,
+      "step": 29553
+    },
+    {
+      "epoch": 3.9991880920162384,
+      "grad_norm": 1.4216227531433105,
+      "learning_rate": 3.000037273897597e-06,
+      "loss": 1.9319,
+      "step": 29554
+    },
+    {
+      "epoch": 3.999323410013532,
+      "grad_norm": 1.4820796251296997,
+      "learning_rate": 3.0000258846514327e-06,
+      "loss": 2.0095,
+      "step": 29555
+    },
+    {
+      "epoch": 3.9994587280108256,
+      "grad_norm": 1.5175132751464844,
+      "learning_rate": 3.000016566177087e-06,
+      "loss": 1.853,
+      "step": 29556
+    },
+    {
+      "epoch": 3.999594046008119,
+      "grad_norm": 1.2719131708145142,
+      "learning_rate": 3.000009318474692e-06,
+      "loss": 1.7825,
+      "step": 29557
+    },
+    {
+      "epoch": 3.999729364005413,
+      "grad_norm": 1.266430377960205,
+      "learning_rate": 3.000004141544329e-06,
+      "loss": 1.7838,
+      "step": 29558
+    },
+    {
+      "epoch": 3.9998646820027064,
+      "grad_norm": 1.3060740232467651,
+      "learning_rate": 3.0000010353860824e-06,
+      "loss": 1.8332,
+      "step": 29559
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 3.18343448638916,
+      "learning_rate": 2.9999999999999997e-06,
+      "loss": 1.9557,
+      "step": 29560
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.215024419447243e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null