Training in progress, step 594, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:324b2ab791b80076dbf4c8d3f97f114006208a69a2652ad5188a7e7e501a6879
 size 2147605960

 version https://git-lfs.github.com/spec/v1
+oid sha256:44ec759cdb5f06b9e582baac1d961ceb3d11fcd7f8b6bcd68bb72968ad4fc853
 size 2147605960

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e52928f7d960a14ccf21a39ad4b199d3b7b559526f1790f2d09fe590dfa86ee
 size 1091573733

 version https://git-lfs.github.com/spec/v1
+oid sha256:e81b8e84d0a507cddcee392d3cd6e9ef9a76ed6e9f3e1b078a9c884d9902f9fd
 size 1091573733

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:353dcb334997a0dcc8538fc5cb1c6093fe5eb610323591c3e82b29a32637f485
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:04541f08bf69ac7fe480c21fd1822bf1bcd1879399508271123652fdf993af0e
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bb81aea6c6946ec583b15472a291bc37e3ab21ba03b6112dd3b6ea20bd2a7b8
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:d74f2392731f9bfe3e3fcd2fa4bb9529cf683336ccaf940189be2143efabe210
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8421052631578947,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -708,6 +708,132 @@
       "learning_rate": 1.3126041392116772e-05,
       "loss": 0.5162,
       "step": 500
     }
   ],
   "logging_steps": 5,
@@ -722,12 +848,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.184190182619474e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 594,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.3126041392116772e-05,
       "loss": 0.5162,
       "step": 500
+    },
+    {
+      "epoch": 0.8505263157894737,
+      "grad_norm": 0.11415216326713562,
+      "learning_rate": 1.1807873565164506e-05,
+      "loss": 0.5486,
+      "step": 505
+    },
+    {
+      "epoch": 0.8589473684210527,
+      "grad_norm": 0.12088897824287415,
+      "learning_rate": 1.0555289705749483e-05,
+      "loss": 0.5531,
+      "step": 510
+    },
+    {
+      "epoch": 0.8673684210526316,
+      "grad_norm": 0.1071295514702797,
+      "learning_rate": 9.369221296335006e-06,
+      "loss": 0.5187,
+      "step": 515
+    },
+    {
+      "epoch": 0.8757894736842106,
+      "grad_norm": 0.11686161160469055,
+      "learning_rate": 8.250550355250875e-06,
+      "loss": 0.5589,
+      "step": 520
+    },
+    {
+      "epoch": 0.8842105263157894,
+      "grad_norm": 0.11512535065412521,
+      "learning_rate": 7.200108780781556e-06,
+      "loss": 0.5505,
+      "step": 525
+    },
+    {
+      "epoch": 0.8926315789473684,
+      "grad_norm": 0.10691066831350327,
+      "learning_rate": 6.218677732526035e-06,
+      "loss": 0.538,
+      "step": 530
+    },
+    {
+      "epoch": 0.9010526315789473,
+      "grad_norm": 0.10703324526548386,
+      "learning_rate": 5.306987050489442e-06,
+      "loss": 0.5487,
+      "step": 535
+    },
+    {
+      "epoch": 0.9094736842105263,
+      "grad_norm": 0.11932916939258575,
+      "learning_rate": 4.465714712338398e-06,
+      "loss": 0.5522,
+      "step": 540
+    },
+    {
+      "epoch": 0.9178947368421052,
+      "grad_norm": 0.11946967244148254,
+      "learning_rate": 3.6954863292237297e-06,
+      "loss": 0.5326,
+      "step": 545
+    },
+    {
+      "epoch": 0.9263157894736842,
+      "grad_norm": 0.11607641726732254,
+      "learning_rate": 2.996874680545603e-06,
+      "loss": 0.5393,
+      "step": 550
+    },
+    {
+      "epoch": 0.9347368421052632,
+      "grad_norm": 0.11775229871273041,
+      "learning_rate": 2.3703992880066638e-06,
+      "loss": 0.5182,
+      "step": 555
+    },
+    {
+      "epoch": 0.9431578947368421,
+      "grad_norm": 0.1001369059085846,
+      "learning_rate": 1.8165260292704711e-06,
+      "loss": 0.5487,
+      "step": 560
+    },
+    {
+      "epoch": 0.9515789473684211,
+      "grad_norm": 0.12595337629318237,
+      "learning_rate": 1.3356667915121025e-06,
+      "loss": 0.5291,
+      "step": 565
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.09997394680976868,
+      "learning_rate": 9.281791651187366e-07,
+      "loss": 0.5309,
+      "step": 570
+    },
+    {
+      "epoch": 0.968421052631579,
+      "grad_norm": 0.12577302753925323,
+      "learning_rate": 5.943661777680354e-07,
+      "loss": 0.5805,
+      "step": 575
+    },
+    {
+      "epoch": 0.9768421052631578,
+      "grad_norm": 0.11574060469865799,
+      "learning_rate": 3.3447606908196817e-07,
+      "loss": 0.5498,
+      "step": 580
+    },
+    {
+      "epoch": 0.9852631578947368,
+      "grad_norm": 0.1203237846493721,
+      "learning_rate": 1.487021060236904e-07,
+      "loss": 0.5443,
+      "step": 585
+    },
+    {
+      "epoch": 0.9936842105263158,
+      "grad_norm": 0.11638718843460083,
+      "learning_rate": 3.7182439174832106e-08,
+      "loss": 0.5372,
+      "step": 590
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.781344564135076e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null