checkpoint-4455

Files changed (6) hide show

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:421b1c3566897b50359cc94a1212bb9f4a46e09806ff05c974a4d25863395330
 size 2225188480

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfe87e2f6d6074d8d86b668507a39edce3e7e72b9348c1748c61c17b5edcdb2c
 size 2225188480

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:010fd42cc33bfa044c7c52d0b6996c0334b0b753ff77599a9987f1177a2ca3ec
 size 4450498267

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ffeb7ace51550edaa0d04986ab4cb9c1fcaed2503bc0e748aaf54df166ad8e6
 size 4450498267

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d12912a41eb6d74668aa9acfb4ff7cd99f55b1aa02423c599a95b3ebc3006011
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:6538b00e91d4c4de4a0c17d2634d57eabc754fc7df85477b8c6ecad7f0cf41a3
 size 14645

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0033c7745b46bdca3ecab5787678834ca68f7f7e1288869dceeb38812abc253
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4bcb7279143a7253dbdae3618e4ac776d6e4a1395b9ed9fcb9bc00d72d1520b
 size 1383

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cf6a257871b46c5a112aa215608ac0c00c2d1cd54b28ecf333a6131a03c71f6
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a24e34950d39b6dd56b12cce4107aa557148473c4972111b61f5d0bfe94716d
 size 1465

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.6936026936026938,
   "eval_steps": 500,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -352,6 +352,34 @@
       "eval_samples_per_second": 13.431,
       "eval_steps_per_second": 3.358,
       "step": 4000
     }
   ],
   "logging_steps": 100,
@@ -366,12 +394,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.541159116578816e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.0,
   "eval_steps": 500,
+  "global_step": 4455,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 13.431,
       "eval_steps_per_second": 3.358,
       "step": 4000
+    },
+    {
+      "epoch": 2.760942760942761,
+      "grad_norm": 21.673240661621094,
+      "learning_rate": 2.5099882491186838e-05,
+      "loss": 50.46708984375,
+      "step": 4100
+    },
+    {
+      "epoch": 2.8282828282828283,
+      "grad_norm": 21.410390853881836,
+      "learning_rate": 1.8049353701527613e-05,
+      "loss": 50.4184521484375,
+      "step": 4200
+    },
+    {
+      "epoch": 2.8956228956228958,
+      "grad_norm": 21.75411033630371,
+      "learning_rate": 1.0998824911868389e-05,
+      "loss": 50.68787109375,
+      "step": 4300
+    },
+    {
+      "epoch": 2.962962962962963,
+      "grad_norm": 21.319902420043945,
+      "learning_rate": 3.948296122209165e-06,
+      "loss": 50.4838671875,
+      "step": 4400
     }
   ],
   "logging_steps": 100,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.714151970521088e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null