Training in progress, step 135, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:777125d8bd12de5f7ed18971ab031a8c25535a9628c33bf91a5fc02cd48f84a0
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:f892944e1d6553c2988900130a3362ea080ce87049af159434348e43983a67f7
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a85d722f100b15fa6de8db1b7863c44b71b3fce19bc20b18ae46f8b628ed0a26
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:f294a2b1a687df224adc5ac3e37eb23eab0bfe8458ff9f9b4712852a5997cf77
 size 23159290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab76824ef5f4a03a5fc43923056d7e1a2adea903a5e98b8bb7f651e3d75cd0f7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbd0b0d00d8a6ce2af47f7a318c5367a4519b639c67ff4d1f9441e0f3c04db1f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e182d30fc85938f253f4b0ba7702798b872e5ce41399e7a5462adca4c40ff6e4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3002a39ac6502366eefa64e828fe85e0b7d2b42f2ce52a223a7439ad2a05fd9b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.736842105263158,
   "eval_steps": 8,
-  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1053,6 +1053,41 @@
       "learning_rate": 9.903113209758096e-06,
       "loss": 1.0679,
       "step": 130
     }
   ],
   "logging_steps": 1,
@@ -1072,7 +1107,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.315875884630016e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.8421052631578947,
   "eval_steps": 8,
+  "global_step": 135,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.903113209758096e-06,
       "loss": 1.0679,
       "step": 130
+    },
+    {
+      "epoch": 2.7578947368421054,
+      "grad_norm": 0.5930253267288208,
+      "learning_rate": 8.952245334118414e-06,
+      "loss": 0.8819,
+      "step": 131
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "grad_norm": 0.6247056126594543,
+      "learning_rate": 8.047222744854943e-06,
+      "loss": 0.991,
+      "step": 132
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 0.5282688736915588,
+      "learning_rate": 7.1885011480961164e-06,
+      "loss": 0.9508,
+      "step": 133
+    },
+    {
+      "epoch": 2.8210526315789473,
+      "grad_norm": 0.4279923141002655,
+      "learning_rate": 6.37651293602628e-06,
+      "loss": 0.9463,
+      "step": 134
+    },
+    {
+      "epoch": 2.8421052631578947,
+      "grad_norm": 0.4681239426136017,
+      "learning_rate": 5.611666969163243e-06,
+      "loss": 1.1093,
+      "step": 135
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.366908129509376e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null