Training in progress, step 160, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.bin +1 -1
last-checkpoint/pytorch_model_fsdp.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +151 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:918768fd81ec1f6d0b6a14800750fff3bd0659bef1c4646e80aa92d97afde2e2
 size 1824599104

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b940ff3c10efda3484d8154d06829b625f4661e41ccae9b4f19346cb07b313a
 size 1824599104

last-checkpoint/optimizer.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51c893d47ba51df78c0e8e0a151e2c27326009267c048825c44f824560f32fbb
 size 3649546931

 version https://git-lfs.github.com/spec/v1
+oid sha256:daa1c395c04b88c3560659b77a7658fbb3720dd025b9258a7d325ce415d84fe3
 size 3649546931

last-checkpoint/pytorch_model_fsdp.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f364fd837ca5ff32dc0301544a5dbae56e1fc6f2c04c5d52ab320e7be8eb302
 size 1824732017

 version https://git-lfs.github.com/spec/v1
+oid sha256:99127e94c2effa149a5652f46296157ec1072a023025a99704d613e9107afb0b
 size 1824732017

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1fe494caee29827121ede25c140b98d732abcef295793011a9da3e63f9f787f1
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:328fe9c5ce5de7cfc64522510573505020717e04aec0eb1a953b29b716e65835
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9f54950f1d5722d2dfd26fbe8ca22a21f30a3202910f114a3f83d950b2f3659
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:a3e386176596a4a6fe816025b7392c8664cea5fdfad2019e05665976ee0b3112
 size 14917

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b6455e29f08e0afb47c24dca5da42d6d008549a09137c0332f3f89df0df981b
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d034473a6f1844782ea448b09a1d7345e766bf381ca00de81b2eda370185c5a
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7115628970775095,
   "eval_steps": 20,
-  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1052,6 +1052,154 @@
       "eval_samples_per_second": 0.261,
       "eval_steps_per_second": 0.138,
       "step": 140
     }
   ],
   "logging_steps": 1,
@@ -1071,7 +1219,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3758826801004544e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8132147395171537,
   "eval_steps": 20,
+  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 0.261,
       "eval_steps_per_second": 0.138,
       "step": 140
+    },
+    {
+      "epoch": 0.7166454891994918,
+      "grad_norm": 0.2736265957355499,
+      "learning_rate": 8.017492711370262e-06,
+      "loss": 2.4641,
+      "step": 141
+    },
+    {
+      "epoch": 0.7217280813214739,
+      "grad_norm": 0.3174920678138733,
+      "learning_rate": 7.976366322008863e-06,
+      "loss": 2.4185,
+      "step": 142
+    },
+    {
+      "epoch": 0.7268106734434562,
+      "grad_norm": 0.37216174602508545,
+      "learning_rate": 7.934131736526946e-06,
+      "loss": 2.3133,
+      "step": 143
+    },
+    {
+      "epoch": 0.7318932655654383,
+      "grad_norm": 0.36393406987190247,
+      "learning_rate": 7.890743550834599e-06,
+      "loss": 2.5207,
+      "step": 144
+    },
+    {
+      "epoch": 0.7369758576874206,
+      "grad_norm": 0.5867409706115723,
+      "learning_rate": 7.846153846153847e-06,
+      "loss": 2.328,
+      "step": 145
+    },
+    {
+      "epoch": 0.7420584498094028,
+      "grad_norm": 0.4749656915664673,
+      "learning_rate": 7.8003120124805e-06,
+      "loss": 2.4006,
+      "step": 146
+    },
+    {
+      "epoch": 0.747141041931385,
+      "grad_norm": 0.3379077613353729,
+      "learning_rate": 7.753164556962026e-06,
+      "loss": 2.3301,
+      "step": 147
+    },
+    {
+      "epoch": 0.7522236340533672,
+      "grad_norm": 0.30944380164146423,
+      "learning_rate": 7.704654895666132e-06,
+      "loss": 2.3985,
+      "step": 148
+    },
+    {
+      "epoch": 0.7573062261753494,
+      "grad_norm": 0.3790503442287445,
+      "learning_rate": 7.65472312703583e-06,
+      "loss": 2.3055,
+      "step": 149
+    },
+    {
+      "epoch": 0.7623888182973316,
+      "grad_norm": 0.34668728709220886,
+      "learning_rate": 7.603305785123969e-06,
+      "loss": 2.3896,
+      "step": 150
+    },
+    {
+      "epoch": 0.7674714104193139,
+      "grad_norm": 0.3420522212982178,
+      "learning_rate": 7.550335570469799e-06,
+      "loss": 2.4232,
+      "step": 151
+    },
+    {
+      "epoch": 0.772554002541296,
+      "grad_norm": 0.3033508062362671,
+      "learning_rate": 7.4957410562180576e-06,
+      "loss": 2.2478,
+      "step": 152
+    },
+    {
+      "epoch": 0.7776365946632783,
+      "grad_norm": 0.2843773663043976,
+      "learning_rate": 7.439446366782007e-06,
+      "loss": 2.3415,
+      "step": 153
+    },
+    {
+      "epoch": 0.7827191867852605,
+      "grad_norm": 0.3108879327774048,
+      "learning_rate": 7.3813708260105456e-06,
+      "loss": 2.3628,
+      "step": 154
+    },
+    {
+      "epoch": 0.7878017789072427,
+      "grad_norm": 0.3458247184753418,
+      "learning_rate": 7.321428571428573e-06,
+      "loss": 2.3096,
+      "step": 155
+    },
+    {
+      "epoch": 0.7928843710292249,
+      "grad_norm": 0.32452043890953064,
+      "learning_rate": 7.259528130671507e-06,
+      "loss": 2.2148,
+      "step": 156
+    },
+    {
+      "epoch": 0.7979669631512071,
+      "grad_norm": 0.3931266665458679,
+      "learning_rate": 7.195571955719557e-06,
+      "loss": 2.3311,
+      "step": 157
+    },
+    {
+      "epoch": 0.8030495552731893,
+      "grad_norm": 0.341151624917984,
+      "learning_rate": 7.129455909943714e-06,
+      "loss": 2.4496,
+      "step": 158
+    },
+    {
+      "epoch": 0.8081321473951716,
+      "grad_norm": 0.35213160514831543,
+      "learning_rate": 7.061068702290077e-06,
+      "loss": 2.3066,
+      "step": 159
+    },
+    {
+      "epoch": 0.8132147395171537,
+      "grad_norm": 0.35280901193618774,
+      "learning_rate": 6.990291262135923e-06,
+      "loss": 2.365,
+      "step": 160
+    },
+    {
+      "epoch": 0.8132147395171537,
+      "eval_loss": 2.241875410079956,
+      "eval_runtime": 64.1026,
+      "eval_samples_per_second": 0.265,
+      "eval_steps_per_second": 0.14,
+      "step": 160
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.5724373486862336e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null