Training in progress, step 2400, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2239a0845b46b95928bcbeed801573f294fc2cb9b100e0b10c5cbdef012ea8a5
 size 2558403928
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:acb8d32926cd3273ed47be6ba17df67bab0f6dec6fb9885f960b784af33bbee9
 size 1313638993
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8eb3097812b21e90074b0ecab86ca2780198581fab1852bdc56627bd62753aa6
 size 14645
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7da7c5085795b13d2bf0030671cbddb9f62ae43221bf1424a3830d4cf8c19012
 size 1383
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ba415cf49e6d172669840b43b2ceb814d3524d31ee7518919fd94765d7715e45
 size 1465
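Each of the files above is stored through Git LFS, so the diff only touches the three-line pointer file (spec version, sha256 oid, byte size) rather than the binary blob itself. Below is a minimal sketch of how a downloaded checkpoint file could be verified against its pointer; the helper name and local path are illustrative, and the oid/size are the values recorded for model.safetensors in this commit.

```python
import hashlib

def verify_lfs_pointer(local_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    sha = hashlib.sha256()
    size = 0
    with open(local_path, "rb") as f:
        # Hash in 1 MiB chunks so large shards don't need to fit in memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size

# Example: the model shard updated in this commit (path is illustrative).
ok = verify_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "2239a0845b46b95928bcbeed801573f294fc2cb9b100e0b10c5cbdef012ea8a5",
    2558403928,
)
print("checksum matches" if ok else "checksum mismatch")
```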
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
+  "epoch": 1.92,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1485,6 +1485,216 @@
       "learning_rate": 1.3506931508887333e-05,
       "loss": 4.0234,
       "step": 2100
+    },
+    {
+      "epoch": 1.688,
+      "grad_norm": 6.967901706695557,
+      "learning_rate": 1.2856314193601216e-05,
+      "loss": 4.2137,
+      "step": 2110
+    },
+    {
+      "epoch": 1.696,
+      "grad_norm": 7.635384559631348,
+      "learning_rate": 1.2220678396326678e-05,
+      "loss": 4.38,
+      "step": 2120
+    },
+    {
+      "epoch": 1.704,
+      "grad_norm": 9.351762771606445,
+      "learning_rate": 1.160013339393281e-05,
+      "loss": 4.4418,
+      "step": 2130
+    },
+    {
+      "epoch": 1.712,
+      "grad_norm": 7.01410436630249,
+      "learning_rate": 1.0994785868918101e-05,
+      "loss": 4.4396,
+      "step": 2140
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 6.974796772003174,
+      "learning_rate": 1.040473989106988e-05,
+      "loss": 4.5075,
+      "step": 2150
+    },
+    {
+      "epoch": 1.728,
+      "grad_norm": 7.545105934143066,
+      "learning_rate": 9.830096899572927e-06,
+      "loss": 4.7132,
+      "step": 2160
+    },
+    {
+      "epoch": 1.736,
+      "grad_norm": 7.074202537536621,
+      "learning_rate": 9.270955685570226e-06,
+      "loss": 4.2393,
+      "step": 2170
+    },
+    {
+      "epoch": 1.744,
+      "grad_norm": 6.088277339935303,
+      "learning_rate": 8.727412375179156e-06,
+      "loss": 4.8092,
+      "step": 2180
+    },
+    {
+      "epoch": 1.752,
+      "grad_norm": 5.859469413757324,
+      "learning_rate": 8.199560412965634e-06,
+      "loss": 4.6403,
+      "step": 2190
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 6.770120143890381,
+      "learning_rate": 7.687490545879461e-06,
+      "loss": 4.2544,
+      "step": 2200
+    },
+    {
+      "epoch": 1.768,
+      "grad_norm": 8.092095375061035,
+      "learning_rate": 7.191290807653251e-06,
+      "loss": 4.454,
+      "step": 2210
+    },
+    {
+      "epoch": 1.776,
+      "grad_norm": 6.498937606811523,
+      "learning_rate": 6.711046503667983e-06,
+      "loss": 4.0243,
+      "step": 2220
+    },
+    {
+      "epoch": 1.784,
+      "grad_norm": 5.894200801849365,
+      "learning_rate": 6.24684019628744e-06,
+      "loss": 4.0666,
+      "step": 2230
+    },
+    {
+      "epoch": 1.792,
+      "grad_norm": 6.309505462646484,
+      "learning_rate": 5.79875169066435e-06,
+      "loss": 4.2441,
+      "step": 2240
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 6.20737886428833,
+      "learning_rate": 5.366858021020471e-06,
+      "loss": 4.1951,
+      "step": 2250
+    },
+    {
+      "epoch": 1.808,
+      "grad_norm": 8.314438819885254,
+      "learning_rate": 4.951233437403102e-06,
+      "loss": 4.4043,
+      "step": 2260
+    },
+    {
+      "epoch": 1.8159999999999998,
+      "grad_norm": 6.5354108810424805,
+      "learning_rate": 4.551949392920118e-06,
+      "loss": 4.1528,
+      "step": 2270
+    },
+    {
+      "epoch": 1.8239999999999998,
+      "grad_norm": 8.503190040588379,
+      "learning_rate": 4.169074531456063e-06,
+      "loss": 3.8358,
+      "step": 2280
+    },
+    {
+      "epoch": 1.8319999999999999,
+      "grad_norm": 7.091247081756592,
+      "learning_rate": 3.802674675870932e-06,
+      "loss": 4.3702,
+      "step": 2290
+    },
+    {
+      "epoch": 1.8399999999999999,
+      "grad_norm": 6.265818119049072,
+      "learning_rate": 3.4528128166842033e-06,
+      "loss": 4.3354,
+      "step": 2300
+    },
+    {
+      "epoch": 1.8479999999999999,
+      "grad_norm": 7.026565074920654,
+      "learning_rate": 3.119549101245567e-06,
+      "loss": 3.9832,
+      "step": 2310
+    },
+    {
+      "epoch": 1.8559999999999999,
+      "grad_norm": 6.407781600952148,
+      "learning_rate": 2.8029408233946177e-06,
+      "loss": 4.1522,
+      "step": 2320
+    },
+    {
+      "epoch": 1.8639999999999999,
+      "grad_norm": 13.10364055633545,
+      "learning_rate": 2.503042413611001e-06,
+      "loss": 4.354,
+      "step": 2330
+    },
+    {
+      "epoch": 1.8719999999999999,
+      "grad_norm": 12.458951950073242,
+      "learning_rate": 2.219905429656899e-06,
+      "loss": 4.5043,
+      "step": 2340
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 9.545763969421387,
+      "learning_rate": 1.9535785477133195e-06,
+      "loss": 4.2234,
+      "step": 2350
+    },
+    {
+      "epoch": 1.888,
+      "grad_norm": 9.243309020996094,
+      "learning_rate": 1.7041075540118578e-06,
+      "loss": 4.2382,
+      "step": 2360
+    },
+    {
+      "epoch": 1.896,
+      "grad_norm": 6.87458610534668,
+      "learning_rate": 1.4715353369631924e-06,
+      "loss": 3.9852,
+      "step": 2370
+    },
+    {
+      "epoch": 1.904,
+      "grad_norm": 6.957976341247559,
+      "learning_rate": 1.2559018797838384e-06,
+      "loss": 3.9335,
+      "step": 2380
+    },
+    {
+      "epoch": 1.912,
+      "grad_norm": 5.689143657684326,
+      "learning_rate": 1.0572442536223692e-06,
+      "loss": 4.0,
+      "step": 2390
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 5.896793842315674,
+      "learning_rate": 8.755966111861913e-07,
+      "loss": 4.2807,
+      "step": 2400
     }
   ],
   "logging_steps": 10,
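For reference, the log_history records added above follow the schema Hugging Face's Trainer writes at each logging step. Below is a minimal sketch of pulling the loss curve back out of trainer_state.json, assuming a local copy of the checkpoint directory; the path is illustrative.

```python
import json

# Load the trainer state written alongside this checkpoint (path is illustrative).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Each log_history entry mirrors the records added in this commit:
# {"epoch": ..., "grad_norm": ..., "learning_rate": ..., "loss": ..., "step": ...}
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]

print(f"global_step={state['global_step']}, epoch={state['epoch']}")
print(f"last logged loss at step {steps[-1]}: {losses[-1]}")
```

Because the optimizer, scheduler, scaler, and RNG state are checkpointed alongside the weights, training can be resumed exactly from step 2400; with the Trainer API that is typically done via trainer.train(resume_from_checkpoint=...).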