Training in progress, step 2100, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +213 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b08b1672e2ea4211707e7ae1fc3be628d1c4cfcbac08051e5ed075820a85d750
 size 3237829088

 version https://git-lfs.github.com/spec/v1
+oid sha256:801632b13dd650035b8637c9af213bc74194a9ef5cf8b6b65c2a509a34782c30
 size 3237829088

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74263b5f8e059e873949491dec9e7a943acdde886eacc4fbfc309d2296ab82b6
 size 2062272049

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2776246849e9530b854f7a1af3e71fc651a2697598de16a64d897fa8530e760
 size 2062272049

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c31bfa9c06956c0b54891b4da88a92b0061c8af3e34c97336d1d69755faea146
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6725ac8cfcdd5ed2e94a6dc5c8d88f80e593c5d3e8324e00ee31281fa51f86e
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64f93a5d98422b9aaabc9ecb62e3fb6f0d27288e6198f54c3576af914532e165
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:207e07bf53e1f3c020ec2dfd378c4461a481edafdba7a64484be4547457af2b3
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5165733964700818,
   "eval_steps": 300,
-  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1268,6 +1268,216 @@
       "learning_rate": 9.825271595683548e-05,
       "loss": 0.8072,
       "step": 1800
     }
   ],
   "logging_steps": 10,
@@ -1287,7 +1497,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.3653889794048e+19,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6026689625484287,
   "eval_steps": 300,
+  "global_step": 2100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.825271595683548e-05,
       "loss": 0.8072,
       "step": 1800
+    },
+    {
+      "epoch": 0.5194432486726933,
+      "grad_norm": 4.466314315795898,
+      "learning_rate": 9.73332732283226e-05,
+      "loss": 0.7936,
+      "step": 1810
+    },
+    {
+      "epoch": 0.5223131008753049,
+      "grad_norm": 6.21898078918457,
+      "learning_rate": 9.641405604806983e-05,
+      "loss": 0.8018,
+      "step": 1820
+    },
+    {
+      "epoch": 0.5251829530779165,
+      "grad_norm": 3.505802869796753,
+      "learning_rate": 9.549514216226311e-05,
+      "loss": 0.823,
+      "step": 1830
+    },
+    {
+      "epoch": 0.528052805280528,
+      "grad_norm": 4.254824161529541,
+      "learning_rate": 9.45766092914363e-05,
+      "loss": 0.824,
+      "step": 1840
+    },
+    {
+      "epoch": 0.5309226574831396,
+      "grad_norm": 10.659527778625488,
+      "learning_rate": 9.365853512389735e-05,
+      "loss": 0.8169,
+      "step": 1850
+    },
+    {
+      "epoch": 0.5337925096857512,
+      "grad_norm": 5.28292989730835,
+      "learning_rate": 9.274099730915778e-05,
+      "loss": 0.8076,
+      "step": 1860
+    },
+    {
+      "epoch": 0.5366623618883628,
+      "grad_norm": 5.907596588134766,
+      "learning_rate": 9.182407345136506e-05,
+      "loss": 0.7863,
+      "step": 1870
+    },
+    {
+      "epoch": 0.5395322140909743,
+      "grad_norm": 4.142882347106934,
+      "learning_rate": 9.090784110273896e-05,
+      "loss": 0.8133,
+      "step": 1880
+    },
+    {
+      "epoch": 0.5424020662935859,
+      "grad_norm": 4.616401195526123,
+      "learning_rate": 8.99923777570124e-05,
+      "loss": 0.7853,
+      "step": 1890
+    },
+    {
+      "epoch": 0.5452719184961975,
+      "grad_norm": 7.957604885101318,
+      "learning_rate": 8.907776084287693e-05,
+      "loss": 0.8275,
+      "step": 1900
+    },
+    {
+      "epoch": 0.548141770698809,
+      "grad_norm": 3.326878070831299,
+      "learning_rate": 8.816406771743412e-05,
+      "loss": 0.7724,
+      "step": 1910
+    },
+    {
+      "epoch": 0.5510116229014206,
+      "grad_norm": 4.447857856750488,
+      "learning_rate": 8.725137565965262e-05,
+      "loss": 0.8049,
+      "step": 1920
+    },
+    {
+      "epoch": 0.5538814751040322,
+      "grad_norm": 5.452672004699707,
+      "learning_rate": 8.633976186383217e-05,
+      "loss": 0.8034,
+      "step": 1930
+    },
+    {
+      "epoch": 0.5567513273066437,
+      "grad_norm": 5.054596900939941,
+      "learning_rate": 8.542930343307444e-05,
+      "loss": 0.7745,
+      "step": 1940
+    },
+    {
+      "epoch": 0.5596211795092553,
+      "grad_norm": 25.82883071899414,
+      "learning_rate": 8.452007737276191e-05,
+      "loss": 0.7756,
+      "step": 1950
+    },
+    {
+      "epoch": 0.5624910317118669,
+      "grad_norm": 4.046459197998047,
+      "learning_rate": 8.361216058404468e-05,
+      "loss": 0.7597,
+      "step": 1960
+    },
+    {
+      "epoch": 0.5653608839144784,
+      "grad_norm": 18.29205894470215,
+      "learning_rate": 8.270562985733652e-05,
+      "loss": 0.7863,
+      "step": 1970
+    },
+    {
+      "epoch": 0.56823073611709,
+      "grad_norm": 7.219738006591797,
+      "learning_rate": 8.180056186581976e-05,
+      "loss": 0.7651,
+      "step": 1980
+    },
+    {
+      "epoch": 0.5711005883197016,
+      "grad_norm": 4.146981716156006,
+      "learning_rate": 8.089703315896058e-05,
+      "loss": 0.7578,
+      "step": 1990
+    },
+    {
+      "epoch": 0.573970440522313,
+      "grad_norm": 4.7924675941467285,
+      "learning_rate": 7.999512015603438e-05,
+      "loss": 0.7974,
+      "step": 2000
+    },
+    {
+      "epoch": 0.5768402927249247,
+      "grad_norm": 5.102847576141357,
+      "learning_rate": 7.909489913966261e-05,
+      "loss": 0.805,
+      "step": 2010
+    },
+    {
+      "epoch": 0.5797101449275363,
+      "grad_norm": 5.353450298309326,
+      "learning_rate": 7.819644624936051e-05,
+      "loss": 0.7895,
+      "step": 2020
+    },
+    {
+      "epoch": 0.5825799971301477,
+      "grad_norm": 5.74714469909668,
+      "learning_rate": 7.72998374750977e-05,
+      "loss": 0.8029,
+      "step": 2030
+    },
+    {
+      "epoch": 0.5854498493327593,
+      "grad_norm": 4.67111873626709,
+      "learning_rate": 7.640514865087077e-05,
+      "loss": 0.7763,
+      "step": 2040
+    },
+    {
+      "epoch": 0.5883197015353709,
+      "grad_norm": 4.226963996887207,
+      "learning_rate": 7.551245544828944e-05,
+      "loss": 0.7935,
+      "step": 2050
+    },
+    {
+      "epoch": 0.5911895537379825,
+      "grad_norm": 6.067037105560303,
+      "learning_rate": 7.46218333701765e-05,
+      "loss": 0.7835,
+      "step": 2060
+    },
+    {
+      "epoch": 0.594059405940594,
+      "grad_norm": 6.7161736488342285,
+      "learning_rate": 7.373335774418158e-05,
+      "loss": 0.7793,
+      "step": 2070
+    },
+    {
+      "epoch": 0.5969292581432056,
+      "grad_norm": 4.633667945861816,
+      "learning_rate": 7.28471037164103e-05,
+      "loss": 0.793,
+      "step": 2080
+    },
+    {
+      "epoch": 0.5997991103458172,
+      "grad_norm": 5.508072376251221,
+      "learning_rate": 7.196314624506834e-05,
+      "loss": 0.7589,
+      "step": 2090
+    },
+    {
+      "epoch": 0.6026689625484287,
+      "grad_norm": 4.465757369995117,
+      "learning_rate": 7.108156009412176e-05,
+      "loss": 0.7569,
+      "step": 2100
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.5929538093056e+19,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null