Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12de36f02475ba36424b6cbbc78a99fb5d247b1f59b0671ec136b90196dbc42e
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:f80a24ef27cb8d65adc7ffae4be6c2732033a5e9141dfd66815315f593ae70b8
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71a973f442004b75157ae01481531805c844e77a68190e59a5218c09d8d6df94
 size 2479123301

 version https://git-lfs.github.com/spec/v1
+oid sha256:8da859acf7ac6600d1453a6ad96ad20e3865a29e481bed11e5055dfb249272fa
 size 2479123301

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc1a0da602f8abf4bf342932694d528cc1f0baa4d5027de58ad34f4d9855d085
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f0a73f4374b5e05c786b790825aa9c223b53f3ee4551182bcb497e6cf37794f
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38dd85de4e747e5477e492c54af5b212cebc40d19045c2dfc5361392de0ed8a7
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:98e8d05c49d91a8f0b7f5e1dfd9876ebcb94dbdc34961d432fb69d2b41418c55
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12,
   "eval_steps": 500,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -218,6 +218,76 @@
       "learning_rate": 4.4168336673346694e-05,
       "loss": 1.4183,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -237,7 +307,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5404828786993152.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.16,
   "eval_steps": 500,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.4168336673346694e-05,
       "loss": 1.4183,
       "step": 300
+    },
+    {
+      "epoch": 0.124,
+      "grad_norm": 22.46800422668457,
+      "learning_rate": 4.3967935871743486e-05,
+      "loss": 1.1226,
+      "step": 310
+    },
+    {
+      "epoch": 0.128,
+      "grad_norm": 24.424856185913086,
+      "learning_rate": 4.3767535070140284e-05,
+      "loss": 1.3413,
+      "step": 320
+    },
+    {
+      "epoch": 0.132,
+      "grad_norm": 14.698283195495605,
+      "learning_rate": 4.3567134268537076e-05,
+      "loss": 1.2009,
+      "step": 330
+    },
+    {
+      "epoch": 0.136,
+      "grad_norm": 23.5820369720459,
+      "learning_rate": 4.336673346693387e-05,
+      "loss": 1.0799,
+      "step": 340
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 26.510631561279297,
+      "learning_rate": 4.316633266533066e-05,
+      "loss": 1.0801,
+      "step": 350
+    },
+    {
+      "epoch": 0.144,
+      "grad_norm": 18.498275756835938,
+      "learning_rate": 4.296593186372745e-05,
+      "loss": 1.1631,
+      "step": 360
+    },
+    {
+      "epoch": 0.148,
+      "grad_norm": 35.2937126159668,
+      "learning_rate": 4.2765531062124256e-05,
+      "loss": 1.3777,
+      "step": 370
+    },
+    {
+      "epoch": 0.152,
+      "grad_norm": 40.05356216430664,
+      "learning_rate": 4.256513026052105e-05,
+      "loss": 1.1189,
+      "step": 380
+    },
+    {
+      "epoch": 0.156,
+      "grad_norm": 18.918344497680664,
+      "learning_rate": 4.236472945891784e-05,
+      "loss": 1.0721,
+      "step": 390
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 20.29583168029785,
+      "learning_rate": 4.216432865731463e-05,
+      "loss": 0.9337,
+      "step": 400
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 7213993175126016.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null