Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6da720ded8a1790196006900a33f5feace3793698528740ce7024e10c4401eb5
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccd1607970824f8435dff0ec490e2197f145ee38b543dfac0f375baae91f3e84
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e57022989b1cc7d3b10c99234ae80d8c4ad9cac35357f91704bca82ec26be329
 size 2479122661

 version https://git-lfs.github.com/spec/v1
+oid sha256:05aa8a7bf492a3ad20e0d6edfff8c0717a4f0bbc1219a587cbf095503cf2d00e
 size 2479122661

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b73d4cd42a94a1103ceefa33eaa6ef4fb3cf4f32efbe8707ea89780e8b5d2e9
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:09b7290d8ca07041eee19d5fa227aba688d13ea17f7d6f3c0e4a7903d483d295
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6d58568e157b4e33c18eb009b274df7561c47b6a7f0984ca3989b2a6bc67549
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:19ed95effe7569c75627b601fa080ba53727e518015156dc63042342eab93ca8
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04,
   "eval_steps": 500,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -78,6 +78,76 @@
       "learning_rate": 4.8176352705410824e-05,
       "loss": 1.3992,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -97,7 +167,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1819402434478080.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.8176352705410824e-05,
       "loss": 1.3992,
       "step": 100
+    },
+    {
+      "epoch": 0.044,
+      "grad_norm": 42.20634841918945,
+      "learning_rate": 4.797595190380762e-05,
+      "loss": 1.6443,
+      "step": 110
+    },
+    {
+      "epoch": 0.048,
+      "grad_norm": 24.657821655273438,
+      "learning_rate": 4.7775551102204415e-05,
+      "loss": 1.8471,
+      "step": 120
+    },
+    {
+      "epoch": 0.052,
+      "grad_norm": 38.727420806884766,
+      "learning_rate": 4.7575150300601207e-05,
+      "loss": 1.5293,
+      "step": 130
+    },
+    {
+      "epoch": 0.056,
+      "grad_norm": 31.97869873046875,
+      "learning_rate": 4.7374749498998e-05,
+      "loss": 1.6212,
+      "step": 140
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 31.056962966918945,
+      "learning_rate": 4.717434869739479e-05,
+      "loss": 1.4407,
+      "step": 150
+    },
+    {
+      "epoch": 0.064,
+      "grad_norm": 29.63347053527832,
+      "learning_rate": 4.697394789579159e-05,
+      "loss": 1.1833,
+      "step": 160
+    },
+    {
+      "epoch": 0.068,
+      "grad_norm": 44.844268798828125,
+      "learning_rate": 4.677354709418838e-05,
+      "loss": 1.5756,
+      "step": 170
+    },
+    {
+      "epoch": 0.072,
+      "grad_norm": 31.4070987701416,
+      "learning_rate": 4.657314629258517e-05,
+      "loss": 1.4358,
+      "step": 180
+    },
+    {
+      "epoch": 0.076,
+      "grad_norm": 26.982776641845703,
+      "learning_rate": 4.6372745490981964e-05,
+      "loss": 1.234,
+      "step": 190
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 19.802730560302734,
+      "learning_rate": 4.617234468937876e-05,
+      "loss": 1.3504,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3595273633732608.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null