Training in progress, step 2500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +83 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ef30dc9a100b240120af62823ca4707e4a35c361f060e8d6c15efa77b5e60f1
 size 891558696

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a37da1bec04a539e9083a0690c020dd6b5a85ca6ba96130597ced0a592b992f
 size 891558696

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12c3c123305a27e77f9affa5e1e0fa48210446cf570c456de0079a60956cc284
 size 1783272762

 version https://git-lfs.github.com/spec/v1
+oid sha256:b600d17da29b382922c79850abe38cc3a0b9b7af51d7d358a1ba2bfc872d80d7
 size 1783272762

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29621a933fe39840d93ef11565a36bb6ba3b5a377ed5e55e63eb1777b2373cd7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d49a79be8c359f422cb59c77ee0154f081dfd7e588f93c61b503afdc15d8e88d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b929b010a2bf1a7268bbc9d5744f2ae71afa768419f9ef267d54626a2e8ef40d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d33bcf6e84bd960ce66a36a7bd45e4c58615ca69233e24115c69f6a6b57693ba
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.08910445868968964,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-2000",
-  "epoch": 0.16,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -319,6 +319,84 @@
       "eval_samples_per_second": 22.763,
       "eval_steps_per_second": 5.691,
       "step": 2000
     }
   ],
   "logging_steps": 50,
@@ -338,7 +416,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4871663124480000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.08841572701931,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
+  "epoch": 0.2,
   "eval_steps": 500,
+  "global_step": 2500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.763,
       "eval_steps_per_second": 5.691,
       "step": 2000
+    },
+    {
+      "epoch": 0.164,
+      "grad_norm": 0.2411368191242218,
+      "learning_rate": 2.75412e-05,
+      "loss": 0.0786,
+      "step": 2050
+    },
+    {
+      "epoch": 0.168,
+      "grad_norm": 0.16663742065429688,
+      "learning_rate": 2.74812e-05,
+      "loss": 0.0724,
+      "step": 2100
+    },
+    {
+      "epoch": 0.172,
+      "grad_norm": 0.23420193791389465,
+      "learning_rate": 2.74212e-05,
+      "loss": 0.0653,
+      "step": 2150
+    },
+    {
+      "epoch": 0.176,
+      "grad_norm": 0.1807372272014618,
+      "learning_rate": 2.7361199999999998e-05,
+      "loss": 0.0676,
+      "step": 2200
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 0.16474364697933197,
+      "learning_rate": 2.73012e-05,
+      "loss": 0.0767,
+      "step": 2250
+    },
+    {
+      "epoch": 0.184,
+      "grad_norm": 0.17184095084667206,
+      "learning_rate": 2.72412e-05,
+      "loss": 0.0658,
+      "step": 2300
+    },
+    {
+      "epoch": 0.188,
+      "grad_norm": 0.16993258893489838,
+      "learning_rate": 2.71812e-05,
+      "loss": 0.0755,
+      "step": 2350
+    },
+    {
+      "epoch": 0.192,
+      "grad_norm": 0.1555277407169342,
+      "learning_rate": 2.71212e-05,
+      "loss": 0.0698,
+      "step": 2400
+    },
+    {
+      "epoch": 0.196,
+      "grad_norm": 0.09040562808513641,
+      "learning_rate": 2.7061199999999998e-05,
+      "loss": 0.0757,
+      "step": 2450
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 0.12910398840904236,
+      "learning_rate": 2.7001199999999998e-05,
+      "loss": 0.0688,
+      "step": 2500
+    },
+    {
+      "epoch": 0.2,
+      "eval_loss": 0.08841572701931,
+      "eval_runtime": 87.7555,
+      "eval_samples_per_second": 22.791,
+      "eval_steps_per_second": 5.698,
+      "step": 2500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 6089578905600000.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null