Training in progress, step 2250, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +178 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:114b891a762a51a9adf99795ed8a1397abbd097711bfd2ff3927ad599e912fbe
 size 3809184360

 version https://git-lfs.github.com/spec/v1
+oid sha256:c62db0277afdc3f2ad6dbafa0dd57f53ea9debb5ee9712f0b1547cf8523f1070
 size 3809184360

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0467314eaf329ce64fa294c5b636d1d0a5db236ce1684099429a56bad1f1c530
 size 2458291491

 version https://git-lfs.github.com/spec/v1
+oid sha256:994a3b059f463b00db236586003b2652100023cbd4f39b1b1ac679076c611649
 size 2458291491

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0367ed1b35f65855ad993f74c56f185b353ad034ccb1dbb7df8ac313fc044216
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:376b730bb310b4f7540caf50ba2d9485c55172240b565241043b8847f1833fe8
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5daf118c104c253ac47840aed00a104c21470bc6d0bd2a07133bec544d92037c
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:444dae11008b250d18996da8350dc235efbc33e7070670e4ec0778a449b281a5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1408,6 +1408,181 @@
       "learning_rate": 1.0181451612903227e-05,
       "loss": 0.2809,
       "step": 2000
     }
   ],
   "logging_steps": 10,
@@ -1427,7 +1602,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.61046831887872e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9,
   "eval_steps": 500,
+  "global_step": 2250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.0181451612903227e-05,
       "loss": 0.2809,
       "step": 2000
+    },
+    {
+      "epoch": 0.804,
+      "grad_norm": 7.253849029541016,
+      "learning_rate": 9.97983870967742e-06,
+      "loss": 0.2885,
+      "step": 2010
+    },
+    {
+      "epoch": 0.808,
+      "grad_norm": 12.506342887878418,
+      "learning_rate": 9.778225806451613e-06,
+      "loss": 0.3105,
+      "step": 2020
+    },
+    {
+      "epoch": 0.812,
+      "grad_norm": 6.241955757141113,
+      "learning_rate": 9.576612903225806e-06,
+      "loss": 0.3399,
+      "step": 2030
+    },
+    {
+      "epoch": 0.816,
+      "grad_norm": 10.079781532287598,
+      "learning_rate": 9.375000000000001e-06,
+      "loss": 0.2337,
+      "step": 2040
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 5.7255377769470215,
+      "learning_rate": 9.173387096774194e-06,
+      "loss": 0.3242,
+      "step": 2050
+    },
+    {
+      "epoch": 0.824,
+      "grad_norm": 8.949894905090332,
+      "learning_rate": 8.971774193548389e-06,
+      "loss": 0.3267,
+      "step": 2060
+    },
+    {
+      "epoch": 0.828,
+      "grad_norm": 9.275047302246094,
+      "learning_rate": 8.770161290322582e-06,
+      "loss": 0.3194,
+      "step": 2070
+    },
+    {
+      "epoch": 0.832,
+      "grad_norm": 7.940761089324951,
+      "learning_rate": 8.568548387096773e-06,
+      "loss": 0.3428,
+      "step": 2080
+    },
+    {
+      "epoch": 0.836,
+      "grad_norm": 9.835103988647461,
+      "learning_rate": 8.366935483870968e-06,
+      "loss": 0.5326,
+      "step": 2090
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 4.066821098327637,
+      "learning_rate": 8.165322580645161e-06,
+      "loss": 0.3065,
+      "step": 2100
+    },
+    {
+      "epoch": 0.844,
+      "grad_norm": 12.258496284484863,
+      "learning_rate": 7.963709677419356e-06,
+      "loss": 0.3009,
+      "step": 2110
+    },
+    {
+      "epoch": 0.848,
+      "grad_norm": 5.6699700355529785,
+      "learning_rate": 7.762096774193549e-06,
+      "loss": 0.265,
+      "step": 2120
+    },
+    {
+      "epoch": 0.852,
+      "grad_norm": 12.048685073852539,
+      "learning_rate": 7.560483870967743e-06,
+      "loss": 0.3388,
+      "step": 2130
+    },
+    {
+      "epoch": 0.856,
+      "grad_norm": 4.888071060180664,
+      "learning_rate": 7.358870967741936e-06,
+      "loss": 0.3849,
+      "step": 2140
+    },
+    {
+      "epoch": 0.86,
+      "grad_norm": 9.446249961853027,
+      "learning_rate": 7.15725806451613e-06,
+      "loss": 0.2319,
+      "step": 2150
+    },
+    {
+      "epoch": 0.864,
+      "grad_norm": 40.13935852050781,
+      "learning_rate": 6.955645161290322e-06,
+      "loss": 0.3925,
+      "step": 2160
+    },
+    {
+      "epoch": 0.868,
+      "grad_norm": 9.776718139648438,
+      "learning_rate": 6.754032258064516e-06,
+      "loss": 0.2317,
+      "step": 2170
+    },
+    {
+      "epoch": 0.872,
+      "grad_norm": 11.500304222106934,
+      "learning_rate": 6.55241935483871e-06,
+      "loss": 0.3169,
+      "step": 2180
+    },
+    {
+      "epoch": 0.876,
+      "grad_norm": 13.514867782592773,
+      "learning_rate": 6.350806451612904e-06,
+      "loss": 0.3805,
+      "step": 2190
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.3404765129089355,
+      "learning_rate": 6.149193548387097e-06,
+      "loss": 0.2765,
+      "step": 2200
+    },
+    {
+      "epoch": 0.884,
+      "grad_norm": 13.954924583435059,
+      "learning_rate": 5.947580645161291e-06,
+      "loss": 0.2185,
+      "step": 2210
+    },
+    {
+      "epoch": 0.888,
+      "grad_norm": 17.032840728759766,
+      "learning_rate": 5.745967741935484e-06,
+      "loss": 0.4505,
+      "step": 2220
+    },
+    {
+      "epoch": 0.892,
+      "grad_norm": 6.66541862487793,
+      "learning_rate": 5.544354838709678e-06,
+      "loss": 0.3105,
+      "step": 2230
+    },
+    {
+      "epoch": 0.896,
+      "grad_norm": 18.18755531311035,
+      "learning_rate": 5.342741935483872e-06,
+      "loss": 0.3225,
+      "step": 2240
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 7.183579444885254,
+      "learning_rate": 5.141129032258065e-06,
+      "loss": 0.2152,
+      "step": 2250
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.06788487884288e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20bf013736b049630a8dcb1c77b612997cdb572d3055f50be6f6d59b1bf9eaaa
 size 6289

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffb5eae08d182d38c5142a692457f7672389d2a610d34241a5f8c8d5e6b351af
 size 6289