Training in progress, step 500, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +93 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db923703ef289ea2b7f0ed89f414100bbacd7933a8175b3546192c70f2a0dbdc
 size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc89b84ff994a416b1fd8a35db8384b6b9a40381f903264827d17ecdff5f45be
 size 35668592

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:364bd8ce2b599a8e19aee7f20074b51e7dc5e2b7715f29a114265f76ea1f9d15
 size 18257163

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6ffe29712d5eeee8f43f601ab13dd23dc7df7e12fb41454b57276f9b97c3680
 size 18257163

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e5049d02e7af5cc4d06887e6c9cc2ef9b2dd211b1bb6f0170770aee213d071d
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a17d91ff6dcca4633791a0e119c48601550130760f9eabb15146d59647aafb1
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b25f828af166f81a33163f34ab272a85139e04116e81b2518c52956ce2a16287
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c680537e123ff05619f00235a6bb4e1115b680be2ad94388dedf9dffc0968a0
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b81a6838d8230cd65883dd5365346eaa99ff0ef44d4f6c714bc03cc2ae120638
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:1024392986cac23a001e3e4a426a85b67203cda9404b22609e539557db80bbac
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.12034599473486272,
   "eval_steps": 500,
-  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -368,6 +368,96 @@
       "mean_token_accuracy": 0.625,
       "num_tokens": 1385961.0,
       "step": 400
     }
   ],
   "logging_steps": 10,
@@ -387,7 +477,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.103021297234944e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.15043249341857842,
   "eval_steps": 500,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "mean_token_accuracy": 0.625,
       "num_tokens": 1385961.0,
       "step": 400
+    },
+    {
+      "epoch": 0.1233546446032343,
+      "grad_norm": 0.06864658743143082,
+      "learning_rate": 2.0290581162324652e-05,
+      "loss": 0.0724,
+      "mean_token_accuracy": 0.55,
+      "num_tokens": 1421542.0,
+      "step": 410
+    },
+    {
+      "epoch": 0.12636329447160588,
+      "grad_norm": 0.0036715222522616386,
+      "learning_rate": 2.079158316633267e-05,
+      "loss": 0.007,
+      "mean_token_accuracy": 0.55,
+      "num_tokens": 1457078.0,
+      "step": 420
+    },
+    {
+      "epoch": 0.12937194433997742,
+      "grad_norm": 0.00313239055685699,
+      "learning_rate": 2.1292585170340683e-05,
+      "loss": 0.001,
+      "mean_token_accuracy": 0.6375,
+      "num_tokens": 1490852.0,
+      "step": 430
+    },
+    {
+      "epoch": 0.132380594208349,
+      "grad_norm": 0.0015605625230818987,
+      "learning_rate": 2.1793587174348697e-05,
+      "loss": 0.2283,
+      "mean_token_accuracy": 0.5125,
+      "num_tokens": 1525241.0,
+      "step": 440
+    },
+    {
+      "epoch": 0.13538924407672057,
+      "grad_norm": 0.0009020116995088756,
+      "learning_rate": 2.2294589178356714e-05,
+      "loss": 0.0017,
+      "mean_token_accuracy": 0.5875,
+      "num_tokens": 1561253.0,
+      "step": 450
+    },
+    {
+      "epoch": 0.13839789394509214,
+      "grad_norm": 0.0005520946579053998,
+      "learning_rate": 2.279559118236473e-05,
+      "loss": 0.0005,
+      "mean_token_accuracy": 0.6375,
+      "num_tokens": 1596990.0,
+      "step": 460
+    },
+    {
+      "epoch": 0.1414065438134637,
+      "grad_norm": 0.0006775453221052885,
+      "learning_rate": 2.3296593186372748e-05,
+      "loss": 0.0085,
+      "mean_token_accuracy": 0.5,
+      "num_tokens": 1633419.0,
+      "step": 470
+    },
+    {
+      "epoch": 0.14441519368183528,
+      "grad_norm": 0.0002734291192609817,
+      "learning_rate": 2.3797595190380762e-05,
+      "loss": 0.0014,
+      "mean_token_accuracy": 0.575,
+      "num_tokens": 1668732.0,
+      "step": 480
+    },
+    {
+      "epoch": 0.14742384355020685,
+      "grad_norm": 0.0007396186119876802,
+      "learning_rate": 2.429859719438878e-05,
+      "loss": 0.001,
+      "mean_token_accuracy": 0.6375,
+      "num_tokens": 1703568.0,
+      "step": 490
+    },
+    {
+      "epoch": 0.15043249341857842,
+      "grad_norm": 0.0009952335385605693,
+      "learning_rate": 2.4799599198396793e-05,
+      "loss": 0.0017,
+      "mean_token_accuracy": 0.7375,
+      "num_tokens": 1736202.0,
+      "step": 500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.887174157355008e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null