Training in progress, step 8000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9310a4b888df283774971e4e671540bfed2da01aea080fa39eda067305eeba86
 size 1520630616

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa2fa49e5ab01e8388f884f001a6fef59415f0afcdf8851cf32b99cba1b66f98
 size 1520630616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1f256b63f8887aa92c9795198c14b259ff29bd76f4e601214dd8ad4add4ccd6
 size 3041448587

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7bd1f1004e066807e00b62878ad4b49df433de186c64f0a97f9237a03eb281b
 size 3041448587

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2669ee2d37691d1bc42e7a0090a126e105acbd5de1cf305e31cb6b68e55636b7
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2ea0240538fb238def027691182a688f4848085d98c59d8205c56a6ab84887c
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a30b126d1da8ae8870320a9f300ee7d428169650eb20c3a488c09fc00bef14d8
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:2054ec2901370b6a537467b9fa82f13f962dc91e80e60e56cd6658a9567a46a8
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0279453551912567,
   "eval_steps": 500,
-  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -848,6 +848,286 @@
       "learning_rate": 0.00029879389136398403,
       "loss": 2.1958,
       "step": 6000
     }
   ],
   "logging_steps": 50,
@@ -867,7 +1147,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2086020985643336e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.0121748633879784,
   "eval_steps": 500,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00029879389136398403,
       "loss": 2.1958,
       "step": 6000
+    },
+    {
+      "epoch": 1.0284918032786885,
+      "grad_norm": 0.52734375,
+      "learning_rate": 0.00029876019281020207,
+      "loss": 2.1853,
+      "step": 6050
+    },
+    {
+      "epoch": 1.0290382513661203,
+      "grad_norm": 0.52734375,
+      "learning_rate": 0.00029872603190630927,
+      "loss": 2.1753,
+      "step": 6100
+    },
+    {
+      "epoch": 1.029584699453552,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.00029869140875847847,
+      "loss": 2.1931,
+      "step": 6150
+    },
+    {
+      "epoch": 1.0301311475409836,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.0002986563234743193,
+      "loss": 2.1846,
+      "step": 6200
+    },
+    {
+      "epoch": 1.0306775956284153,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002986207761628775,
+      "loss": 2.1928,
+      "step": 6250
+    },
+    {
+      "epoch": 1.031224043715847,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.00029858476693463506,
+      "loss": 2.1942,
+      "step": 6300
+    },
+    {
+      "epoch": 1.0317704918032786,
+      "grad_norm": 0.48828125,
+      "learning_rate": 0.0002985482959015094,
+      "loss": 2.1653,
+      "step": 6350
+    },
+    {
+      "epoch": 1.0323169398907104,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.00029851136317685345,
+      "loss": 2.1659,
+      "step": 6400
+    },
+    {
+      "epoch": 1.0328633879781421,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.00029847396887545485,
+      "loss": 2.1829,
+      "step": 6450
+    },
+    {
+      "epoch": 1.0334098360655737,
+      "grad_norm": 0.54296875,
+      "learning_rate": 0.00029843611311353597,
+      "loss": 2.1911,
+      "step": 6500
+    },
+    {
+      "epoch": 1.0339562841530054,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029839779600875343,
+      "loss": 2.1041,
+      "step": 6550
+    },
+    {
+      "epoch": 1.0345027322404372,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00029835901768019763,
+      "loss": 2.1634,
+      "step": 6600
+    },
+    {
+      "epoch": 1.0350491803278687,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002983197782483926,
+      "loss": 2.1642,
+      "step": 6650
+    },
+    {
+      "epoch": 1.0355956284153005,
+      "grad_norm": 0.58203125,
+      "learning_rate": 0.00029828007783529533,
+      "loss": 2.1621,
+      "step": 6700
+    },
+    {
+      "epoch": 1.0361420765027323,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.0002982399165642956,
+      "loss": 2.1553,
+      "step": 6750
+    },
+    {
+      "epoch": 1.036688524590164,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.00029819929456021565,
+      "loss": 2.1592,
+      "step": 6800
+    },
+    {
+      "epoch": 1.0372349726775956,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002981582119493095,
+      "loss": 2.1527,
+      "step": 6850
+    },
+    {
+      "epoch": 2.000153005464481,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0002981166688592629,
+      "loss": 2.1674,
+      "step": 6900
+    },
+    {
+      "epoch": 2.0006994535519125,
+      "grad_norm": 0.5390625,
+      "learning_rate": 0.00029807466541919273,
+      "loss": 2.1168,
+      "step": 6950
+    },
+    {
+      "epoch": 2.0012459016393445,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00029803220175964675,
+      "loss": 2.1439,
+      "step": 7000
+    },
+    {
+      "epoch": 2.001792349726776,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0002979892780126028,
+      "loss": 2.0952,
+      "step": 7050
+    },
+    {
+      "epoch": 2.0023387978142075,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.00029794589431146904,
+      "loss": 2.0817,
+      "step": 7100
+    },
+    {
+      "epoch": 2.0028852459016395,
+      "grad_norm": 0.462890625,
+      "learning_rate": 0.00029790205079108294,
+      "loss": 2.0643,
+      "step": 7150
+    },
+    {
+      "epoch": 2.003431693989071,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.00029785774758771114,
+      "loss": 2.0993,
+      "step": 7200
+    },
+    {
+      "epoch": 2.0039781420765026,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029781298483904907,
+      "loss": 2.1085,
+      "step": 7250
+    },
+    {
+      "epoch": 2.0045245901639346,
+      "grad_norm": 0.4921875,
+      "learning_rate": 0.0002977677626842204,
+      "loss": 2.0645,
+      "step": 7300
+    },
+    {
+      "epoch": 2.005071038251366,
+      "grad_norm": 0.50390625,
+      "learning_rate": 0.0002977220812637766,
+      "loss": 2.0929,
+      "step": 7350
+    },
+    {
+      "epoch": 2.0056174863387977,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.0002976759407196966,
+      "loss": 2.0919,
+      "step": 7400
+    },
+    {
+      "epoch": 2.0061639344262296,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.00029762934119538623,
+      "loss": 2.0903,
+      "step": 7450
+    },
+    {
+      "epoch": 2.006710382513661,
+      "grad_norm": 0.5625,
+      "learning_rate": 0.00029758228283567796,
+      "loss": 2.0481,
+      "step": 7500
+    },
+    {
+      "epoch": 2.0072568306010927,
+      "grad_norm": 0.5703125,
+      "learning_rate": 0.00029753476578683023,
+      "loss": 2.0737,
+      "step": 7550
+    },
+    {
+      "epoch": 2.0078032786885247,
+      "grad_norm": 0.49609375,
+      "learning_rate": 0.00029748679019652704,
+      "loss": 2.1032,
+      "step": 7600
+    },
+    {
+      "epoch": 2.0083497267759562,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.00029743835621387775,
+      "loss": 2.0722,
+      "step": 7650
+    },
+    {
+      "epoch": 2.008896174863388,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.00029738946398941623,
+      "loss": 2.057,
+      "step": 7700
+    },
+    {
+      "epoch": 2.0094426229508198,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.0002973401136751007,
+      "loss": 2.0802,
+      "step": 7750
+    },
+    {
+      "epoch": 2.0099890710382513,
+      "grad_norm": 0.5,
+      "learning_rate": 0.0002972903054243129,
+      "loss": 2.1094,
+      "step": 7800
+    },
+    {
+      "epoch": 2.010535519125683,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002972400393918583,
+      "loss": 2.0409,
+      "step": 7850
+    },
+    {
+      "epoch": 2.011081967213115,
+      "grad_norm": 0.578125,
+      "learning_rate": 0.0002971893157339647,
+      "loss": 2.0543,
+      "step": 7900
+    },
+    {
+      "epoch": 2.0116284153005464,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.0002971381346082824,
+      "loss": 2.0776,
+      "step": 7950
+    },
+    {
+      "epoch": 2.0121748633879784,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.00029708649617388356,
+      "loss": 2.0629,
+      "step": 8000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 4.278236395534811e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null