Training in progress, step 14000, checkpoint
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:678e7213985883fa100ce33420c0abcc086b1e5d1ebbfe59b4fc2eb98de42dad
 size 1520630616
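
Each of the binary files in this commit is stored via Git LFS, so the repository itself only tracks a three-line pointer (spec version, sha256 oid, size in bytes); the commit replaces the old oid with the hash of the new checkpoint file. A minimal sketch of verifying a downloaded file against its pointer — the path is this repo's, the helper name is ours:

import hashlib

def file_sha256(path, chunk_size=1 << 20):
    # Stream in 1 MiB chunks so multi-GB checkpoints need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            h.update(block)
    return h.hexdigest()

# Should print the oid recorded in the pointer file:
# 678e7213985883fa100ce33420c0abcc086b1e5d1ebbfe59b4fc2eb98de42dad
print(file_sha256("last-checkpoint/model.safetensors"))
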
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a6cfd935475ed18ce02ed976833b400feff2e9b2f6898bb398d54a55c1abfb69
 size 3041448587
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c216abaf78c8f0c9ca973ee178c53d92ffd82db7d49dbcd691d89f2e73ac2041
 size 14645
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fc468a4d295314c2bd994a0ecebe28224d0db1b0559745a94a6c0cd1ea3e5107
 size 1465
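
Together with the trainer_state.json diffed below, these four files form the core of a Hugging Face Trainer checkpoint: model weights in safetensors format, optimizer and LR-scheduler states as torch pickles, and the RNG state for reproducible resumption. A sketch of inspecting them directly, assuming the checkpoint directory has been pulled locally:

import torch
from safetensors.torch import load_file

# Model weights: a flat dict of tensor name -> tensor (~1.5 GB here).
weights = load_file("last-checkpoint/model.safetensors")
print(len(weights), "tensors")

# Optimizer/scheduler states are plain torch pickles; weights_only=False
# is needed on newer PyTorch because they contain non-tensor objects.
optimizer_state = torch.load("last-checkpoint/optimizer.pt",
                             map_location="cpu", weights_only=False)
scheduler_state = torch.load("last-checkpoint/scheduler.pt",
                             map_location="cpu", weights_only=False)
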
last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.002491803278689,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 14000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1688,6 +1688,286 @@
       "learning_rate": 0.0002914904243218154,
       "loss": 1.8142,
       "step": 12000
+    },
+    {
+      "epoch": 3.018808743169399,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.00029140240170853857,
+      "loss": 1.8505,
+      "step": 12050
+    },
+    {
+      "epoch": 3.0193551912568304,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.0002913139396133353,
+      "loss": 1.8315,
+      "step": 12100
+    },
+    {
+      "epoch": 3.0199016393442624,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.0002912250383111479,
+      "loss": 1.8337,
+      "step": 12150
+    },
+    {
+      "epoch": 3.020448087431694,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.0002911356980782837,
+      "loss": 1.8647,
+      "step": 12200
+    },
+    {
+      "epoch": 3.020994535519126,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0002910459191924141,
+      "loss": 1.8303,
+      "step": 12250
+    },
+    {
+      "epoch": 3.0215409836065574,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.00029095570193257405,
+      "loss": 1.8347,
+      "step": 12300
+    },
+    {
+      "epoch": 3.022087431693989,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002908650465791608,
+      "loss": 1.842,
+      "step": 12350
+    },
+    {
+      "epoch": 3.022633879781421,
+      "grad_norm": 0.48046875,
+      "learning_rate": 0.00029077395341393334,
+      "loss": 1.8282,
+      "step": 12400
+    },
+    {
+      "epoch": 3.0231803278688525,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.00029068242272001135,
+      "loss": 1.7943,
+      "step": 12450
+    },
+    {
+      "epoch": 3.023726775956284,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.00029059045478187424,
+      "loss": 1.8147,
+      "step": 12500
+    },
+    {
+      "epoch": 3.024273224043716,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029049804988536053,
+      "loss": 1.8135,
+      "step": 12550
+    },
+    {
+      "epoch": 3.0248196721311476,
+      "grad_norm": 0.52734375,
+      "learning_rate": 0.00029040520831766676,
+      "loss": 1.8067,
+      "step": 12600
+    },
+    {
+      "epoch": 3.025366120218579,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.00029031193036734666,
+      "loss": 1.8333,
+      "step": 12650
+    },
+    {
+      "epoch": 3.025912568306011,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.0002902182163243103,
+      "loss": 1.8624,
+      "step": 12700
+    },
+    {
+      "epoch": 3.0264590163934426,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.00029012406647982306,
+      "loss": 1.8277,
+      "step": 12750
+    },
+    {
+      "epoch": 3.027005464480874,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002900294811265048,
+      "loss": 1.8209,
+      "step": 12800
+    },
+    {
+      "epoch": 3.027551912568306,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.0002899344605583291,
+      "loss": 1.8295,
+      "step": 12850
+    },
+    {
+      "epoch": 3.0280983606557377,
+      "grad_norm": 0.4921875,
+      "learning_rate": 0.0002898390050706219,
+      "loss": 1.7926,
+      "step": 12900
+    },
+    {
+      "epoch": 3.028644808743169,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002897431149600612,
+      "loss": 1.8064,
+      "step": 12950
+    },
+    {
+      "epoch": 3.029191256830601,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002896467905246755,
+      "loss": 1.7923,
+      "step": 13000
+    },
+    {
+      "epoch": 3.0297377049180327,
+      "grad_norm": 0.5625,
+      "learning_rate": 0.00028955003206384357,
+      "loss": 1.8346,
+      "step": 13050
+    },
+    {
+      "epoch": 3.0302841530054643,
+      "grad_norm": 0.5078125,
+      "learning_rate": 0.0002894528398782929,
+      "loss": 1.8187,
+      "step": 13100
+    },
+    {
+      "epoch": 3.0308306010928963,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.0002893552142700989,
+      "loss": 1.8035,
+      "step": 13150
+    },
+    {
+      "epoch": 3.031377049180328,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.0002892571555426843,
+      "loss": 1.8248,
+      "step": 13200
+    },
+    {
+      "epoch": 3.0319234972677593,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00028915866400081795,
+      "loss": 1.8066,
+      "step": 13250
+    },
+    {
+      "epoch": 3.0324699453551913,
+      "grad_norm": 0.50390625,
+      "learning_rate": 0.00028905973995061373,
+      "loss": 1.8087,
+      "step": 13300
+    },
+    {
+      "epoch": 3.033016393442623,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00028896038369953,
+      "loss": 1.8208,
+      "step": 13350
+    },
+    {
+      "epoch": 3.033562841530055,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00028886059555636816,
+      "loss": 1.8282,
+      "step": 13400
+    },
+    {
+      "epoch": 3.0341092896174864,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.00028876037583127213,
+      "loss": 1.7288,
+      "step": 13450
+    },
+    {
+      "epoch": 3.034655737704918,
+      "grad_norm": 0.58203125,
+      "learning_rate": 0.000288659724835727,
+      "loss": 1.841,
+      "step": 13500
+    },
+    {
+      "epoch": 3.03520218579235,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.00028855864288255856,
+      "loss": 1.8044,
+      "step": 13550
+    },
+    {
+      "epoch": 3.0357486338797814,
+      "grad_norm": 0.52734375,
+      "learning_rate": 0.00028845713028593183,
+      "loss": 1.8101,
+      "step": 13600
+    },
+    {
+      "epoch": 3.036295081967213,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00028835518736135013,
+      "loss": 1.8193,
+      "step": 13650
+    },
+    {
+      "epoch": 3.036841530054645,
+      "grad_norm": 0.5625,
+      "learning_rate": 0.0002882528144256546,
+      "loss": 1.8219,
+      "step": 13700
+    },
+    {
+      "epoch": 3.0373879781420765,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00028815001179702265,
+      "loss": 1.8044,
+      "step": 13750
+    },
+    {
+      "epoch": 4.000306010928962,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.0002880467797949671,
+      "loss": 1.8068,
+      "step": 13800
+    },
+    {
+      "epoch": 4.000852459016394,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.00028794311874033563,
+      "loss": 1.7919,
+      "step": 13850
+    },
+    {
+      "epoch": 4.001398907103825,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00028783902895530893,
+      "loss": 1.7501,
+      "step": 13900
+    },
+    {
+      "epoch": 4.001945355191257,
+      "grad_norm": 0.6171875,
+      "learning_rate": 0.00028773451076340064,
+      "loss": 1.7494,
+      "step": 13950
+    },
+    {
+      "epoch": 4.002491803278689,
+      "grad_norm": 0.546875,
+      "learning_rate": 0.00028762956448945563,
+      "loss": 1.6976,
+      "step": 14000
     }
   ],
   "logging_steps": 50,
@@ -1707,7 +1987,7 @@
         "attributes": {}
       }
     },
-  "total_flos":
+  "total_flos": 7.486988890272694e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
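
trainer_state.json is plain JSON, so the loss curve logged every 50 steps can be read back directly, and the whole checkpoint directory can be handed to Trainer to continue the run. A sketch, assuming a trainer configured the same way as the original run:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])  # 14000 4.002491803278689
losses = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
print(losses[-1])  # (14000, 1.6976)

# Resuming restores the optimizer, scheduler, and RNG states uploaded above:
# trainer.train(resume_from_checkpoint="last-checkpoint")
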