Training in progress, step 600, checkpoint

Browse files

Files changed (12) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +221 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b25892eb513588bbcf11bce9a0a3e83e12594fe856f5f0d8f5b1d034d451ba78
 size 368988278

 version https://git-lfs.github.com/spec/v1
+oid sha256:65b28a24cf96f633c1e0849160faa6f1c242644bdfeb8cbace24b219b5fa97c2
 size 368988278

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84234f0e7fa45b116430e500da89d6e9f1fa17aeb1060cb136714fd526bf6880
 size 1107079290

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8943d3cde63c6018dd234f7968f8ed57a347eb3284892f8cf4de33d873d8af6
 size 1107079290

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78d3f197f6c6558fa8056324f1563ab9e957255f5a1a959362aa4eed7a9545db
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:74386f26f36ed67f56395205881e5db2d0c28ffcbeed50dd95b28771d2dac588
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c1a9c65c2869356282cad6b4a0f7dff7f4dd68ab3d9d216c72b7d6cb524f860
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:41c88f9de084200454883a13c3717941ea3fd433e2f8735507fc30611f9c5501
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:896febe768e17bae5022a95960c041f6425783774ec8859d99d3b149063b1bf9
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:965b00d4cb4710ebab57c8787b9925bb3f77b8eeba94a186ec4bc1c2f326ef3f
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eac482d57e966585467c8ef44dae2869bf7e5d92886f69c11ed7bccc34c07efe
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5dc374b8b9a4c45c950f9d136feab85a767081fa59f0c7d68ed3a62060c4949
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1f27d227a20dc320ac283e0938fb2f6e5b475829a583f8c44d1a16a8c828307
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c7c212fb779217f1edac0baf44f67b608eefc1e0e4e3f5a9dd7eb557032c1bc
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d05a7106aaeaec4b81704e3f4a998b5123cf9342a6733bd9fd2d578e99108c3b
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:86e1effd626ce1e95dd68a0c8089fe19218f2b24dfe9e45ed2cab1c0ebc10ba1
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b94120d8d88502ec8d8b623ec7550315caca003b44fcffbb5767ab0de91baefe
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:799cc83f60dfc1c4243cfd6403592112414a2eec494e6832f10221c96ff62c20
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:332e4d901be380f740b5d8578f7b80ef1865c7fba83bc288c8a35852205cc668
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:586777c398770c3255d3a1f48c7fef44ea9d89117c627c9ea490e16bfd9a49ba
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51052504c8409fe520bbd0e2195cde8473df8898cb97356bd375976fa6c620f2
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8c36296a53c18048f4ca70fd61feb41492ed14663bc9aeac9ca5e6261898e1a
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4904465106774292,
   "eval_steps": 300,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -225,6 +225,224 @@
       "eval_samples_per_second": 720.766,
       "eval_steps_per_second": 22.528,
       "step": 300
     }
   ],
   "logging_steps": 10,
@@ -244,7 +462,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.283349733238374e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9808930213548585,
   "eval_steps": 300,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 720.766,
       "eval_steps_per_second": 22.528,
       "step": 300
+    },
+    {
+      "epoch": 0.5067947277000102,
+      "grad_norm": 16.25,
+      "learning_rate": 9.990101823499548e-07,
+      "loss": 81.1266,
+      "step": 310
+    },
+    {
+      "epoch": 0.5231429447225912,
+      "grad_norm": 17.6875,
+      "learning_rate": 9.989782527483405e-07,
+      "loss": 81.1453,
+      "step": 320
+    },
+    {
+      "epoch": 0.5394911617451722,
+      "grad_norm": 19.375,
+      "learning_rate": 9.98946323146726e-07,
+      "loss": 81.2379,
+      "step": 330
+    },
+    {
+      "epoch": 0.5558393787677531,
+      "grad_norm": 18.0625,
+      "learning_rate": 9.989143935451117e-07,
+      "loss": 80.8672,
+      "step": 340
+    },
+    {
+      "epoch": 0.5721875957903341,
+      "grad_norm": 17.21875,
+      "learning_rate": 9.988824639434972e-07,
+      "loss": 81.0685,
+      "step": 350
+    },
+    {
+      "epoch": 0.5885358128129151,
+      "grad_norm": 17.0625,
+      "learning_rate": 9.988505343418828e-07,
+      "loss": 80.8454,
+      "step": 360
+    },
+    {
+      "epoch": 0.604884029835496,
+      "grad_norm": 20.671875,
+      "learning_rate": 9.988186047402685e-07,
+      "loss": 81.0763,
+      "step": 370
+    },
+    {
+      "epoch": 0.621232246858077,
+      "grad_norm": 18.359375,
+      "learning_rate": 9.987866751386541e-07,
+      "loss": 81.0897,
+      "step": 380
+    },
+    {
+      "epoch": 0.637580463880658,
+      "grad_norm": 16.8125,
+      "learning_rate": 9.987547455370398e-07,
+      "loss": 81.1589,
+      "step": 390
+    },
+    {
+      "epoch": 0.653928680903239,
+      "grad_norm": 16.171875,
+      "learning_rate": 9.987228159354254e-07,
+      "loss": 81.115,
+      "step": 400
+    },
+    {
+      "epoch": 0.67027689792582,
+      "grad_norm": 17.484375,
+      "learning_rate": 9.98690886333811e-07,
+      "loss": 80.6342,
+      "step": 410
+    },
+    {
+      "epoch": 0.686625114948401,
+      "grad_norm": 18.984375,
+      "learning_rate": 9.986589567321967e-07,
+      "loss": 80.8112,
+      "step": 420
+    },
+    {
+      "epoch": 0.7029733319709819,
+      "grad_norm": 20.5,
+      "learning_rate": 9.986270271305824e-07,
+      "loss": 80.7997,
+      "step": 430
+    },
+    {
+      "epoch": 0.7193215489935629,
+      "grad_norm": 18.515625,
+      "learning_rate": 9.98595097528968e-07,
+      "loss": 80.9999,
+      "step": 440
+    },
+    {
+      "epoch": 0.7356697660161439,
+      "grad_norm": 17.15625,
+      "learning_rate": 9.985631679273537e-07,
+      "loss": 81.0371,
+      "step": 450
+    },
+    {
+      "epoch": 0.7520179830387248,
+      "grad_norm": 16.28125,
+      "learning_rate": 9.985312383257393e-07,
+      "loss": 81.1055,
+      "step": 460
+    },
+    {
+      "epoch": 0.7683662000613058,
+      "grad_norm": 17.453125,
+      "learning_rate": 9.98499308724125e-07,
+      "loss": 80.7224,
+      "step": 470
+    },
+    {
+      "epoch": 0.7847144170838868,
+      "grad_norm": 17.015625,
+      "learning_rate": 9.984673791225106e-07,
+      "loss": 80.7431,
+      "step": 480
+    },
+    {
+      "epoch": 0.8010626341064677,
+      "grad_norm": 17.15625,
+      "learning_rate": 9.984354495208962e-07,
+      "loss": 80.8602,
+      "step": 490
+    },
+    {
+      "epoch": 0.8174108511290488,
+      "grad_norm": 17.46875,
+      "learning_rate": 9.984035199192819e-07,
+      "loss": 80.7878,
+      "step": 500
+    },
+    {
+      "epoch": 0.8337590681516297,
+      "grad_norm": 18.4375,
+      "learning_rate": 9.983715903176675e-07,
+      "loss": 80.8012,
+      "step": 510
+    },
+    {
+      "epoch": 0.8501072851742106,
+      "grad_norm": 18.609375,
+      "learning_rate": 9.983396607160532e-07,
+      "loss": 81.2591,
+      "step": 520
+    },
+    {
+      "epoch": 0.8664555021967917,
+      "grad_norm": 16.5625,
+      "learning_rate": 9.983077311144388e-07,
+      "loss": 80.7674,
+      "step": 530
+    },
+    {
+      "epoch": 0.8828037192193726,
+      "grad_norm": 17.171875,
+      "learning_rate": 9.982758015128245e-07,
+      "loss": 80.5839,
+      "step": 540
+    },
+    {
+      "epoch": 0.8991519362419537,
+      "grad_norm": 16.8125,
+      "learning_rate": 9.982438719112101e-07,
+      "loss": 80.9023,
+      "step": 550
+    },
+    {
+      "epoch": 0.9155001532645346,
+      "grad_norm": 16.375,
+      "learning_rate": 9.982119423095958e-07,
+      "loss": 80.4215,
+      "step": 560
+    },
+    {
+      "epoch": 0.9318483702871155,
+      "grad_norm": 16.484375,
+      "learning_rate": 9.981800127079814e-07,
+      "loss": 80.5366,
+      "step": 570
+    },
+    {
+      "epoch": 0.9481965873096966,
+      "grad_norm": 16.40625,
+      "learning_rate": 9.98148083106367e-07,
+      "loss": 80.6862,
+      "step": 580
+    },
+    {
+      "epoch": 0.9645448043322775,
+      "grad_norm": 17.28125,
+      "learning_rate": 9.981161535047527e-07,
+      "loss": 81.2897,
+      "step": 590
+    },
+    {
+      "epoch": 0.9808930213548585,
+      "grad_norm": 17.375,
+      "learning_rate": 9.980842239031384e-07,
+      "loss": 80.4117,
+      "step": 600
+    },
+    {
+      "epoch": 0.9808930213548585,
+      "eval_loss": 1.2649548053741455,
+      "eval_runtime": 44.4765,
+      "eval_samples_per_second": 722.224,
+      "eval_steps_per_second": 22.574,
+      "step": 600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.656669946647675e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null