8BitStudio committed
Commit 0b3eb69 · verified · 1 Parent(s): a9b7d81

Training in progress, step 16000, checkpoint
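Since the commit hash pins the exact repository state at step 16000, the checkpoint can be fetched reproducibly by revision. A minimal sketch using huggingface_hub; the repo_id below is a placeholder (the page does not show the repository name), and the short hash 0b3eb69 is assumed to resolve as a revision (use the full 40-character hash otherwise):

from huggingface_hub import snapshot_download

# repo_id is a placeholder; the commit page does not name the repository.
# revision pins the download to this commit.
local_dir = snapshot_download(
    repo_id="8BitStudio/your-model-repo",
    revision="0b3eb69",
    allow_patterns=["last-checkpoint/*"],  # only fetch the checkpoint folder
)
print("checkpoint downloaded to", local_dir)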

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:678e7213985883fa100ce33420c0abcc086b1e5d1ebbfe59b4fc2eb98de42dad
+ oid sha256:1e3c2ca1453671908d126e303eba98dd0d57768bc3b1dcb8cf48dcbd5df11353
  size 1520630616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a6cfd935475ed18ce02ed976833b400feff2e9b2f6898bb398d54a55c1abfb69
+ oid sha256:e3d6326aeb70f12a4b9828676ff7fb0f81b4f603e04b14d7e8b6337709d69892
  size 3041448587
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c216abaf78c8f0c9ca973ee178c53d92ffd82db7d49dbcd691d89f2e73ac2041
+ oid sha256:839155d8d479a4428e25ab272c147641fcc513d85570b8d0b1dcd722136156e9
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fc468a4d295314c2bd994a0ecebe28224d0db1b0559745a94a6c0cd1ea3e5107
+ oid sha256:027f96c69ce599f1f33b2261db2960f4a6aaefef410e2d604c54d3aa094ca9a9
  size 1465
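The four binary files above are stored as Git LFS pointers, so the diff only records each new payload's sha256 digest and byte size rather than the weights themselves. A minimal sketch of checking a locally downloaded file against its pointer values; the path is simply the checkpoint layout shown in this commit:

import hashlib
import os

def matches_lfs_pointer(path, expected_oid, expected_size):
    """Return True if the local file's size and sha256 match the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size taken from the model.safetensors pointer in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/model.safetensors",
    "1e3c2ca1453671908d126e303eba98dd0d57768bc3b1dcb8cf48dcbd5df11353",
    1520630616,
))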
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 4.002491803278689,
+ "epoch": 4.024349726775957,
  "eval_steps": 500,
- "global_step": 14000,
+ "global_step": 16000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1968,6 +1968,286 @@
  "learning_rate": 0.00028762956448945563,
  "loss": 1.6976,
  "step": 14000
+ },
+ {
+ "epoch": 4.00303825136612,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.00028752419045964935,
+ "loss": 1.7673,
+ "step": 14050
+ },
+ {
+ "epoch": 4.003584699453552,
+ "grad_norm": 0.6328125,
+ "learning_rate": 0.0002874183890014867,
+ "loss": 1.7385,
+ "step": 14100
+ },
+ {
+ "epoch": 4.004131147540984,
+ "grad_norm": 0.5703125,
+ "learning_rate": 0.0002873121604438011,
+ "loss": 1.7526,
+ "step": 14150
+ },
+ {
+ "epoch": 4.004677595628415,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.0002872055051167533,
+ "loss": 1.7323,
+ "step": 14200
+ },
+ {
+ "epoch": 4.005224043715847,
+ "grad_norm": 0.578125,
+ "learning_rate": 0.0002870984233518306,
+ "loss": 1.7666,
+ "step": 14250
+ },
+ {
+ "epoch": 4.005770491803279,
+ "grad_norm": 0.5546875,
+ "learning_rate": 0.0002869909154818455,
+ "loss": 1.7382,
+ "step": 14300
+ },
+ {
+ "epoch": 4.00631693989071,
+ "grad_norm": 0.55859375,
+ "learning_rate": 0.00028688298184093497,
+ "loss": 1.7366,
+ "step": 14350
+ },
+ {
+ "epoch": 4.006863387978142,
+ "grad_norm": 0.5703125,
+ "learning_rate": 0.0002867746227645593,
+ "loss": 1.7206,
+ "step": 14400
+ },
+ {
+ "epoch": 4.007409836065574,
+ "grad_norm": 0.5703125,
+ "learning_rate": 0.000286665838589501,
+ "loss": 1.7655,
+ "step": 14450
+ },
+ {
+ "epoch": 4.007956284153005,
+ "grad_norm": 0.55859375,
+ "learning_rate": 0.0002865566296538637,
+ "loss": 1.7599,
+ "step": 14500
+ },
+ {
+ "epoch": 4.008502732240437,
+ "grad_norm": 0.62890625,
+ "learning_rate": 0.00028644699629707136,
+ "loss": 1.7298,
+ "step": 14550
+ },
+ {
+ "epoch": 4.009049180327869,
+ "grad_norm": 0.671875,
+ "learning_rate": 0.00028633693885986696,
+ "loss": 1.7392,
+ "step": 14600
+ },
+ {
+ "epoch": 4.0095956284153,
+ "grad_norm": 0.5703125,
+ "learning_rate": 0.0002862264576843116,
+ "loss": 1.7556,
+ "step": 14650
+ },
+ {
+ "epoch": 4.010142076502732,
+ "grad_norm": 0.57421875,
+ "learning_rate": 0.0002861155531137833,
+ "loss": 1.7677,
+ "step": 14700
+ },
+ {
+ "epoch": 4.010688524590164,
+ "grad_norm": 0.62890625,
+ "learning_rate": 0.00028600422549297604,
+ "loss": 1.7283,
+ "step": 14750
+ },
+ {
+ "epoch": 4.011234972677595,
+ "grad_norm": 0.51953125,
+ "learning_rate": 0.00028589247516789856,
+ "loss": 1.7389,
+ "step": 14800
+ },
+ {
+ "epoch": 4.011781420765027,
+ "grad_norm": 0.5703125,
+ "learning_rate": 0.0002857803024858735,
+ "loss": 1.7769,
+ "step": 14850
+ },
+ {
+ "epoch": 4.012327868852459,
+ "grad_norm": 0.5234375,
+ "learning_rate": 0.00028566770779553613,
+ "loss": 1.7306,
+ "step": 14900
+ },
+ {
+ "epoch": 4.01287431693989,
+ "grad_norm": 0.54296875,
+ "learning_rate": 0.00028555469144683337,
+ "loss": 1.7341,
+ "step": 14950
+ },
+ {
+ "epoch": 4.013420765027322,
+ "grad_norm": 0.53515625,
+ "learning_rate": 0.00028544125379102264,
+ "loss": 1.7364,
+ "step": 15000
+ },
+ {
+ "epoch": 4.013967213114754,
+ "grad_norm": 0.52734375,
+ "learning_rate": 0.0002853273951806708,
+ "loss": 1.7203,
+ "step": 15050
+ },
+ {
+ "epoch": 4.0145136612021854,
+ "grad_norm": 0.57421875,
+ "learning_rate": 0.00028521311596965297,
+ "loss": 1.7735,
+ "step": 15100
+ },
+ {
+ "epoch": 4.015060109289617,
+ "grad_norm": 0.5546875,
+ "learning_rate": 0.00028509841651315156,
+ "loss": 1.7457,
+ "step": 15150
+ },
+ {
+ "epoch": 4.015606557377049,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.0002849832971676553,
+ "loss": 1.7317,
+ "step": 15200
+ },
+ {
+ "epoch": 4.0161530054644805,
+ "grad_norm": 0.5625,
+ "learning_rate": 0.0002848677582909576,
+ "loss": 1.7168,
+ "step": 15250
+ },
+ {
+ "epoch": 4.0166994535519125,
+ "grad_norm": 0.5625,
+ "learning_rate": 0.000284751800242156,
+ "loss": 1.714,
+ "step": 15300
+ },
+ {
+ "epoch": 4.0172459016393445,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.0002846354233816508,
+ "loss": 1.7342,
+ "step": 15350
+ },
+ {
+ "epoch": 4.017792349726776,
+ "grad_norm": 0.58203125,
+ "learning_rate": 0.00028451862807114396,
+ "loss": 1.7201,
+ "step": 15400
+ },
+ {
+ "epoch": 4.0183387978142076,
+ "grad_norm": 0.55078125,
+ "learning_rate": 0.00028440141467363803,
+ "loss": 1.6881,
+ "step": 15450
+ },
+ {
+ "epoch": 4.0188852459016395,
+ "grad_norm": 0.54296875,
+ "learning_rate": 0.00028428378355343495,
+ "loss": 1.7184,
+ "step": 15500
+ },
+ {
+ "epoch": 4.019431693989071,
+ "grad_norm": 0.54296875,
+ "learning_rate": 0.00028416573507613485,
+ "loss": 1.7055,
+ "step": 15550
+ },
+ {
+ "epoch": 4.019978142076503,
+ "grad_norm": 0.58203125,
+ "learning_rate": 0.0002840472696086353,
+ "loss": 1.72,
+ "step": 15600
+ },
+ {
+ "epoch": 4.020524590163935,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.0002839283875191295,
+ "loss": 1.7308,
+ "step": 15650
+ },
+ {
+ "epoch": 4.021071038251366,
+ "grad_norm": 0.5625,
+ "learning_rate": 0.0002838090891771059,
+ "loss": 1.702,
+ "step": 15700
+ },
+ {
+ "epoch": 4.021617486338798,
+ "grad_norm": 0.59375,
+ "learning_rate": 0.0002836893749533465,
+ "loss": 1.7191,
+ "step": 15750
+ },
+ {
+ "epoch": 4.02216393442623,
+ "grad_norm": 0.54296875,
+ "learning_rate": 0.0002835692452199257,
+ "loss": 1.7215,
+ "step": 15800
+ },
+ {
+ "epoch": 4.022710382513662,
+ "grad_norm": 0.54296875,
+ "learning_rate": 0.00028344870035020963,
+ "loss": 1.6984,
+ "step": 15850
+ },
+ {
+ "epoch": 4.023256830601093,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.0002833277407188545,
+ "loss": 1.6847,
+ "step": 15900
+ },
+ {
+ "epoch": 4.023803278688525,
+ "grad_norm": 0.53125,
+ "learning_rate": 0.00028320636670180557,
+ "loss": 1.6739,
+ "step": 15950
+ },
+ {
+ "epoch": 4.024349726775957,
+ "grad_norm": 0.52734375,
+ "learning_rate": 0.0002830845786762962,
+ "loss": 1.7125,
+ "step": 16000
  }
  ],
  "logging_steps": 50,
@@ -1987,7 +2267,7 @@
  "attributes": {}
  }
  },
- "total_flos": 7.486988890272694e+18,
+ "total_flos": 8.556472791069622e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null