Training in progress, step 16, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model-00001-of-00002.safetensors +1 -1
last-checkpoint/model-00002-of-00002.safetensors +1 -1
last-checkpoint/trainer_state.json +57 -161
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:943608aa5c6d1a33cd3de6c65e5e3dc4364cc2718c2e96c2431f1f8af7ed45a8
 size 4969539560

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e7da2c9aef8f35f6786cbf06af70258ed692543ecd8515c205ebddc810fd910
 size 4969539560

last-checkpoint/model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0f8c786480fb81dbef237ecccb6d214b6a308947b049039952c10df566011d7
 size 1912795688

 version https://git-lfs.github.com/spec/v1
+oid sha256:16af573dbb77b92352dc7d86e3ffabc1ab8f05af70d970bb7737f8c187b8c429
 size 1912795688

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,242 +2,138 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.0,
-  "eval_steps": 100,
-  "global_step": 32,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.2909090909090909,
-      "grad_norm": 10.45223617553711,
       "learning_rate": 0.0,
       "loss": 0.7859,
       "step": 1
     },
     {
       "epoch": 0.5818181818181818,
-      "grad_norm": 9.886490821838379,
-      "learning_rate": 2.5e-08,
       "loss": 0.7965,
       "step": 2
     },
     {
       "epoch": 0.8727272727272727,
-      "grad_norm": 10.403158187866211,
-      "learning_rate": 5e-08,
       "loss": 0.7893,
       "step": 3
     },
     {
       "epoch": 1.0,
-      "grad_norm": 12.796398162841797,
-      "learning_rate": 7.5e-08,
-      "loss": 0.8701,
       "step": 4
     },
     {
       "epoch": 1.290909090909091,
-      "grad_norm": 10.29249095916748,
-      "learning_rate": 1e-07,
-      "loss": 0.7784,
       "step": 5
     },
     {
       "epoch": 1.5818181818181818,
-      "grad_norm": 9.718952178955078,
-      "learning_rate": 9.971704944519592e-08,
-      "loss": 0.7589,
       "step": 6
     },
     {
       "epoch": 1.8727272727272726,
-      "grad_norm": 10.934309959411621,
-      "learning_rate": 9.887175604818205e-08,
-      "loss": 0.8307,
       "step": 7
     },
     {
       "epoch": 2.0,
-      "grad_norm": 11.188789367675781,
-      "learning_rate": 9.747474986387654e-08,
-      "loss": 0.8821,
       "step": 8
     },
     {
       "epoch": 2.290909090909091,
-      "grad_norm": 9.692912101745605,
-      "learning_rate": 9.554359905560885e-08,
-      "loss": 0.7998,
       "step": 9
     },
     {
       "epoch": 2.581818181818182,
-      "grad_norm": 11.390420913696289,
-      "learning_rate": 9.310258896527278e-08,
-      "loss": 0.8196,
       "step": 10
     },
     {
       "epoch": 2.8727272727272726,
-      "grad_norm": 10.247771263122559,
-      "learning_rate": 9.018241671106134e-08,
-      "loss": 0.7766,
       "step": 11
     },
     {
       "epoch": 3.0,
-      "grad_norm": 11.571775436401367,
-      "learning_rate": 8.681980515339464e-08,
-      "loss": 0.8289,
       "step": 12
     },
     {
       "epoch": 3.290909090909091,
-      "grad_norm": 10.701568603515625,
-      "learning_rate": 8.305704108364301e-08,
-      "loss": 0.8375,
       "step": 13
     },
     {
       "epoch": 3.581818181818182,
-      "grad_norm": 10.49411678314209,
-      "learning_rate": 7.894144344319013e-08,
-      "loss": 0.8383,
       "step": 14
     },
     {
       "epoch": 3.8727272727272726,
-      "grad_norm": 9.895997047424316,
-      "learning_rate": 7.452476826029011e-08,
-      "loss": 0.772,
       "step": 15
     },
     {
       "epoch": 4.0,
-      "grad_norm": 10.467330932617188,
-      "learning_rate": 6.986255778798252e-08,
-      "loss": 0.7012,
       "step": 16
     },
     {
-      "epoch": 4.290909090909091,
-      "grad_norm": 9.836618423461914,
-      "learning_rate": 6.501344202803413e-08,
-      "loss": 0.777,
-      "step": 17
-    },
-    {
-      "epoch": 4.581818181818182,
-      "grad_norm": 11.242887496948242,
-      "learning_rate": 6.003840142464886e-08,
-      "loss": 0.8631,
-      "step": 18
-    },
-    {
-      "epoch": 4.872727272727273,
-      "grad_norm": 10.001364707946777,
-      "learning_rate": 5.5e-08,
-      "loss": 0.7819,
-      "step": 19
-    },
-    {
-      "epoch": 5.0,
-      "grad_norm": 10.092758178710938,
-      "learning_rate": 4.996159857535115e-08,
-      "loss": 0.7722,
-      "step": 20
-    },
-    {
-      "epoch": 5.290909090909091,
-      "grad_norm": 9.45466423034668,
-      "learning_rate": 4.498655797196585e-08,
-      "loss": 0.7416,
-      "step": 21
-    },
-    {
-      "epoch": 5.581818181818182,
-      "grad_norm": 10.496912956237793,
-      "learning_rate": 4.0137442212017494e-08,
-      "loss": 0.8161,
-      "step": 22
-    },
-    {
-      "epoch": 5.872727272727273,
-      "grad_norm": 10.202836036682129,
-      "learning_rate": 3.5475231739709885e-08,
-      "loss": 0.823,
-      "step": 23
-    },
-    {
-      "epoch": 6.0,
-      "grad_norm": 12.920607566833496,
-      "learning_rate": 3.105855655680986e-08,
-      "loss": 0.8315,
-      "step": 24
-    },
-    {
-      "epoch": 6.290909090909091,
-      "grad_norm": 10.253811836242676,
-      "learning_rate": 2.6942958916356994e-08,
-      "loss": 0.8316,
-      "step": 25
-    },
-    {
-      "epoch": 6.581818181818182,
-      "grad_norm": 9.783924102783203,
-      "learning_rate": 2.3180194846605363e-08,
-      "loss": 0.7542,
-      "step": 26
-    },
-    {
-      "epoch": 6.872727272727273,
-      "grad_norm": 10.855210304260254,
-      "learning_rate": 1.981758328893866e-08,
-      "loss": 0.8357,
-      "step": 27
-    },
-    {
-      "epoch": 7.0,
-      "grad_norm": 10.147912979125977,
-      "learning_rate": 1.6897411034727217e-08,
-      "loss": 0.7426,
-      "step": 28
-    },
-    {
-      "epoch": 7.290909090909091,
-      "grad_norm": 10.078908920288086,
-      "learning_rate": 1.4456400944391145e-08,
-      "loss": 0.7832,
-      "step": 29
-    },
-    {
-      "epoch": 7.581818181818182,
-      "grad_norm": 10.833037376403809,
-      "learning_rate": 1.2525250136123459e-08,
-      "loss": 0.7954,
-      "step": 30
-    },
-    {
-      "epoch": 7.872727272727273,
-      "grad_norm": 9.931336402893066,
-      "learning_rate": 1.1128243951817936e-08,
-      "loss": 0.7893,
-      "step": 31
-    },
-    {
-      "epoch": 8.0,
-      "grad_norm": 10.87130355834961,
-      "learning_rate": 1.0282950554804083e-08,
-      "loss": 0.9104,
-      "step": 32
     }
   ],
   "logging_steps": 1,
-  "max_steps": 32,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 8,
   "save_steps": 16,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -246,12 +142,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.073226288070656e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.0,
+  "eval_steps": 16,
+  "global_step": 16,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.2909090909090909,
+      "grad_norm": 10.451888084411621,
       "learning_rate": 0.0,
       "loss": 0.7859,
       "step": 1
     },
     {
       "epoch": 0.5818181818181818,
+      "grad_norm": 9.886292457580566,
+      "learning_rate": 2.5e-09,
       "loss": 0.7965,
       "step": 2
     },
     {
       "epoch": 0.8727272727272727,
+      "grad_norm": 10.406240463256836,
+      "learning_rate": 5e-09,
       "loss": 0.7893,
       "step": 3
     },
     {
       "epoch": 1.0,
+      "grad_norm": 12.746437072753906,
+      "learning_rate": 7.5e-09,
+      "loss": 0.8702,
       "step": 4
     },
     {
       "epoch": 1.290909090909091,
+      "grad_norm": 10.291970252990723,
+      "learning_rate": 1e-08,
+      "loss": 0.7785,
       "step": 5
     },
     {
       "epoch": 1.5818181818181818,
+      "grad_norm": 9.746875762939453,
+      "learning_rate": 1.25e-08,
+      "loss": 0.759,
       "step": 6
     },
     {
       "epoch": 1.8727272727272726,
+      "grad_norm": 10.920265197753906,
+      "learning_rate": 1.5e-08,
+      "loss": 0.8308,
       "step": 7
     },
     {
       "epoch": 2.0,
+      "grad_norm": 11.265154838562012,
+      "learning_rate": 1.7499999999999998e-08,
+      "loss": 0.8828,
       "step": 8
     },
     {
       "epoch": 2.290909090909091,
+      "grad_norm": 9.750505447387695,
+      "learning_rate": 2e-08,
+      "loss": 0.8004,
       "step": 9
     },
     {
       "epoch": 2.581818181818182,
+      "grad_norm": 11.47065544128418,
+      "learning_rate": 2.25e-08,
+      "loss": 0.8204,
       "step": 10
     },
     {
       "epoch": 2.8727272727272726,
+      "grad_norm": 10.275605201721191,
+      "learning_rate": 2.5e-08,
+      "loss": 0.7771,
       "step": 11
     },
     {
       "epoch": 3.0,
+      "grad_norm": 11.604477882385254,
+      "learning_rate": 2.75e-08,
+      "loss": 0.8295,
       "step": 12
     },
     {
       "epoch": 3.290909090909091,
+      "grad_norm": 10.712018966674805,
+      "learning_rate": 3e-08,
+      "loss": 0.8378,
       "step": 13
     },
     {
       "epoch": 3.581818181818182,
+      "grad_norm": 10.54987907409668,
+      "learning_rate": 3.25e-08,
+      "loss": 0.8398,
       "step": 14
     },
     {
       "epoch": 3.8727272727272726,
+      "grad_norm": 9.999624252319336,
+      "learning_rate": 3.4999999999999996e-08,
+      "loss": 0.773,
       "step": 15
     },
     {
       "epoch": 4.0,
+      "grad_norm": 10.562870979309082,
+      "learning_rate": 3.75e-08,
+      "loss": 0.7025,
       "step": 16
     },
     {
+      "epoch": 4.0,
+      "eval_loss": 0.760595977306366,
+      "eval_runtime": 0.722,
+      "eval_samples_per_second": 18.005,
+      "eval_steps_per_second": 18.005,
+      "step": 16
     }
   ],
   "logging_steps": 1,
+  "max_steps": 400,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
   "save_steps": 16,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 5366131440353280.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d840eebf56f8b6e42656fd7c380bd348e6804b913efec9e3bbfeabc52c3a4df6
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:b66b3dd2c00440ec978f1804a2b69b05b8711272903c469b51af4f9859ceb8e9
 size 5816