Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +222 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3896dc7e3dfc728c24eaa63c18e8781b696d643a43167cf61002d15f7d96c342
 size 83945296

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ce1d3865491490218207bac75bccedb8c2e28ef95b0cf9a0f748b479a6adc57
 size 83945296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57887673c056b1c8087dce5b980c4db8dc84eed1b075794a285c54d497c534b1
 size 43122580

 version https://git-lfs.github.com/spec/v1
+oid sha256:8287c39800d6ba02c8dfe28b1ae66f1a0eed7e1907cdd7f1aa84e95e9369848e
 size 43122580

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f5daff26b89d78b10080e8caa8f194190056243cab03647cb95fa7bc83c4aee
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:03991d31508c600c9181221b8bb07dd43258c023851238fbb98928b343012b1a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2347f838bf86167a02aaac5d32873f1e6b1d790c95b57022527bb2bcb488890
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2409fb1c3fd1c2de2c63b5459c49a97abc9a50084af4337cc5090c00ef975a6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7515657620041754,
   "eval_steps": 500,
-  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -637,6 +637,224 @@
       "learning_rate": 1.5872342839067306e-05,
       "loss": 2.7213,
       "step": 90
     }
   ],
   "logging_steps": 1,
@@ -651,12 +869,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.309618477170688e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0020876826722338,
   "eval_steps": 500,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.5872342839067306e-05,
       "loss": 2.7213,
       "step": 90
+    },
+    {
+      "epoch": 0.7599164926931107,
+      "grad_norm": 39.1318244934082,
+      "learning_rate": 1.4886944624647647e-05,
+      "loss": 2.5212,
+      "step": 91
+    },
+    {
+      "epoch": 0.7682672233820459,
+      "grad_norm": 37.528324127197266,
+      "learning_rate": 1.3927749088052217e-05,
+      "loss": 3.2661,
+      "step": 92
+    },
+    {
+      "epoch": 0.7766179540709812,
+      "grad_norm": 33.13786315917969,
+      "learning_rate": 1.2995472016937404e-05,
+      "loss": 4.3563,
+      "step": 93
+    },
+    {
+      "epoch": 0.7849686847599165,
+      "grad_norm": 28.756526947021484,
+      "learning_rate": 1.209080911139187e-05,
+      "loss": 3.6397,
+      "step": 94
+    },
+    {
+      "epoch": 0.7933194154488518,
+      "grad_norm": 25.469545364379883,
+      "learning_rate": 1.1214435464779006e-05,
+      "loss": 4.4051,
+      "step": 95
+    },
+    {
+      "epoch": 0.8016701461377871,
+      "grad_norm": 27.566421508789062,
+      "learning_rate": 1.0367005059957096e-05,
+      "loss": 5.1261,
+      "step": 96
+    },
+    {
+      "epoch": 0.8100208768267223,
+      "grad_norm": 18.103004455566406,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 3.8833,
+      "step": 97
+    },
+    {
+      "epoch": 0.8183716075156576,
+      "grad_norm": 13.448304176330566,
+      "learning_rate": 8.761481442551573e-06,
+      "loss": 5.2468,
+      "step": 98
+    },
+    {
+      "epoch": 0.826722338204593,
+      "grad_norm": 11.937213897705078,
+      "learning_rate": 8.004586331860175e-06,
+      "loss": 4.6798,
+      "step": 99
+    },
+    {
+      "epoch": 0.8350730688935282,
+      "grad_norm": 8.884134292602539,
+      "learning_rate": 7.2790297726755716e-06,
+      "loss": 4.9037,
+      "step": 100
+    },
+    {
+      "epoch": 0.8434237995824635,
+      "grad_norm": 9.988716125488281,
+      "learning_rate": 6.585353202493322e-06,
+      "loss": 5.0198,
+      "step": 101
+    },
+    {
+      "epoch": 0.8517745302713987,
+      "grad_norm": 9.69389820098877,
+      "learning_rate": 5.924074268766422e-06,
+      "loss": 3.5639,
+      "step": 102
+    },
+    {
+      "epoch": 0.860125260960334,
+      "grad_norm": 12.106159210205078,
+      "learning_rate": 5.295686442617443e-06,
+      "loss": 4.083,
+      "step": 103
+    },
+    {
+      "epoch": 0.8684759916492694,
+      "grad_norm": 10.521677017211914,
+      "learning_rate": 4.700658650591827e-06,
+      "loss": 4.8845,
+      "step": 104
+    },
+    {
+      "epoch": 0.8768267223382046,
+      "grad_norm": 12.082547187805176,
+      "learning_rate": 4.139434924727359e-06,
+      "loss": 4.4946,
+      "step": 105
+    },
+    {
+      "epoch": 0.8851774530271399,
+      "grad_norm": 10.821547508239746,
+      "learning_rate": 3.612434071200771e-06,
+      "loss": 4.7307,
+      "step": 106
+    },
+    {
+      "epoch": 0.8935281837160751,
+      "grad_norm": 10.070833206176758,
+      "learning_rate": 3.1200493577989875e-06,
+      "loss": 5.0714,
+      "step": 107
+    },
+    {
+      "epoch": 0.9018789144050104,
+      "grad_norm": 19.112159729003906,
+      "learning_rate": 2.662648220447811e-06,
+      "loss": 4.6327,
+      "step": 108
+    },
+    {
+      "epoch": 0.9102296450939458,
+      "grad_norm": 12.466245651245117,
+      "learning_rate": 2.240571989017598e-06,
+      "loss": 5.2114,
+      "step": 109
+    },
+    {
+      "epoch": 0.918580375782881,
+      "grad_norm": 17.981706619262695,
+      "learning_rate": 1.8541356326100433e-06,
+      "loss": 5.5056,
+      "step": 110
+    },
+    {
+      "epoch": 0.9269311064718163,
+      "grad_norm": 20.902307510375977,
+      "learning_rate": 1.5036275245164377e-06,
+      "loss": 4.974,
+      "step": 111
+    },
+    {
+      "epoch": 0.9352818371607515,
+      "grad_norm": 17.289682388305664,
+      "learning_rate": 1.1893092270227724e-06,
+      "loss": 4.9496,
+      "step": 112
+    },
+    {
+      "epoch": 0.9436325678496869,
+      "grad_norm": 17.364238739013672,
+      "learning_rate": 9.114152962220735e-07,
+      "loss": 6.3092,
+      "step": 113
+    },
+    {
+      "epoch": 0.9519832985386222,
+      "grad_norm": 30.94085121154785,
+      "learning_rate": 6.701531069799038e-07,
+      "loss": 5.5584,
+      "step": 114
+    },
+    {
+      "epoch": 0.9603340292275574,
+      "grad_norm": 27.87859535217285,
+      "learning_rate": 4.6570269818346224e-07,
+      "loss": 6.069,
+      "step": 115
+    },
+    {
+      "epoch": 0.9686847599164927,
+      "grad_norm": 30.613059997558594,
+      "learning_rate": 2.9821663838981993e-07,
+      "loss": 6.3244,
+      "step": 116
+    },
+    {
+      "epoch": 0.9770354906054279,
+      "grad_norm": 8.08796215057373,
+      "learning_rate": 1.6781991197352133e-07,
+      "loss": 2.1168,
+      "step": 117
+    },
+    {
+      "epoch": 0.9853862212943633,
+      "grad_norm": 7.885867595672607,
+      "learning_rate": 7.460982585860144e-08,
+      "loss": 4.3936,
+      "step": 118
+    },
+    {
+      "epoch": 0.9937369519832986,
+      "grad_norm": 13.48105525970459,
+      "learning_rate": 1.8655936904465875e-08,
+      "loss": 5.1137,
+      "step": 119
+    },
+    {
+      "epoch": 0.9937369519832986,
+      "eval_loss": 0.985297679901123,
+      "eval_runtime": 5.2963,
+      "eval_samples_per_second": 19.07,
+      "eval_steps_per_second": 4.909,
+      "step": 119
+    },
+    {
+      "epoch": 1.0020876826722338,
+      "grad_norm": 15.896007537841797,
+      "learning_rate": 0.0,
+      "loss": 4.0913,
+      "step": 120
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.408442956729549e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null