Training in progress, step 9500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b6a100ff38d4f00b501f20a5190982d96bca76e8f9a3dd9afd41838295e088c
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:feeb8af86d4228c031ab0303150253b8e59c08c82f4f8aa78a75fae604e120a1
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:318e4ae9012739627ee7e1642d03ed5987f8ac51a72f0db40543b41f04528304
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d5fa1cbde1c469de32a370ba5361ae4e7744a119f98350fc2511f131db06a4e
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3343121e0ab3aeb674ab29d872307564462c4bd82cdd92e6577a4ff26999fc00
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccd074c7b8f0b016dc440e87123ddc293303707dc1fa944c0ab62d0b20aa48bd
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:528ba9a1d2a5739586b1652bb1454f9e977f93a6ae9e9c38a71b51bc41c45de4
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:4784f3b1ac308d4093c525f58ebfb1ed5c4e7ca17828bd58e2e6a8e2baed20b5
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.5205271160669032,
   "eval_steps": 500,
-  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6459,6 +6459,364 @@
       "eval_samples_per_second": 269.07,
       "eval_steps_per_second": 5.65,
       "step": 9000
     }
   ],
   "logging_steps": 10,
@@ -6478,7 +6836,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.010090484178616e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.6050008447372868,
   "eval_steps": 500,
+  "global_step": 9500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 269.07,
       "eval_steps_per_second": 5.65,
       "step": 9000
+    },
+    {
+      "epoch": 1.522216590640311,
+      "grad_norm": 0.4886947572231293,
+      "learning_rate": 5.715753870066455e-05,
+      "loss": 4.321556472778321,
+      "step": 9010
+    },
+    {
+      "epoch": 1.5239060652137186,
+      "grad_norm": 0.4844423532485962,
+      "learning_rate": 5.67817924971296e-05,
+      "loss": 4.319805908203125,
+      "step": 9020
+    },
+    {
+      "epoch": 1.5255955397871261,
+      "grad_norm": 0.5226190686225891,
+      "learning_rate": 5.6406996868811885e-05,
+      "loss": 4.334315490722656,
+      "step": 9030
+    },
+    {
+      "epoch": 1.5272850143605339,
+      "grad_norm": 0.4798380136489868,
+      "learning_rate": 5.60331556376197e-05,
+      "loss": 4.332364654541015,
+      "step": 9040
+    },
+    {
+      "epoch": 1.5289744889339416,
+      "grad_norm": 0.4943060874938965,
+      "learning_rate": 5.566027261572907e-05,
+      "loss": 4.315433502197266,
+      "step": 9050
+    },
+    {
+      "epoch": 1.5306639635073491,
+      "grad_norm": 0.4950096011161804,
+      "learning_rate": 5.528835160554475e-05,
+      "loss": 4.333520126342774,
+      "step": 9060
+    },
+    {
+      "epoch": 1.5323534380807569,
+      "grad_norm": 0.49306342005729675,
+      "learning_rate": 5.491739639966153e-05,
+      "loss": 4.341754531860351,
+      "step": 9070
+    },
+    {
+      "epoch": 1.5340429126541646,
+      "grad_norm": 0.5156424641609192,
+      "learning_rate": 5.454741078082578e-05,
+      "loss": 4.341635894775391,
+      "step": 9080
+    },
+    {
+      "epoch": 1.535732387227572,
+      "grad_norm": 0.4766913652420044,
+      "learning_rate": 5.417839852189653e-05,
+      "loss": 4.368686294555664,
+      "step": 9090
+    },
+    {
+      "epoch": 1.53742186180098,
+      "grad_norm": 0.4723012447357178,
+      "learning_rate": 5.381036338580718e-05,
+      "loss": 4.3226276397705075,
+      "step": 9100
+    },
+    {
+      "epoch": 1.5391113363743876,
+      "grad_norm": 0.48478707671165466,
+      "learning_rate": 5.344330912552703e-05,
+      "loss": 4.324138259887695,
+      "step": 9110
+    },
+    {
+      "epoch": 1.540800810947795,
+      "grad_norm": 0.4678701162338257,
+      "learning_rate": 5.3077239484023385e-05,
+      "loss": 4.336772918701172,
+      "step": 9120
+    },
+    {
+      "epoch": 1.542490285521203,
+      "grad_norm": 0.5012118220329285,
+      "learning_rate": 5.271215819422277e-05,
+      "loss": 4.340796661376953,
+      "step": 9130
+    },
+    {
+      "epoch": 1.5441797600946106,
+      "grad_norm": 0.46702662110328674,
+      "learning_rate": 5.234806897897328e-05,
+      "loss": 4.351072692871094,
+      "step": 9140
+    },
+    {
+      "epoch": 1.5458692346680183,
+      "grad_norm": 0.4755004346370697,
+      "learning_rate": 5.1984975551006434e-05,
+      "loss": 4.333198165893554,
+      "step": 9150
+    },
+    {
+      "epoch": 1.547558709241426,
+      "grad_norm": 0.49710944294929504,
+      "learning_rate": 5.1622881612899635e-05,
+      "loss": 4.329468536376953,
+      "step": 9160
+    },
+    {
+      "epoch": 1.5492481838148335,
+      "grad_norm": 0.47669288516044617,
+      "learning_rate": 5.126179085703794e-05,
+      "loss": 4.306584167480469,
+      "step": 9170
+    },
+    {
+      "epoch": 1.5509376583882413,
+      "grad_norm": 0.4755394458770752,
+      "learning_rate": 5.090170696557667e-05,
+      "loss": 4.332014083862305,
+      "step": 9180
+    },
+    {
+      "epoch": 1.552627132961649,
+      "grad_norm": 0.4676371216773987,
+      "learning_rate": 5.054263361040395e-05,
+      "loss": 4.323485565185547,
+      "step": 9190
+    },
+    {
+      "epoch": 1.5543166075350565,
+      "grad_norm": 0.478369802236557,
+      "learning_rate": 5.018457445310313e-05,
+      "loss": 4.330324935913086,
+      "step": 9200
+    },
+    {
+      "epoch": 1.5560060821084643,
+      "grad_norm": 0.47149577736854553,
+      "learning_rate": 4.9827533144915384e-05,
+      "loss": 4.3147937774658205,
+      "step": 9210
+    },
+    {
+      "epoch": 1.557695556681872,
+      "grad_norm": 0.46445849537849426,
+      "learning_rate": 4.9471513326702544e-05,
+      "loss": 4.3321784973144535,
+      "step": 9220
+    },
+    {
+      "epoch": 1.5593850312552795,
+      "grad_norm": 0.4738256335258484,
+      "learning_rate": 4.911651862891014e-05,
+      "loss": 4.331890487670899,
+      "step": 9230
+    },
+    {
+      "epoch": 1.5610745058286872,
+      "grad_norm": 0.461725115776062,
+      "learning_rate": 4.876255267153011e-05,
+      "loss": 4.33313217163086,
+      "step": 9240
+    },
+    {
+      "epoch": 1.562763980402095,
+      "grad_norm": 0.47477617859840393,
+      "learning_rate": 4.8409619064063965e-05,
+      "loss": 4.322317504882813,
+      "step": 9250
+    },
+    {
+      "epoch": 1.5644534549755025,
+      "grad_norm": 0.5020060539245605,
+      "learning_rate": 4.805772140548613e-05,
+      "loss": 4.332529067993164,
+      "step": 9260
+    },
+    {
+      "epoch": 1.5661429295489104,
+      "grad_norm": 0.49133333563804626,
+      "learning_rate": 4.770686328420713e-05,
+      "loss": 4.309441375732422,
+      "step": 9270
+    },
+    {
+      "epoch": 1.567832404122318,
+      "grad_norm": 0.4712921380996704,
+      "learning_rate": 4.7357048278036944e-05,
+      "loss": 4.33348503112793,
+      "step": 9280
+    },
+    {
+      "epoch": 1.5695218786957257,
+      "grad_norm": 0.4794677495956421,
+      "learning_rate": 4.700827995414853e-05,
+      "loss": 4.318268203735352,
+      "step": 9290
+    },
+    {
+      "epoch": 1.5712113532691334,
+      "grad_norm": 0.48898905515670776,
+      "learning_rate": 4.666056186904168e-05,
+      "loss": 4.350247955322265,
+      "step": 9300
+    },
+    {
+      "epoch": 1.572900827842541,
+      "grad_norm": 0.4803585112094879,
+      "learning_rate": 4.63138975685064e-05,
+      "loss": 4.3233489990234375,
+      "step": 9310
+    },
+    {
+      "epoch": 1.5745903024159487,
+      "grad_norm": 0.46920374035835266,
+      "learning_rate": 4.596829058758694e-05,
+      "loss": 4.339992904663086,
+      "step": 9320
+    },
+    {
+      "epoch": 1.5762797769893564,
+      "grad_norm": 0.483453631401062,
+      "learning_rate": 4.5623744450545846e-05,
+      "loss": 4.3560230255126955,
+      "step": 9330
+    },
+    {
+      "epoch": 1.577969251562764,
+      "grad_norm": 0.4820483326911926,
+      "learning_rate": 4.528026267082786e-05,
+      "loss": 4.344028091430664,
+      "step": 9340
+    },
+    {
+      "epoch": 1.5796587261361716,
+      "grad_norm": 0.47812727093696594,
+      "learning_rate": 4.493784875102409e-05,
+      "loss": 4.326963806152344,
+      "step": 9350
+    },
+    {
+      "epoch": 1.5813482007095794,
+      "grad_norm": 0.49543893337249756,
+      "learning_rate": 4.45965061828363e-05,
+      "loss": 4.336210632324219,
+      "step": 9360
+    },
+    {
+      "epoch": 1.583037675282987,
+      "grad_norm": 0.4648870825767517,
+      "learning_rate": 4.4256238447041556e-05,
+      "loss": 4.335990905761719,
+      "step": 9370
+    },
+    {
+      "epoch": 1.5847271498563946,
+      "grad_norm": 0.4952487051486969,
+      "learning_rate": 4.39170490134563e-05,
+      "loss": 4.324835968017578,
+      "step": 9380
+    },
+    {
+      "epoch": 1.5864166244298024,
+      "grad_norm": 0.4667623043060303,
+      "learning_rate": 4.3578941340901274e-05,
+      "loss": 4.3118232727050785,
+      "step": 9390
+    },
+    {
+      "epoch": 1.5881060990032099,
+      "grad_norm": 0.4628295600414276,
+      "learning_rate": 4.324191887716612e-05,
+      "loss": 4.319614028930664,
+      "step": 9400
+    },
+    {
+      "epoch": 1.5897955735766178,
+      "grad_norm": 0.47781363129615784,
+      "learning_rate": 4.290598505897439e-05,
+      "loss": 4.33368148803711,
+      "step": 9410
+    },
+    {
+      "epoch": 1.5914850481500253,
+      "grad_norm": 0.47368741035461426,
+      "learning_rate": 4.25711433119483e-05,
+      "loss": 4.343207550048828,
+      "step": 9420
+    },
+    {
+      "epoch": 1.5931745227234329,
+      "grad_norm": 0.48697587847709656,
+      "learning_rate": 4.223739705057384e-05,
+      "loss": 4.332680130004883,
+      "step": 9430
+    },
+    {
+      "epoch": 1.5948639972968408,
+      "grad_norm": 0.4794331192970276,
+      "learning_rate": 4.1904749678165965e-05,
+      "loss": 4.318676376342774,
+      "step": 9440
+    },
+    {
+      "epoch": 1.5965534718702483,
+      "grad_norm": 0.4642111361026764,
+      "learning_rate": 4.157320458683409e-05,
+      "loss": 4.281653976440429,
+      "step": 9450
+    },
+    {
+      "epoch": 1.598242946443656,
+      "grad_norm": 0.48844289779663086,
+      "learning_rate": 4.124276515744713e-05,
+      "loss": 4.316392135620117,
+      "step": 9460
+    },
+    {
+      "epoch": 1.5999324210170638,
+      "grad_norm": 0.4724809229373932,
+      "learning_rate": 4.091343475959928e-05,
+      "loss": 4.30932502746582,
+      "step": 9470
+    },
+    {
+      "epoch": 1.6016218955904713,
+      "grad_norm": 0.463838666677475,
+      "learning_rate": 4.058521675157563e-05,
+      "loss": 4.312925338745117,
+      "step": 9480
+    },
+    {
+      "epoch": 1.603311370163879,
+      "grad_norm": 0.48553308844566345,
+      "learning_rate": 4.025811448031792e-05,
+      "loss": 4.316752624511719,
+      "step": 9490
+    },
+    {
+      "epoch": 1.6050008447372868,
+      "grad_norm": 0.4514022171497345,
+      "learning_rate": 3.993213128139027e-05,
+      "loss": 4.320250701904297,
+      "step": 9500
+    },
+    {
+      "epoch": 1.6050008447372868,
+      "eval_loss": 4.275903224945068,
+      "eval_runtime": 3.5803,
+      "eval_samples_per_second": 279.306,
+      "eval_steps_per_second": 5.865,
+      "step": 9500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.177318894608056e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null