Training in progress, step 9000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:663d31a8b6ad2423dc3c0b8759bef8029d3f5914e7b173b5be641f54497bab8c
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5822c5d51ff1a3f6c8d63d9491441c689004f44619d361568f98a19df1caeab
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c93129eb07b3c389c642dd3ac521458eb6b0b8b0b4b6634a4a4ec236e73b73dd
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:e2c88f007992dd9990ea0216c73aaca02a8b4aebfac4c43fbb77c941bb9cf18e
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95d6f8a42fc11a5f0262b0c737f666f824322b1b030452310cca3fb10ffef9ad
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:88595be53afbf68c948f838fbf4b1fa7776619d23de4baf3620fece471fafed5
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6cda9bcc9266ec91d2da20eab50cd7cea609c16666645a54519c40bab7f69f1a
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:528ba9a1d2a5739586b1652bb1454f9e977f93a6ae9e9c38a71b51bc41c45de4
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.4360533873965196,
   "eval_steps": 500,
-  "global_step": 8500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6101,6 +6101,364 @@
       "eval_samples_per_second": 271.089,
       "eval_steps_per_second": 5.693,
       "step": 8500
     }
   ],
   "logging_steps": 10,
@@ -6120,7 +6478,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.8428620737491763e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.5205271160669032,
   "eval_steps": 500,
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 271.089,
       "eval_steps_per_second": 5.693,
       "step": 8500
+    },
+    {
+      "epoch": 1.4377428619699273,
+      "grad_norm": 0.4925293028354645,
+      "learning_rate": 7.706964398504293e-05,
+      "loss": 4.376210403442383,
+      "step": 8510
+    },
+    {
+      "epoch": 1.439432336543335,
+      "grad_norm": 0.4719123840332031,
+      "learning_rate": 7.665144535049224e-05,
+      "loss": 4.338931274414063,
+      "step": 8520
+    },
+    {
+      "epoch": 1.4411218111167428,
+      "grad_norm": 0.4722173511981964,
+      "learning_rate": 7.623399467409416e-05,
+      "loss": 4.352537536621094,
+      "step": 8530
+    },
+    {
+      "epoch": 1.4428112856901505,
+      "grad_norm": 0.4844585955142975,
+      "learning_rate": 7.581729621272386e-05,
+      "loss": 4.332356262207031,
+      "step": 8540
+    },
+    {
+      "epoch": 1.444500760263558,
+      "grad_norm": 0.49630841612815857,
+      "learning_rate": 7.540135421558585e-05,
+      "loss": 4.3133392333984375,
+      "step": 8550
+    },
+    {
+      "epoch": 1.4461902348369657,
+      "grad_norm": 0.472133994102478,
+      "learning_rate": 7.498617292417074e-05,
+      "loss": 4.3697349548339846,
+      "step": 8560
+    },
+    {
+      "epoch": 1.4478797094103735,
+      "grad_norm": 0.48327624797821045,
+      "learning_rate": 7.457175657221194e-05,
+      "loss": 4.366666030883789,
+      "step": 8570
+    },
+    {
+      "epoch": 1.449569183983781,
+      "grad_norm": 0.4768034815788269,
+      "learning_rate": 7.415810938564277e-05,
+      "loss": 4.33704719543457,
+      "step": 8580
+    },
+    {
+      "epoch": 1.4512586585571887,
+      "grad_norm": 0.4592680037021637,
+      "learning_rate": 7.37452355825528e-05,
+      "loss": 4.343940734863281,
+      "step": 8590
+    },
+    {
+      "epoch": 1.4529481331305965,
+      "grad_norm": 0.4643280804157257,
+      "learning_rate": 7.333313937314548e-05,
+      "loss": 4.346873474121094,
+      "step": 8600
+    },
+    {
+      "epoch": 1.454637607704004,
+      "grad_norm": 0.4980602264404297,
+      "learning_rate": 7.292182495969462e-05,
+      "loss": 4.370085525512695,
+      "step": 8610
+    },
+    {
+      "epoch": 1.4563270822774117,
+      "grad_norm": 0.4845782518386841,
+      "learning_rate": 7.251129653650206e-05,
+      "loss": 4.3420463562011715,
+      "step": 8620
+    },
+    {
+      "epoch": 1.4580165568508194,
+      "grad_norm": 0.47701558470726013,
+      "learning_rate": 7.210155828985447e-05,
+      "loss": 4.333865356445313,
+      "step": 8630
+    },
+    {
+      "epoch": 1.459706031424227,
+      "grad_norm": 0.4681967794895172,
+      "learning_rate": 7.169261439798083e-05,
+      "loss": 4.315822982788086,
+      "step": 8640
+    },
+    {
+      "epoch": 1.4613955059976347,
+      "grad_norm": 0.48438313603401184,
+      "learning_rate": 7.128446903101004e-05,
+      "loss": 4.31340446472168,
+      "step": 8650
+    },
+    {
+      "epoch": 1.4630849805710424,
+      "grad_norm": 0.4675985872745514,
+      "learning_rate": 7.087712635092802e-05,
+      "loss": 4.347599792480469,
+      "step": 8660
+    },
+    {
+      "epoch": 1.4647744551444501,
+      "grad_norm": 0.5026019215583801,
+      "learning_rate": 7.047059051153538e-05,
+      "loss": 4.3385356903076175,
+      "step": 8670
+    },
+    {
+      "epoch": 1.4664639297178579,
+      "grad_norm": 0.4908424913883209,
+      "learning_rate": 7.006486565840532e-05,
+      "loss": 4.337771224975586,
+      "step": 8680
+    },
+    {
+      "epoch": 1.4681534042912654,
+      "grad_norm": 0.47692814469337463,
+      "learning_rate": 6.96599559288411e-05,
+      "loss": 4.350002288818359,
+      "step": 8690
+    },
+    {
+      "epoch": 1.4698428788646731,
+      "grad_norm": 0.4985916316509247,
+      "learning_rate": 6.925586545183383e-05,
+      "loss": 4.357270812988281,
+      "step": 8700
+    },
+    {
+      "epoch": 1.4715323534380809,
+      "grad_norm": 0.4779921770095825,
+      "learning_rate": 6.885259834802042e-05,
+      "loss": 4.3343353271484375,
+      "step": 8710
+    },
+    {
+      "epoch": 1.4732218280114884,
+      "grad_norm": 0.4964430630207062,
+      "learning_rate": 6.845015872964179e-05,
+      "loss": 4.345649337768554,
+      "step": 8720
+    },
+    {
+      "epoch": 1.4749113025848961,
+      "grad_norm": 0.4816732108592987,
+      "learning_rate": 6.80485507005005e-05,
+      "loss": 4.349812316894531,
+      "step": 8730
+    },
+    {
+      "epoch": 1.4766007771583038,
+      "grad_norm": 0.4839925765991211,
+      "learning_rate": 6.764777835591921e-05,
+      "loss": 4.342644119262696,
+      "step": 8740
+    },
+    {
+      "epoch": 1.4782902517317114,
+      "grad_norm": 0.5161303877830505,
+      "learning_rate": 6.724784578269892e-05,
+      "loss": 4.322945022583008,
+      "step": 8750
+    },
+    {
+      "epoch": 1.479979726305119,
+      "grad_norm": 0.4845769703388214,
+      "learning_rate": 6.684875705907722e-05,
+      "loss": 4.33643798828125,
+      "step": 8760
+    },
+    {
+      "epoch": 1.4816692008785268,
+      "grad_norm": 0.48371464014053345,
+      "learning_rate": 6.645051625468657e-05,
+      "loss": 4.319810104370117,
+      "step": 8770
+    },
+    {
+      "epoch": 1.4833586754519343,
+      "grad_norm": 0.4810192286968231,
+      "learning_rate": 6.605312743051297e-05,
+      "loss": 4.350659561157227,
+      "step": 8780
+    },
+    {
+      "epoch": 1.485048150025342,
+      "grad_norm": 0.4886019825935364,
+      "learning_rate": 6.565659463885467e-05,
+      "loss": 4.340823364257813,
+      "step": 8790
+    },
+    {
+      "epoch": 1.4867376245987498,
+      "grad_norm": 0.4922144114971161,
+      "learning_rate": 6.526092192328048e-05,
+      "loss": 4.337167358398437,
+      "step": 8800
+    },
+    {
+      "epoch": 1.4884270991721575,
+      "grad_norm": 0.47720760107040405,
+      "learning_rate": 6.486611331858879e-05,
+      "loss": 4.330669403076172,
+      "step": 8810
+    },
+    {
+      "epoch": 1.490116573745565,
+      "grad_norm": 0.45629069209098816,
+      "learning_rate": 6.447217285076651e-05,
+      "loss": 4.354007339477539,
+      "step": 8820
+    },
+    {
+      "epoch": 1.4918060483189728,
+      "grad_norm": 0.4794461727142334,
+      "learning_rate": 6.407910453694782e-05,
+      "loss": 4.356667327880859,
+      "step": 8830
+    },
+    {
+      "epoch": 1.4934955228923805,
+      "grad_norm": 0.4836932420730591,
+      "learning_rate": 6.368691238537321e-05,
+      "loss": 4.3167163848876955,
+      "step": 8840
+    },
+    {
+      "epoch": 1.4951849974657883,
+      "grad_norm": 0.5060141086578369,
+      "learning_rate": 6.329560039534874e-05,
+      "loss": 4.362548828125,
+      "step": 8850
+    },
+    {
+      "epoch": 1.4968744720391958,
+      "grad_norm": 0.48216700553894043,
+      "learning_rate": 6.290517255720505e-05,
+      "loss": 4.3512012481689455,
+      "step": 8860
+    },
+    {
+      "epoch": 1.4985639466126035,
+      "grad_norm": 0.46019911766052246,
+      "learning_rate": 6.251563285225707e-05,
+      "loss": 4.32593002319336,
+      "step": 8870
+    },
+    {
+      "epoch": 1.5002534211860112,
+      "grad_norm": 0.4773600697517395,
+      "learning_rate": 6.212698525276294e-05,
+      "loss": 4.345823287963867,
+      "step": 8880
+    },
+    {
+      "epoch": 1.5019428957594188,
+      "grad_norm": 0.4903421401977539,
+      "learning_rate": 6.173923372188372e-05,
+      "loss": 4.330167770385742,
+      "step": 8890
+    },
+    {
+      "epoch": 1.5036323703328265,
+      "grad_norm": 0.47027841210365295,
+      "learning_rate": 6.135238221364313e-05,
+      "loss": 4.352994155883789,
+      "step": 8900
+    },
+    {
+      "epoch": 1.5053218449062342,
+      "grad_norm": 0.4893588125705719,
+      "learning_rate": 6.096643467288703e-05,
+      "loss": 4.3315269470214846,
+      "step": 8910
+    },
+    {
+      "epoch": 1.5070113194796417,
+      "grad_norm": 0.4835808277130127,
+      "learning_rate": 6.058139503524314e-05,
+      "loss": 4.349056625366211,
+      "step": 8920
+    },
+    {
+      "epoch": 1.5087007940530495,
+      "grad_norm": 0.4750809967517853,
+      "learning_rate": 6.019726722708104e-05,
+      "loss": 4.325545120239258,
+      "step": 8930
+    },
+    {
+      "epoch": 1.5103902686264572,
+      "grad_norm": 0.4945700466632843,
+      "learning_rate": 5.981405516547222e-05,
+      "loss": 4.312815093994141,
+      "step": 8940
+    },
+    {
+      "epoch": 1.5120797431998647,
+      "grad_norm": 0.4704221487045288,
+      "learning_rate": 5.9431762758149875e-05,
+      "loss": 4.328189849853516,
+      "step": 8950
+    },
+    {
+      "epoch": 1.5137692177732727,
+      "grad_norm": 0.48752453923225403,
+      "learning_rate": 5.9050393903469215e-05,
+      "loss": 4.324124145507812,
+      "step": 8960
+    },
+    {
+      "epoch": 1.5154586923466802,
+      "grad_norm": 0.5149093270301819,
+      "learning_rate": 5.866995249036775e-05,
+      "loss": 4.334346771240234,
+      "step": 8970
+    },
+    {
+      "epoch": 1.5171481669200877,
+      "grad_norm": 0.49064958095550537,
+      "learning_rate": 5.829044239832564e-05,
+      "loss": 4.324323654174805,
+      "step": 8980
+    },
+    {
+      "epoch": 1.5188376414934956,
+      "grad_norm": 0.486092746257782,
+      "learning_rate": 5.791186749732594e-05,
+      "loss": 4.346895599365235,
+      "step": 8990
+    },
+    {
+      "epoch": 1.5205271160669032,
+      "grad_norm": 0.48512768745422363,
+      "learning_rate": 5.7534231647815244e-05,
+      "loss": 4.350548934936524,
+      "step": 9000
+    },
+    {
+      "epoch": 1.5205271160669032,
+      "eval_loss": 4.312350273132324,
+      "eval_runtime": 4.1596,
+      "eval_samples_per_second": 240.409,
+      "eval_steps_per_second": 5.049,
+      "step": 9000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.010090484178616e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null