Training in progress, step 5000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +361 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71e25cdc8697039d1202fb4440876be16955562540fb206d7cbbcfc37a7f33da
 size 328277848

 version https://git-lfs.github.com/spec/v1
+oid sha256:a9bdb004cc12734dde986cb14fdf851cce0f063e2d6a2ac9c9566bb962bc0873
 size 328277848

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a082fad0106d612afcdf0e9dbf262fd1aa3ca7c9a2ef45f2a14751b1d80d165
 size 318646859

 version https://git-lfs.github.com/spec/v1
+oid sha256:68b278926ebe3e854059774715cf944c796b018b9ed04789c02ad5bd2ddb56db
 size 318646859

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3343121e0ab3aeb674ab29d872307564462c4bd82cdd92e6577a4ff26999fc00
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:5948a5161f7923aa0acf66b01adf35dc2196a8acf5bd2c21227561e5bff45666
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:839b4043be0c777e952526844484b5d7c9eb08d95c6a855198a76f2eb1f08d84
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a29280eedf28bde93a8485de1b90963ca69c84125cea86695b5935449e18f453
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7602635580334516,
   "eval_steps": 500,
-  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3237,6 +3237,364 @@
       "eval_samples_per_second": 276.482,
       "eval_steps_per_second": 5.806,
       "step": 4500
     }
   ],
   "logging_steps": 10,
@@ -3256,7 +3614,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.50505569386496e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.8447372867038351,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 276.482,
       "eval_steps_per_second": 5.806,
       "step": 4500
+    },
+    {
+      "epoch": 0.7619530326068593,
+      "grad_norm": 0.5751690864562988,
+      "learning_rate": 0.0002543762555494541,
+      "loss": 4.659806823730468,
+      "step": 4510
+    },
+    {
+      "epoch": 0.7636425071802669,
+      "grad_norm": 0.5488387942314148,
+      "learning_rate": 0.0002540317094124131,
+      "loss": 4.675619888305664,
+      "step": 4520
+    },
+    {
+      "epoch": 0.7653319817536746,
+      "grad_norm": 0.5706210136413574,
+      "learning_rate": 0.0002536861024314936,
+      "loss": 4.647731018066406,
+      "step": 4530
+    },
+    {
+      "epoch": 0.7670214563270823,
+      "grad_norm": 0.5262100100517273,
+      "learning_rate": 0.0002533394381309583,
+      "loss": 4.629973220825195,
+      "step": 4540
+    },
+    {
+      "epoch": 0.7687109309004899,
+      "grad_norm": 0.5438910126686096,
+      "learning_rate": 0.00025299172004585144,
+      "loss": 4.680305099487304,
+      "step": 4550
+    },
+    {
+      "epoch": 0.7704004054738977,
+      "grad_norm": 0.5125553011894226,
+      "learning_rate": 0.00025264295172196304,
+      "loss": 4.6679943084716795,
+      "step": 4560
+    },
+    {
+      "epoch": 0.7720898800473053,
+      "grad_norm": 0.5525355339050293,
+      "learning_rate": 0.0002522931367157928,
+      "loss": 4.6561134338378904,
+      "step": 4570
+    },
+    {
+      "epoch": 0.773779354620713,
+      "grad_norm": 0.5133577585220337,
+      "learning_rate": 0.00025194227859451384,
+      "loss": 4.66561279296875,
+      "step": 4580
+    },
+    {
+      "epoch": 0.7754688291941206,
+      "grad_norm": 0.5095699429512024,
+      "learning_rate": 0.00025159038093593606,
+      "loss": 4.678707122802734,
+      "step": 4590
+    },
+    {
+      "epoch": 0.7771583037675283,
+      "grad_norm": 0.5241293907165527,
+      "learning_rate": 0.0002512374473284699,
+      "loss": 4.642659759521484,
+      "step": 4600
+    },
+    {
+      "epoch": 0.778847778340936,
+      "grad_norm": 0.557011067867279,
+      "learning_rate": 0.00025088348137108983,
+      "loss": 4.642984771728516,
+      "step": 4610
+    },
+    {
+      "epoch": 0.7805372529143436,
+      "grad_norm": 0.5290088653564453,
+      "learning_rate": 0.0002505284866732974,
+      "loss": 4.668995666503906,
+      "step": 4620
+    },
+    {
+      "epoch": 0.7822267274877513,
+      "grad_norm": 0.519223153591156,
+      "learning_rate": 0.0002501724668550846,
+      "loss": 4.627962112426758,
+      "step": 4630
+    },
+    {
+      "epoch": 0.783916202061159,
+      "grad_norm": 0.5338088274002075,
+      "learning_rate": 0.00024981542554689684,
+      "loss": 4.67579231262207,
+      "step": 4640
+    },
+    {
+      "epoch": 0.7856056766345666,
+      "grad_norm": 0.5252251625061035,
+      "learning_rate": 0.000249457366389596,
+      "loss": 4.656952285766602,
+      "step": 4650
+    },
+    {
+      "epoch": 0.7872951512079743,
+      "grad_norm": 0.5428206324577332,
+      "learning_rate": 0.0002490982930344233,
+      "loss": 4.646731185913086,
+      "step": 4660
+    },
+    {
+      "epoch": 0.788984625781382,
+      "grad_norm": 0.5392381548881531,
+      "learning_rate": 0.0002487382091429621,
+      "loss": 4.663632583618164,
+      "step": 4670
+    },
+    {
+      "epoch": 0.7906741003547897,
+      "grad_norm": 0.51649409532547,
+      "learning_rate": 0.00024837711838710035,
+      "loss": 4.620084762573242,
+      "step": 4680
+    },
+    {
+      "epoch": 0.7923635749281973,
+      "grad_norm": 0.5248917937278748,
+      "learning_rate": 0.00024801502444899353,
+      "loss": 4.66024169921875,
+      "step": 4690
+    },
+    {
+      "epoch": 0.794053049501605,
+      "grad_norm": 0.5321633219718933,
+      "learning_rate": 0.00024765193102102676,
+      "loss": 4.647469329833984,
+      "step": 4700
+    },
+    {
+      "epoch": 0.7957425240750127,
+      "grad_norm": 0.5236574411392212,
+      "learning_rate": 0.0002472878418057772,
+      "loss": 4.6667522430419925,
+      "step": 4710
+    },
+    {
+      "epoch": 0.7974319986484203,
+      "grad_norm": 0.5166000127792358,
+      "learning_rate": 0.0002469227605159766,
+      "loss": 4.6316486358642575,
+      "step": 4720
+    },
+    {
+      "epoch": 0.799121473221828,
+      "grad_norm": 0.6069431304931641,
+      "learning_rate": 0.0002465566908744729,
+      "loss": 4.614125442504883,
+      "step": 4730
+    },
+    {
+      "epoch": 0.8008109477952357,
+      "grad_norm": 0.5319153666496277,
+      "learning_rate": 0.00024618963661419285,
+      "loss": 4.649255752563477,
+      "step": 4740
+    },
+    {
+      "epoch": 0.8025004223686434,
+      "grad_norm": 0.4894997477531433,
+      "learning_rate": 0.0002458216014781035,
+      "loss": 4.621485900878906,
+      "step": 4750
+    },
+    {
+      "epoch": 0.804189896942051,
+      "grad_norm": 0.516018807888031,
+      "learning_rate": 0.00024545258921917416,
+      "loss": 4.630000305175781,
+      "step": 4760
+    },
+    {
+      "epoch": 0.8058793715154587,
+      "grad_norm": 0.5458150506019592,
+      "learning_rate": 0.0002450826036003384,
+      "loss": 4.635307312011719,
+      "step": 4770
+    },
+    {
+      "epoch": 0.8075688460888664,
+      "grad_norm": 0.5067882537841797,
+      "learning_rate": 0.00024471164839445526,
+      "loss": 4.636883163452149,
+      "step": 4780
+    },
+    {
+      "epoch": 0.809258320662274,
+      "grad_norm": 0.4767204523086548,
+      "learning_rate": 0.0002443397273842709,
+      "loss": 4.645626831054687,
+      "step": 4790
+    },
+    {
+      "epoch": 0.8109477952356817,
+      "grad_norm": 0.5159788727760315,
+      "learning_rate": 0.00024396684436238025,
+      "loss": 4.605623626708985,
+      "step": 4800
+    },
+    {
+      "epoch": 0.8126372698090893,
+      "grad_norm": 0.5320490598678589,
+      "learning_rate": 0.00024359300313118814,
+      "loss": 4.638732147216797,
+      "step": 4810
+    },
+    {
+      "epoch": 0.8143267443824971,
+      "grad_norm": 0.5451418161392212,
+      "learning_rate": 0.00024321820750287045,
+      "loss": 4.6449028015136715,
+      "step": 4820
+    },
+    {
+      "epoch": 0.8160162189559047,
+      "grad_norm": 0.5369979739189148,
+      "learning_rate": 0.00024284246129933543,
+      "loss": 4.602875518798828,
+      "step": 4830
+    },
+    {
+      "epoch": 0.8177056935293124,
+      "grad_norm": 0.5349618196487427,
+      "learning_rate": 0.0002424657683521847,
+      "loss": 4.624568939208984,
+      "step": 4840
+    },
+    {
+      "epoch": 0.8193951681027201,
+      "grad_norm": 0.5187742114067078,
+      "learning_rate": 0.00024208813250267404,
+      "loss": 4.621414566040039,
+      "step": 4850
+    },
+    {
+      "epoch": 0.8210846426761277,
+      "grad_norm": 0.49689674377441406,
+      "learning_rate": 0.00024170955760167436,
+      "loss": 4.63438606262207,
+      "step": 4860
+    },
+    {
+      "epoch": 0.8227741172495354,
+      "grad_norm": 0.5191966891288757,
+      "learning_rate": 0.0002413300475096322,
+      "loss": 4.629247665405273,
+      "step": 4870
+    },
+    {
+      "epoch": 0.824463591822943,
+      "grad_norm": 0.5321470499038696,
+      "learning_rate": 0.00024094960609653078,
+      "loss": 4.630535507202149,
+      "step": 4880
+    },
+    {
+      "epoch": 0.8261530663963508,
+      "grad_norm": 0.577171802520752,
+      "learning_rate": 0.00024056823724185014,
+      "loss": 4.614957809448242,
+      "step": 4890
+    },
+    {
+      "epoch": 0.8278425409697584,
+      "grad_norm": 0.5203391313552856,
+      "learning_rate": 0.00024018594483452783,
+      "loss": 4.597796630859375,
+      "step": 4900
+    },
+    {
+      "epoch": 0.829532015543166,
+      "grad_norm": 0.568663477897644,
+      "learning_rate": 0.00023980273277291893,
+      "loss": 4.630698394775391,
+      "step": 4910
+    },
+    {
+      "epoch": 0.8312214901165738,
+      "grad_norm": 0.5214170813560486,
+      "learning_rate": 0.00023941860496475687,
+      "loss": 4.6348930358886715,
+      "step": 4920
+    },
+    {
+      "epoch": 0.8329109646899814,
+      "grad_norm": 0.5391976237297058,
+      "learning_rate": 0.00023903356532711296,
+      "loss": 4.6155132293701175,
+      "step": 4930
+    },
+    {
+      "epoch": 0.8346004392633891,
+      "grad_norm": 0.4739229381084442,
+      "learning_rate": 0.0002386476177863568,
+      "loss": 4.622202301025391,
+      "step": 4940
+    },
+    {
+      "epoch": 0.8362899138367967,
+      "grad_norm": 0.5011942386627197,
+      "learning_rate": 0.00023826076627811628,
+      "loss": 4.608601379394531,
+      "step": 4950
+    },
+    {
+      "epoch": 0.8379793884102045,
+      "grad_norm": 0.5716709494590759,
+      "learning_rate": 0.0002378730147472371,
+      "loss": 4.581511306762695,
+      "step": 4960
+    },
+    {
+      "epoch": 0.8396688629836121,
+      "grad_norm": 0.5052880644798279,
+      "learning_rate": 0.00023748436714774294,
+      "loss": 4.649203491210938,
+      "step": 4970
+    },
+    {
+      "epoch": 0.8413583375570197,
+      "grad_norm": 0.512668788433075,
+      "learning_rate": 0.00023709482744279492,
+      "loss": 4.621175765991211,
+      "step": 4980
+    },
+    {
+      "epoch": 0.8430478121304275,
+      "grad_norm": 0.5231815576553345,
+      "learning_rate": 0.00023670439960465128,
+      "loss": 4.606881713867187,
+      "step": 4990
+    },
+    {
+      "epoch": 0.8447372867038351,
+      "grad_norm": 0.5233691930770874,
+      "learning_rate": 0.00023631308761462677,
+      "loss": 4.614410018920898,
+      "step": 5000
+    },
+    {
+      "epoch": 0.8447372867038351,
+      "eval_loss": 4.584611415863037,
+      "eval_runtime": 3.6357,
+      "eval_samples_per_second": 275.05,
+      "eval_steps_per_second": 5.776,
+      "step": 5000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.6722841042944e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null