Training in progress, step 2000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4145daebf514960424426dde3ac18791eecc28556f940d2f1279e7831c514654
 size 448472762

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8c3c2489afee023fe1b2642034587a0bfba7f9b9702eb8b912fd843cd7d1a84
 size 448472762

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b75ae1e75120c33ab2fa9bf933b8449d15710df9b78f94fa73c4c0f186c09c47
 size 151589028

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5b2b3370508976fd8007ef90e4249ff2ce2f69eb4456cd0ada442a2a9748885
 size 151589028

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c45bff611205a48d357012be58601cad8d52180e6ba8ae7b9b1ca21b9d659d0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:38749652e18bcce614c97ce9bdf3fcf3e27d562abcfb77e8d4ee8fef9ce033f9
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c19fdb0a523198817691be875750d0695dc0006f20c28aee74ce2c6f5e754fdb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc995806a1d26375df1705636e27a9de242c2427fba3dd43fa585853178b2b24
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:748f82303db435db7a8ad88a622a99fc2b1c74bd84f8ed546b1f4733414b3ff4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae93497bb2108e5e81c1355029bd994f1afe726f0900074266771d7a223f2a18
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3cd497bb715d7ddc369134434d881bc512ec6192975735d4c15cbdcb223196e4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:36fbc7f6fa9e4cc3d70f89ca68cdad6c0f766ecec8e0660c99c73f616d503ca6
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41059b96bf7552199863826b1441616d21eed16456dd0c2a6456f0486fdeecde
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed4f0c7d27d3a2f37d3ba7b466a05d56a94bc7ee56305a4c3a09de3291b6daed
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.003162538675212549,
   "eval_steps": 500,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1058,6 +1058,356 @@
       "learning_rate": 0.0004996488344396747,
       "loss": 3.1223,
       "step": 1500
     }
   ],
   "logging_steps": 10,
@@ -1077,7 +1427,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.9047061694880154e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.004216718233616732,
   "eval_steps": 500,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004996488344396747,
       "loss": 3.1223,
       "step": 1500
+    },
+    {
+      "epoch": 0.0031836222663806325,
+      "grad_norm": 1.0859375,
+      "learning_rate": 0.0004996453192689008,
+      "loss": 3.1059,
+      "step": 1510
+    },
+    {
+      "epoch": 0.0032047058575487165,
+      "grad_norm": 0.8984375,
+      "learning_rate": 0.0004996418040981267,
+      "loss": 3.0938,
+      "step": 1520
+    },
+    {
+      "epoch": 0.0032257894487168,
+      "grad_norm": 0.96484375,
+      "learning_rate": 0.0004996382889273527,
+      "loss": 3.1251,
+      "step": 1530
+    },
+    {
+      "epoch": 0.0032468730398848835,
+      "grad_norm": 0.88671875,
+      "learning_rate": 0.0004996347737565787,
+      "loss": 3.106,
+      "step": 1540
+    },
+    {
+      "epoch": 0.003267956631052967,
+      "grad_norm": 0.9765625,
+      "learning_rate": 0.0004996312585858046,
+      "loss": 3.089,
+      "step": 1550
+    },
+    {
+      "epoch": 0.003289040222221051,
+      "grad_norm": 0.96875,
+      "learning_rate": 0.0004996277434150306,
+      "loss": 3.09,
+      "step": 1560
+    },
+    {
+      "epoch": 0.0033101238133891346,
+      "grad_norm": 0.98046875,
+      "learning_rate": 0.0004996242282442566,
+      "loss": 3.0891,
+      "step": 1570
+    },
+    {
+      "epoch": 0.003331207404557218,
+      "grad_norm": 0.86328125,
+      "learning_rate": 0.0004996207130734825,
+      "loss": 3.0794,
+      "step": 1580
+    },
+    {
+      "epoch": 0.003352290995725302,
+      "grad_norm": 0.921875,
+      "learning_rate": 0.0004996171979027086,
+      "loss": 3.0718,
+      "step": 1590
+    },
+    {
+      "epoch": 0.0033733745868933856,
+      "grad_norm": 1.0234375,
+      "learning_rate": 0.0004996136827319345,
+      "loss": 3.0706,
+      "step": 1600
+    },
+    {
+      "epoch": 0.003394458178061469,
+      "grad_norm": 1.046875,
+      "learning_rate": 0.0004996101675611604,
+      "loss": 3.053,
+      "step": 1610
+    },
+    {
+      "epoch": 0.0034155417692295527,
+      "grad_norm": 0.88671875,
+      "learning_rate": 0.0004996066523903865,
+      "loss": 3.0476,
+      "step": 1620
+    },
+    {
+      "epoch": 0.0034366253603976367,
+      "grad_norm": 0.8984375,
+      "learning_rate": 0.0004996031372196125,
+      "loss": 3.0537,
+      "step": 1630
+    },
+    {
+      "epoch": 0.00345770895156572,
+      "grad_norm": 0.90625,
+      "learning_rate": 0.0004995996220488384,
+      "loss": 3.0558,
+      "step": 1640
+    },
+    {
+      "epoch": 0.0034787925427338037,
+      "grad_norm": 0.8046875,
+      "learning_rate": 0.0004995961068780644,
+      "loss": 3.0366,
+      "step": 1650
+    },
+    {
+      "epoch": 0.0034998761339018877,
+      "grad_norm": 0.9140625,
+      "learning_rate": 0.0004995925917072904,
+      "loss": 3.0584,
+      "step": 1660
+    },
+    {
+      "epoch": 0.0035209597250699712,
+      "grad_norm": 0.90625,
+      "learning_rate": 0.0004995890765365163,
+      "loss": 3.0447,
+      "step": 1670
+    },
+    {
+      "epoch": 0.0035420433162380548,
+      "grad_norm": 0.8984375,
+      "learning_rate": 0.0004995855613657423,
+      "loss": 3.0423,
+      "step": 1680
+    },
+    {
+      "epoch": 0.0035631269074061383,
+      "grad_norm": 0.91015625,
+      "learning_rate": 0.0004995820461949682,
+      "loss": 3.0278,
+      "step": 1690
+    },
+    {
+      "epoch": 0.0035842104985742223,
+      "grad_norm": 0.8359375,
+      "learning_rate": 0.0004995785310241942,
+      "loss": 3.0404,
+      "step": 1700
+    },
+    {
+      "epoch": 0.003605294089742306,
+      "grad_norm": 0.93359375,
+      "learning_rate": 0.0004995750158534202,
+      "loss": 3.0173,
+      "step": 1710
+    },
+    {
+      "epoch": 0.0036263776809103893,
+      "grad_norm": 0.83203125,
+      "learning_rate": 0.0004995715006826461,
+      "loss": 3.0233,
+      "step": 1720
+    },
+    {
+      "epoch": 0.0036474612720784733,
+      "grad_norm": 0.80078125,
+      "learning_rate": 0.0004995679855118721,
+      "loss": 3.0179,
+      "step": 1730
+    },
+    {
+      "epoch": 0.003668544863246557,
+      "grad_norm": 0.9375,
+      "learning_rate": 0.0004995644703410982,
+      "loss": 3.0283,
+      "step": 1740
+    },
+    {
+      "epoch": 0.0036896284544146404,
+      "grad_norm": 0.8203125,
+      "learning_rate": 0.000499560955170324,
+      "loss": 3.0188,
+      "step": 1750
+    },
+    {
+      "epoch": 0.003710712045582724,
+      "grad_norm": 1.0625,
+      "learning_rate": 0.00049955743999955,
+      "loss": 2.9979,
+      "step": 1760
+    },
+    {
+      "epoch": 0.003731795636750808,
+      "grad_norm": 0.87890625,
+      "learning_rate": 0.0004995539248287761,
+      "loss": 2.9959,
+      "step": 1770
+    },
+    {
+      "epoch": 0.0037528792279188914,
+      "grad_norm": 0.85546875,
+      "learning_rate": 0.000499550409658002,
+      "loss": 2.9987,
+      "step": 1780
+    },
+    {
+      "epoch": 0.003773962819086975,
+      "grad_norm": 0.8828125,
+      "learning_rate": 0.000499546894487228,
+      "loss": 3.0055,
+      "step": 1790
+    },
+    {
+      "epoch": 0.003795046410255059,
+      "grad_norm": 0.7734375,
+      "learning_rate": 0.000499543379316454,
+      "loss": 2.984,
+      "step": 1800
+    },
+    {
+      "epoch": 0.0038161300014231425,
+      "grad_norm": 0.828125,
+      "learning_rate": 0.0004995398641456799,
+      "loss": 3.0139,
+      "step": 1810
+    },
+    {
+      "epoch": 0.003837213592591226,
+      "grad_norm": 0.73046875,
+      "learning_rate": 0.0004995363489749059,
+      "loss": 2.9974,
+      "step": 1820
+    },
+    {
+      "epoch": 0.0038582971837593095,
+      "grad_norm": 0.90234375,
+      "learning_rate": 0.0004995328338041319,
+      "loss": 2.9834,
+      "step": 1830
+    },
+    {
+      "epoch": 0.0038793807749273935,
+      "grad_norm": 0.82421875,
+      "learning_rate": 0.0004995293186333578,
+      "loss": 2.9901,
+      "step": 1840
+    },
+    {
+      "epoch": 0.003900464366095477,
+      "grad_norm": 1.03125,
+      "learning_rate": 0.0004995258034625838,
+      "loss": 2.9712,
+      "step": 1850
+    },
+    {
+      "epoch": 0.003921547957263561,
+      "grad_norm": 0.84375,
+      "learning_rate": 0.0004995222882918098,
+      "loss": 2.9673,
+      "step": 1860
+    },
+    {
+      "epoch": 0.003942631548431644,
+      "grad_norm": 0.9140625,
+      "learning_rate": 0.0004995187731210357,
+      "loss": 2.9839,
+      "step": 1870
+    },
+    {
+      "epoch": 0.003963715139599728,
+      "grad_norm": 0.76171875,
+      "learning_rate": 0.0004995152579502618,
+      "loss": 2.9752,
+      "step": 1880
+    },
+    {
+      "epoch": 0.003984798730767812,
+      "grad_norm": 0.80078125,
+      "learning_rate": 0.0004995117427794878,
+      "loss": 2.9529,
+      "step": 1890
+    },
+    {
+      "epoch": 0.004005882321935895,
+      "grad_norm": 0.8125,
+      "learning_rate": 0.0004995082276087136,
+      "loss": 2.9564,
+      "step": 1900
+    },
+    {
+      "epoch": 0.004026965913103979,
+      "grad_norm": 1.109375,
+      "learning_rate": 0.0004995047124379397,
+      "loss": 2.965,
+      "step": 1910
+    },
+    {
+      "epoch": 0.004048049504272063,
+      "grad_norm": 0.94921875,
+      "learning_rate": 0.0004995011972671657,
+      "loss": 2.9564,
+      "step": 1920
+    },
+    {
+      "epoch": 0.004069133095440146,
+      "grad_norm": 0.78515625,
+      "learning_rate": 0.0004994976820963916,
+      "loss": 2.9654,
+      "step": 1930
+    },
+    {
+      "epoch": 0.00409021668660823,
+      "grad_norm": 0.875,
+      "learning_rate": 0.0004994941669256176,
+      "loss": 2.9514,
+      "step": 1940
+    },
+    {
+      "epoch": 0.004111300277776313,
+      "grad_norm": 0.76953125,
+      "learning_rate": 0.0004994906517548436,
+      "loss": 2.9288,
+      "step": 1950
+    },
+    {
+      "epoch": 0.004132383868944397,
+      "grad_norm": 1.015625,
+      "learning_rate": 0.0004994871365840695,
+      "loss": 2.9418,
+      "step": 1960
+    },
+    {
+      "epoch": 0.004153467460112481,
+      "grad_norm": 0.734375,
+      "learning_rate": 0.0004994836214132955,
+      "loss": 2.9364,
+      "step": 1970
+    },
+    {
+      "epoch": 0.004174551051280564,
+      "grad_norm": 0.78515625,
+      "learning_rate": 0.0004994801062425215,
+      "loss": 2.9262,
+      "step": 1980
+    },
+    {
+      "epoch": 0.004195634642448648,
+      "grad_norm": 0.81640625,
+      "learning_rate": 0.0004994765910717474,
+      "loss": 2.9372,
+      "step": 1990
+    },
+    {
+      "epoch": 0.004216718233616732,
+      "grad_norm": 0.98046875,
+      "learning_rate": 0.0004994730759009734,
+      "loss": 2.9266,
+      "step": 2000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6.539445548728975e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null