Training in progress, step 7000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:025bd3895676bb45f85e978709ee3100916e8b661a4acf118d66da1a34578c20
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:a619d173a81b959c06c6819e63784a3964cf704234614a53b41a95f8c4ce423b
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cf7ac773382c8d1aee6b04a9257745ebc6433c31bf32a35f2c28ca8a787ce38
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc07de972c404bd9d03e65d6a0a8bb8a57f33213d57e57b51c19d276698a2990
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05cc36ea509ae3d9ed977bcbbb89394adc9cfa825eece71f5a8c15d91b056c25
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:fde34039d7b04934a891fddf8651f7147686cc194dd14ef9c544d9f194e3db54
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7625b80d9de0d6ccbc24d3a4c5243fa59067b74a09e5adcbf41abd0b3dc345bd
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d095629c4afecfa399dffed86284dc4231689f617f0e254b3490299c477dd5
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e54e106208b222d163a78aeaa2cd5bd8e56e84cd4e12d099c444853b53df5a7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6404ff16418ff06858ba815c4899c94a4c015e7870eab3f1b01051d9d511b73
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3e76718220132de3d23a940da7a48146836d61db9316b76dbeaedf3227d328d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b933704c82ebaf750aa2519cd157aa39099844e58ed4ac2bed0623c91353a70d
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab2e7c5d1b7b91b754d6d981240bbb0789c029bfc4c8a94a02e7d3189581b5fd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfcd8a09e8e46c589c8638cc20283a9b31e9d60ec45a6122361751489d45607f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.012674827305477964,
   "eval_steps": 500,
-  "global_step": 6500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4558,6 +4558,356 @@
       "learning_rate": 0.0004980497158287391,
       "loss": 17.1964,
       "step": 6500
     }
   ],
   "logging_steps": 10,
@@ -4577,7 +4927,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4462433847435854e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.01364981402128396,
   "eval_steps": 500,
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004980497158287391,
       "loss": 17.1964,
       "step": 6500
+    },
+    {
+      "epoch": 0.012694327039794083,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004980464648132845,
+      "loss": 17.1415,
+      "step": 6510
+    },
+    {
+      "epoch": 0.012713826774110202,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004980432137978297,
+      "loss": 17.2665,
+      "step": 6520
+    },
+    {
+      "epoch": 0.012733326508426323,
+      "grad_norm": 8.875,
+      "learning_rate": 0.000498039962782375,
+      "loss": 17.1203,
+      "step": 6530
+    },
+    {
+      "epoch": 0.012752826242742443,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004980367117669203,
+      "loss": 17.1298,
+      "step": 6540
+    },
+    {
+      "epoch": 0.012772325977058562,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004980334607514657,
+      "loss": 17.1376,
+      "step": 6550
+    },
+    {
+      "epoch": 0.012791825711374683,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000498030209736011,
+      "loss": 17.1568,
+      "step": 6560
+    },
+    {
+      "epoch": 0.012811325445690802,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004980269587205563,
+      "loss": 17.1515,
+      "step": 6570
+    },
+    {
+      "epoch": 0.012830825180006923,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004980237077051017,
+      "loss": 17.2512,
+      "step": 6580
+    },
+    {
+      "epoch": 0.012850324914323042,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.000498020456689647,
+      "loss": 17.1904,
+      "step": 6590
+    },
+    {
+      "epoch": 0.012869824648639162,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004980172056741923,
+      "loss": 17.1924,
+      "step": 6600
+    },
+    {
+      "epoch": 0.012889324382955283,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004980139546587376,
+      "loss": 17.179,
+      "step": 6610
+    },
+    {
+      "epoch": 0.012908824117271402,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.000498010703643283,
+      "loss": 17.1094,
+      "step": 6620
+    },
+    {
+      "epoch": 0.012928323851587523,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004980074526278283,
+      "loss": 17.2043,
+      "step": 6630
+    },
+    {
+      "epoch": 0.012947823585903642,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004980042016123736,
+      "loss": 17.3089,
+      "step": 6640
+    },
+    {
+      "epoch": 0.012967323320219762,
+      "grad_norm": 7.375,
+      "learning_rate": 0.000498000950596919,
+      "loss": 17.1986,
+      "step": 6650
+    },
+    {
+      "epoch": 0.012986823054535883,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004979976995814643,
+      "loss": 17.2439,
+      "step": 6660
+    },
+    {
+      "epoch": 0.013006322788852002,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004979944485660096,
+      "loss": 17.2103,
+      "step": 6670
+    },
+    {
+      "epoch": 0.013025822523168121,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004979911975505549,
+      "loss": 17.1663,
+      "step": 6680
+    },
+    {
+      "epoch": 0.013045322257484242,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004979879465351003,
+      "loss": 17.2314,
+      "step": 6690
+    },
+    {
+      "epoch": 0.013064821991800361,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004979846955196456,
+      "loss": 17.1084,
+      "step": 6700
+    },
+    {
+      "epoch": 0.013084321726116482,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004979814445041909,
+      "loss": 17.2277,
+      "step": 6710
+    },
+    {
+      "epoch": 0.013103821460432602,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004979781934887363,
+      "loss": 17.2464,
+      "step": 6720
+    },
+    {
+      "epoch": 0.013123321194748721,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004979749424732816,
+      "loss": 17.1748,
+      "step": 6730
+    },
+    {
+      "epoch": 0.013142820929064842,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004979716914578268,
+      "loss": 17.3005,
+      "step": 6740
+    },
+    {
+      "epoch": 0.013162320663380961,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004979684404423721,
+      "loss": 17.2685,
+      "step": 6750
+    },
+    {
+      "epoch": 0.013181820397697082,
+      "grad_norm": 13.25,
+      "learning_rate": 0.0004979651894269175,
+      "loss": 17.1753,
+      "step": 6760
+    },
+    {
+      "epoch": 0.013201320132013201,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004979619384114628,
+      "loss": 17.1454,
+      "step": 6770
+    },
+    {
+      "epoch": 0.01322081986632932,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004979586873960081,
+      "loss": 17.0365,
+      "step": 6780
+    },
+    {
+      "epoch": 0.013240319600645442,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004979554363805534,
+      "loss": 17.2339,
+      "step": 6790
+    },
+    {
+      "epoch": 0.013259819334961561,
+      "grad_norm": 13.0625,
+      "learning_rate": 0.0004979521853650988,
+      "loss": 17.1252,
+      "step": 6800
+    },
+    {
+      "epoch": 0.01327931906927768,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004979489343496441,
+      "loss": 17.1759,
+      "step": 6810
+    },
+    {
+      "epoch": 0.013298818803593801,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004979456833341894,
+      "loss": 17.1725,
+      "step": 6820
+    },
+    {
+      "epoch": 0.01331831853790992,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004979424323187348,
+      "loss": 17.1098,
+      "step": 6830
+    },
+    {
+      "epoch": 0.013337818272226042,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004979391813032801,
+      "loss": 17.2499,
+      "step": 6840
+    },
+    {
+      "epoch": 0.013357318006542161,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004979359302878254,
+      "loss": 17.3004,
+      "step": 6850
+    },
+    {
+      "epoch": 0.01337681774085828,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004979326792723707,
+      "loss": 17.1902,
+      "step": 6860
+    },
+    {
+      "epoch": 0.013396317475174401,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004979294282569161,
+      "loss": 17.0263,
+      "step": 6870
+    },
+    {
+      "epoch": 0.01341581720949052,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004979261772414614,
+      "loss": 17.2367,
+      "step": 6880
+    },
+    {
+      "epoch": 0.013435316943806641,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004979229262260067,
+      "loss": 17.2496,
+      "step": 6890
+    },
+    {
+      "epoch": 0.01345481667812276,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004979196752105521,
+      "loss": 17.0661,
+      "step": 6900
+    },
+    {
+      "epoch": 0.01347431641243888,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004979164241950974,
+      "loss": 17.1662,
+      "step": 6910
+    },
+    {
+      "epoch": 0.013493816146755001,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004979131731796427,
+      "loss": 17.2432,
+      "step": 6920
+    },
+    {
+      "epoch": 0.01351331588107112,
+      "grad_norm": 9.5,
+      "learning_rate": 0.000497909922164188,
+      "loss": 17.2159,
+      "step": 6930
+    },
+    {
+      "epoch": 0.01353281561538724,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004979066711487334,
+      "loss": 17.1567,
+      "step": 6940
+    },
+    {
+      "epoch": 0.01355231534970336,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004979034201332787,
+      "loss": 17.2054,
+      "step": 6950
+    },
+    {
+      "epoch": 0.01357181508401948,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.000497900169117824,
+      "loss": 17.1396,
+      "step": 6960
+    },
+    {
+      "epoch": 0.0135913148183356,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004978969181023694,
+      "loss": 17.2691,
+      "step": 6970
+    },
+    {
+      "epoch": 0.01361081455265172,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004978936670869146,
+      "loss": 17.1819,
+      "step": 6980
+    },
+    {
+      "epoch": 0.01363031428696784,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004978904160714599,
+      "loss": 17.2082,
+      "step": 6990
+    },
+    {
+      "epoch": 0.01364981402128396,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004978871650560052,
+      "loss": 17.2572,
+      "step": 7000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.557487423730588e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null