Training in progress, step 17000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42301bc164cb007a8e9ffaaebd3b674826efaacc96f02799ea8c54ebdf5beff1
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e1f6084c2fd12874836176a807971d304a89f7ecfc63e2081a9bd54f224b13b
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e166c3997353d811bb7375dab7e17cf88064b52029e8056c729ba4ae8d2e8f22
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:729c0d767d06adf4295f1acf80d3c9a43aee84e3de6cc9a899725bd2d9ba998b
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8653c4f16bb3c4531444bd438e2a397c259c928e9f5a96f450fc3aa43ef0f5c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6775411b7c96ce112db0ff86dbc4c7f4f5876ba69512e78981d49611b5ed959e
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91185d0e7a47d1f7979000c680b3a146a800c2ff31f983b75b24ceb331884072
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c2e00f40f2b965358ee58725a6039af41eeb8a8f4527ae152ec5dad618307fd
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be0be34d9684d804e2f3030fceca4c7b93603e6596a44aaf270c97cb1740b1da
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:37ee15f1c9ceef9e456d1af53da3ed0fd0ec244051b974379f15c285ed42f8b7
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e539799e7e99b66c33c364546118319f901c9765aa17eaf7cf8b17906c00c95a
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e79d7f1dfea25dc4809dc0e5c220d70f3b690693b546131b59ad7f9ed9b129c
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccc2a52ae0327def30cc40f7f273a4a1537961b9b580753fe57ec7ecdab69b35
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:84d957adbd57639a95ced1440a685d29db26c75001a9b3061d2f7af9b9a721b1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.031199574905791908,
   "eval_steps": 500,
-  "global_step": 16000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5608,6 +5608,356 @@
       "learning_rate": 0.0004949612511467957,
       "loss": 20.3333,
       "step": 16000
     }
   ],
   "logging_steps": 20,
@@ -5627,7 +5977,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.176271382718605e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.033149548337403904,
   "eval_steps": 500,
+  "global_step": 17000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004949612511467957,
       "loss": 20.3333,
       "step": 16000
+    },
+    {
+      "epoch": 0.03123857437442415,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004949547491158863,
+      "loss": 20.2811,
+      "step": 16020
+    },
+    {
+      "epoch": 0.03127757384305639,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.000494948247084977,
+      "loss": 20.367,
+      "step": 16040
+    },
+    {
+      "epoch": 0.03131657331168863,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004949417450540676,
+      "loss": 20.3134,
+      "step": 16060
+    },
+    {
+      "epoch": 0.03135557278032087,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004949352430231583,
+      "loss": 20.3922,
+      "step": 16080
+    },
+    {
+      "epoch": 0.03139457224895311,
+      "grad_norm": 10.375,
+      "learning_rate": 0.000494928740992249,
+      "loss": 20.3097,
+      "step": 16100
+    },
+    {
+      "epoch": 0.031433571717585346,
+      "grad_norm": 11.375,
+      "learning_rate": 0.0004949222389613396,
+      "loss": 20.3737,
+      "step": 16120
+    },
+    {
+      "epoch": 0.03147257118621759,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004949157369304303,
+      "loss": 20.3886,
+      "step": 16140
+    },
+    {
+      "epoch": 0.03151157065484983,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004949092348995209,
+      "loss": 20.2403,
+      "step": 16160
+    },
+    {
+      "epoch": 0.031550570123482065,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004949027328686116,
+      "loss": 20.3402,
+      "step": 16180
+    },
+    {
+      "epoch": 0.03158956959211431,
+      "grad_norm": 14.6875,
+      "learning_rate": 0.0004948962308377022,
+      "loss": 20.3529,
+      "step": 16200
+    },
+    {
+      "epoch": 0.03162856906074655,
+      "grad_norm": 11.5,
+      "learning_rate": 0.0004948897288067928,
+      "loss": 20.2767,
+      "step": 16220
+    },
+    {
+      "epoch": 0.031667568529378784,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004948832267758834,
+      "loss": 20.2271,
+      "step": 16240
+    },
+    {
+      "epoch": 0.031706567998011026,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004948767247449741,
+      "loss": 20.3672,
+      "step": 16260
+    },
+    {
+      "epoch": 0.03174556746664327,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004948702227140648,
+      "loss": 20.3693,
+      "step": 16280
+    },
+    {
+      "epoch": 0.03178456693527551,
+      "grad_norm": 13.6875,
+      "learning_rate": 0.0004948637206831554,
+      "loss": 20.2767,
+      "step": 16300
+    },
+    {
+      "epoch": 0.031823566403907745,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004948572186522461,
+      "loss": 20.2559,
+      "step": 16320
+    },
+    {
+      "epoch": 0.03186256587253999,
+      "grad_norm": 12.1875,
+      "learning_rate": 0.0004948507166213367,
+      "loss": 20.2962,
+      "step": 16340
+    },
+    {
+      "epoch": 0.03190156534117223,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004948442145904274,
+      "loss": 20.2648,
+      "step": 16360
+    },
+    {
+      "epoch": 0.031940564809804464,
+      "grad_norm": 10.9375,
+      "learning_rate": 0.0004948377125595179,
+      "loss": 20.2703,
+      "step": 16380
+    },
+    {
+      "epoch": 0.031979564278436706,
+      "grad_norm": 12.1875,
+      "learning_rate": 0.0004948312105286086,
+      "loss": 20.3281,
+      "step": 16400
+    },
+    {
+      "epoch": 0.03201856374706895,
+      "grad_norm": 12.1875,
+      "learning_rate": 0.0004948247084976992,
+      "loss": 20.2317,
+      "step": 16420
+    },
+    {
+      "epoch": 0.03205756321570118,
+      "grad_norm": 11.375,
+      "learning_rate": 0.0004948182064667899,
+      "loss": 20.2883,
+      "step": 16440
+    },
+    {
+      "epoch": 0.032096562684333425,
+      "grad_norm": 12.8125,
+      "learning_rate": 0.0004948117044358806,
+      "loss": 20.2294,
+      "step": 16460
+    },
+    {
+      "epoch": 0.03213556215296567,
+      "grad_norm": 12.5625,
+      "learning_rate": 0.0004948052024049712,
+      "loss": 20.1226,
+      "step": 16480
+    },
+    {
+      "epoch": 0.0321745616215979,
+      "grad_norm": 11.375,
+      "learning_rate": 0.0004947987003740619,
+      "loss": 20.2422,
+      "step": 16500
+    },
+    {
+      "epoch": 0.032213561090230144,
+      "grad_norm": 11.375,
+      "learning_rate": 0.0004947921983431524,
+      "loss": 20.2142,
+      "step": 16520
+    },
+    {
+      "epoch": 0.032252560558862386,
+      "grad_norm": 12.5,
+      "learning_rate": 0.0004947856963122431,
+      "loss": 20.2658,
+      "step": 16540
+    },
+    {
+      "epoch": 0.03229156002749463,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004947791942813337,
+      "loss": 20.1552,
+      "step": 16560
+    },
+    {
+      "epoch": 0.032330559496126864,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004947726922504244,
+      "loss": 20.1369,
+      "step": 16580
+    },
+    {
+      "epoch": 0.032369558964759106,
+      "grad_norm": 11.375,
+      "learning_rate": 0.000494766190219515,
+      "loss": 20.1965,
+      "step": 16600
+    },
+    {
+      "epoch": 0.03240855843339135,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004947596881886057,
+      "loss": 20.2377,
+      "step": 16620
+    },
+    {
+      "epoch": 0.03244755790202358,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004947531861576964,
+      "loss": 20.2204,
+      "step": 16640
+    },
+    {
+      "epoch": 0.032486557370655825,
+      "grad_norm": 11.375,
+      "learning_rate": 0.000494746684126787,
+      "loss": 20.1081,
+      "step": 16660
+    },
+    {
+      "epoch": 0.03252555683928807,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004947401820958777,
+      "loss": 20.3024,
+      "step": 16680
+    },
+    {
+      "epoch": 0.0325645563079203,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004947336800649683,
+      "loss": 20.1351,
+      "step": 16700
+    },
+    {
+      "epoch": 0.032603555776552544,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004947271780340589,
+      "loss": 20.1989,
+      "step": 16720
+    },
+    {
+      "epoch": 0.032642555245184786,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004947206760031495,
+      "loss": 20.1502,
+      "step": 16740
+    },
+    {
+      "epoch": 0.03268155471381702,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004947141739722402,
+      "loss": 20.0948,
+      "step": 16760
+    },
+    {
+      "epoch": 0.03272055418244926,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004947076719413309,
+      "loss": 20.1084,
+      "step": 16780
+    },
+    {
+      "epoch": 0.032759553651081505,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004947011699104215,
+      "loss": 20.1207,
+      "step": 16800
+    },
+    {
+      "epoch": 0.03279855311971375,
+      "grad_norm": 11.1875,
+      "learning_rate": 0.0004946946678795122,
+      "loss": 20.0984,
+      "step": 16820
+    },
+    {
+      "epoch": 0.03283755258834598,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.0004946881658486028,
+      "loss": 20.1778,
+      "step": 16840
+    },
+    {
+      "epoch": 0.032876552056978224,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.0004946816638176935,
+      "loss": 20.2415,
+      "step": 16860
+    },
+    {
+      "epoch": 0.032915551525610466,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004946751617867841,
+      "loss": 20.1135,
+      "step": 16880
+    },
+    {
+      "epoch": 0.0329545509942427,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004946686597558748,
+      "loss": 20.1361,
+      "step": 16900
+    },
+    {
+      "epoch": 0.03299355046287494,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004946621577249655,
+      "loss": 20.0907,
+      "step": 16920
+    },
+    {
+      "epoch": 0.033032549931507185,
+      "grad_norm": 14.5,
+      "learning_rate": 0.0004946556556940561,
+      "loss": 20.1267,
+      "step": 16940
+    },
+    {
+      "epoch": 0.03307154940013942,
+      "grad_norm": 12.875,
+      "learning_rate": 0.0004946491536631467,
+      "loss": 20.0818,
+      "step": 16960
+    },
+    {
+      "epoch": 0.03311054886877166,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004946426516322373,
+      "loss": 20.1085,
+      "step": 16980
+    },
+    {
+      "epoch": 0.033149548337403904,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.000494636149601328,
+      "loss": 20.0712,
+      "step": 17000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 1.2497927616331776e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null