Training in progress, step 80000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c756f34d6447d0a12191f2f228e5fca6325d2585555740b00f437d7c3e7004bb
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:25eece902c4fc10f4ee2062692a9aedbe51bd7b7d97a5b7d579b674f96892276
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e5534657658835263ae3eba8ff79c99616131e773d428fcce31ad61af86046b
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d7f1d706e6cfbd7062b526c5f96351aba28490563e89cc3572dbd70ff071d52
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f9cd212ee37c184ab654cd81424bd96e3d051626e53abebd61cf8f11452e1283
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:3762fb83fd702043dec9c363ac412c392bf99ebaba36635b7ce08abde68594fe
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ec46da082cb75a7e3753cfb221c9d642f2a32f3a83b3b478de73eadc477388c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:62c6e6a2cde44218a43149d5222369dc44b7c914b2ad856e2e09dfb4dca020fb
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a45666dfa471b8966a5f388cff9679b0f97f0a453b0c8aca6fb55a560f78c7c
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:fb4157f68b08406d6bc17d2638ab784f508ffb332e537043a8486d779d68898e
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba2d0810d74e907463868f0c583c57c89e9a4bad46de26ffd127e43e9b609736
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:70fc5f5dac53b26b2e075af1f8abf3943ab8de6a2ae6129d92b62d3aa9705082
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb0dc069b89d8c308dec795d21a2ac94397c5df6c87082b4999d15bf441c0a2e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d73d6a55f40d828827c6493d8d4e36859284046429b1cc4d61ff3be96f72f5ef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.11702386101712993,
   "eval_steps": 500,
-  "global_step": 79000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -27658,6 +27658,356 @@
       "learning_rate": 0.00048061492704205204,
       "loss": 16.3266,
       "step": 79000
     }
   ],
   "logging_steps": 20,
@@ -27677,7 +28027,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.808462373236769e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1185051757135493,
   "eval_steps": 500,
+  "global_step": 80000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048061492704205204,
       "loss": 16.3266,
       "step": 79000
+    },
+    {
+      "epoch": 0.11705348731105832,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048060998810704454,
+      "loss": 16.2829,
+      "step": 79020
+    },
+    {
+      "epoch": 0.1170831136049867,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048060504917203694,
+      "loss": 16.2504,
+      "step": 79040
+    },
+    {
+      "epoch": 0.11711273989891509,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004806001102370294,
+      "loss": 16.2487,
+      "step": 79060
+    },
+    {
+      "epoch": 0.11714236619284348,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004805951713020218,
+      "loss": 16.3028,
+      "step": 79080
+    },
+    {
+      "epoch": 0.11717199248677186,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004805902323670143,
+      "loss": 16.2891,
+      "step": 79100
+    },
+    {
+      "epoch": 0.11720161878070025,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004805852934320067,
+      "loss": 16.3023,
+      "step": 79120
+    },
+    {
+      "epoch": 0.11723124507462863,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004805803544969991,
+      "loss": 16.3369,
+      "step": 79140
+    },
+    {
+      "epoch": 0.11726087136855702,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004805754155619915,
+      "loss": 16.2611,
+      "step": 79160
+    },
+    {
+      "epoch": 0.11729049766248541,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.000480570476626984,
+      "loss": 16.3287,
+      "step": 79180
+    },
+    {
+      "epoch": 0.1173201239564138,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004805655376919764,
+      "loss": 16.322,
+      "step": 79200
+    },
+    {
+      "epoch": 0.11734975025034218,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048056059875696886,
+      "loss": 16.2184,
+      "step": 79220
+    },
+    {
+      "epoch": 0.11737937654427057,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.00048055565982196125,
+      "loss": 16.3263,
+      "step": 79240
+    },
+    {
+      "epoch": 0.11740900283819895,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048055072088695375,
+      "loss": 16.2757,
+      "step": 79260
+    },
+    {
+      "epoch": 0.11743862913212734,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048054578195194615,
+      "loss": 16.2582,
+      "step": 79280
+    },
+    {
+      "epoch": 0.11746825542605574,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048054084301693854,
+      "loss": 16.2983,
+      "step": 79300
+    },
+    {
+      "epoch": 0.11749788171998413,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048053590408193104,
+      "loss": 16.314,
+      "step": 79320
+    },
+    {
+      "epoch": 0.11752750801391251,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048053096514692344,
+      "loss": 16.2981,
+      "step": 79340
+    },
+    {
+      "epoch": 0.1175571343078409,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004805260262119159,
+      "loss": 16.2972,
+      "step": 79360
+    },
+    {
+      "epoch": 0.11758676060176929,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004805210872769083,
+      "loss": 16.2882,
+      "step": 79380
+    },
+    {
+      "epoch": 0.11761638689569767,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004805161483419008,
+      "loss": 16.2462,
+      "step": 79400
+    },
+    {
+      "epoch": 0.11764601318962606,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004805112094068932,
+      "loss": 16.2808,
+      "step": 79420
+    },
+    {
+      "epoch": 0.11767563948355445,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004805062704718856,
+      "loss": 16.265,
+      "step": 79440
+    },
+    {
+      "epoch": 0.11770526577748283,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.000480501331536878,
+      "loss": 16.2757,
+      "step": 79460
+    },
+    {
+      "epoch": 0.11773489207141122,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004804963926018705,
+      "loss": 16.3099,
+      "step": 79480
+    },
+    {
+      "epoch": 0.1177645183653396,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004804914536668629,
+      "loss": 16.2713,
+      "step": 79500
+    },
+    {
+      "epoch": 0.11779414465926799,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048048651473185536,
+      "loss": 16.2622,
+      "step": 79520
+    },
+    {
+      "epoch": 0.11782377095319638,
+      "grad_norm": 5.875,
+      "learning_rate": 0.00048048157579684775,
+      "loss": 16.2833,
+      "step": 79540
+    },
+    {
+      "epoch": 0.11785339724712476,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048047663686184025,
+      "loss": 16.2429,
+      "step": 79560
+    },
+    {
+      "epoch": 0.11788302354105315,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048047169792683265,
+      "loss": 16.3167,
+      "step": 79580
+    },
+    {
+      "epoch": 0.11791264983498154,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004804667589918251,
+      "loss": 16.2256,
+      "step": 79600
+    },
+    {
+      "epoch": 0.11794227612890994,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048046182005681754,
+      "loss": 16.2095,
+      "step": 79620
+    },
+    {
+      "epoch": 0.11797190242283832,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048045688112180994,
+      "loss": 16.2748,
+      "step": 79640
+    },
+    {
+      "epoch": 0.11800152871676671,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004804519421868024,
+      "loss": 16.2733,
+      "step": 79660
+    },
+    {
+      "epoch": 0.1180311550106951,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004804470032517948,
+      "loss": 16.2384,
+      "step": 79680
+    },
+    {
+      "epoch": 0.11806078130462348,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004804420643167873,
+      "loss": 16.2747,
+      "step": 79700
+    },
+    {
+      "epoch": 0.11809040759855187,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004804371253817797,
+      "loss": 16.2826,
+      "step": 79720
+    },
+    {
+      "epoch": 0.11812003389248026,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004804321864467721,
+      "loss": 16.2646,
+      "step": 79740
+    },
+    {
+      "epoch": 0.11814966018640864,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004804272475117645,
+      "loss": 16.2691,
+      "step": 79760
+    },
+    {
+      "epoch": 0.11817928648033703,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.000480422308576757,
+      "loss": 16.2096,
+      "step": 79780
+    },
+    {
+      "epoch": 0.11820891277426541,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004804173696417494,
+      "loss": 16.2607,
+      "step": 79800
+    },
+    {
+      "epoch": 0.1182385390681938,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048041243070674186,
+      "loss": 16.2072,
+      "step": 79820
+    },
+    {
+      "epoch": 0.11826816536212219,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00048040749177173425,
+      "loss": 16.2251,
+      "step": 79840
+    },
+    {
+      "epoch": 0.11829779165605057,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048040255283672675,
+      "loss": 16.2256,
+      "step": 79860
+    },
+    {
+      "epoch": 0.11832741794997896,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048039761390171915,
+      "loss": 16.3018,
+      "step": 79880
+    },
+    {
+      "epoch": 0.11835704424390735,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004803926749667116,
+      "loss": 16.273,
+      "step": 79900
+    },
+    {
+      "epoch": 0.11838667053783573,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048038773603170404,
+      "loss": 16.2277,
+      "step": 79920
+    },
+    {
+      "epoch": 0.11841629683176413,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004803827970966965,
+      "loss": 16.2374,
+      "step": 79940
+    },
+    {
+      "epoch": 0.11844592312569252,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004803778581616889,
+      "loss": 16.2584,
+      "step": 79960
+    },
+    {
+      "epoch": 0.1184755494196209,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004803729192266813,
+      "loss": 16.2437,
+      "step": 79980
+    },
+    {
+      "epoch": 0.1185051757135493,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004803679802916738,
+      "loss": 16.2565,
+      "step": 80000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.881999294671618e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null