Training in progress, step 58000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:972aa91ec388a1f2f04b57475bbe0ef1d7a488751339adb89aa78c0871d0f22b
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:55010ca37211cc6b640c88e9f40807107bec277ebcc5b0b118f1cea15eed44f5
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f588ba0d0b39a0c0daf2cb6afacca8a7aef1f4bc72fe4409ce0b2281d2e356a
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc364080893bb423d47b8bfaac6a84d534e79aa0580cf54e20a609c7ac276c5b
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93c5029373839975c8e2ce486239c3c93c8bcc84856a9726f25e6b39e80d4bdb
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:8c37c5923f3d68f847ed300ddb34aea7ac5e2328c7df69f2be7f755bc9e45036
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba86940b99fa7512a6bd263e7bdaf7ba94fc8e695324bdfda4c03882f64aa78d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b439560e2350d72b3dd331a1a8b64962c6b47e1a1078857e970f6226f8e52122
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf8627c515e0a9fd4095a16f3cf6f960eebbddd06bd5667ffafe332a0150e802
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d72e4bdd3428ede798be981d61f831c294e4c1f306f292cd3880bbf3dd42566d
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55937ecae83bb1b9ebb2721682f64ea1aca1aefba9e61d245b7d516977f878f9
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:13a06ce0cb98db3e26ada8fd779ab287dc006dddd3604ca6c762fd20a85c4365
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:039c09879ba9a48ef7918776fd751a67234de8e6a37518ae707982e7427ed8c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:703b87170b0696f3b2a83c775117cb6a49f63bf8b6fd7a85d19b5f6decf028d6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.08443493769590386,
   "eval_steps": 500,
-  "global_step": 57000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19958,6 +19958,356 @@
       "learning_rate": 0.00048604775555037517,
       "loss": 16.9712,
       "step": 57000
     }
   ],
   "logging_steps": 20,
@@ -19977,7 +20327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.190665024641329e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.08591625239232323,
   "eval_steps": 500,
+  "global_step": 58000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048604775555037517,
       "loss": 16.9712,
       "step": 57000
+    },
+    {
+      "epoch": 0.08446456398983225,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048604281661536756,
+      "loss": 17.056,
+      "step": 57020
+    },
+    {
+      "epoch": 0.08449419028376064,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.00048603787768036007,
+      "loss": 17.0091,
+      "step": 57040
+    },
+    {
+      "epoch": 0.08452381657768904,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048603293874535246,
+      "loss": 16.9849,
+      "step": 57060
+    },
+    {
+      "epoch": 0.08455344287161742,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004860279998103449,
+      "loss": 17.0407,
+      "step": 57080
+    },
+    {
+      "epoch": 0.08458306916554581,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004860230608753373,
+      "loss": 17.0114,
+      "step": 57100
+    },
+    {
+      "epoch": 0.0846126954594742,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004860181219403298,
+      "loss": 17.0502,
+      "step": 57120
+    },
+    {
+      "epoch": 0.08464232175340258,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004860131830053222,
+      "loss": 17.044,
+      "step": 57140
+    },
+    {
+      "epoch": 0.08467194804733097,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048600824407031464,
+      "loss": 17.0582,
+      "step": 57160
+    },
+    {
+      "epoch": 0.08470157434125936,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048600330513530704,
+      "loss": 17.0316,
+      "step": 57180
+    },
+    {
+      "epoch": 0.08473120063518774,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048599836620029954,
+      "loss": 17.0576,
+      "step": 57200
+    },
+    {
+      "epoch": 0.08476082692911613,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.00048599342726529193,
+      "loss": 17.0589,
+      "step": 57220
+    },
+    {
+      "epoch": 0.08479045322304452,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004859884883302844,
+      "loss": 16.9831,
+      "step": 57240
+    },
+    {
+      "epoch": 0.0848200795169729,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004859835493952768,
+      "loss": 16.9896,
+      "step": 57260
+    },
+    {
+      "epoch": 0.08484970581090129,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004859786104602693,
+      "loss": 16.9824,
+      "step": 57280
+    },
+    {
+      "epoch": 0.08487933210482967,
+      "grad_norm": 8.25,
+      "learning_rate": 0.00048597367152526167,
+      "loss": 16.9623,
+      "step": 57300
+    },
+    {
+      "epoch": 0.08490895839875806,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004859687325902541,
+      "loss": 16.9834,
+      "step": 57320
+    },
+    {
+      "epoch": 0.08493858469268645,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048596379365524657,
+      "loss": 16.9729,
+      "step": 57340
+    },
+    {
+      "epoch": 0.08496821098661483,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048595885472023896,
+      "loss": 17.0528,
+      "step": 57360
+    },
+    {
+      "epoch": 0.08499783728054323,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004859539157852314,
+      "loss": 17.0257,
+      "step": 57380
+    },
+    {
+      "epoch": 0.08502746357447162,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004859489768502238,
+      "loss": 16.8887,
+      "step": 57400
+    },
+    {
+      "epoch": 0.08505708986840001,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004859440379152163,
+      "loss": 17.0345,
+      "step": 57420
+    },
+    {
+      "epoch": 0.0850867161623284,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004859390989802087,
+      "loss": 16.9734,
+      "step": 57440
+    },
+    {
+      "epoch": 0.08511634245625678,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048593416004520114,
+      "loss": 17.0113,
+      "step": 57460
+    },
+    {
+      "epoch": 0.08514596875018517,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048592922111019354,
+      "loss": 16.9691,
+      "step": 57480
+    },
+    {
+      "epoch": 0.08517559504411355,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048592428217518604,
+      "loss": 17.0288,
+      "step": 57500
+    },
+    {
+      "epoch": 0.08520522133804194,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048591934324017843,
+      "loss": 17.0317,
+      "step": 57520
+    },
+    {
+      "epoch": 0.08523484763197033,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004859144043051709,
+      "loss": 17.0653,
+      "step": 57540
+    },
+    {
+      "epoch": 0.08526447392589871,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004859094653701633,
+      "loss": 17.0473,
+      "step": 57560
+    },
+    {
+      "epoch": 0.0852941002198271,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004859045264351558,
+      "loss": 16.9975,
+      "step": 57580
+    },
+    {
+      "epoch": 0.08532372651375549,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048589958750014817,
+      "loss": 16.9854,
+      "step": 57600
+    },
+    {
+      "epoch": 0.08535335280768387,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004858946485651406,
+      "loss": 16.9975,
+      "step": 57620
+    },
+    {
+      "epoch": 0.08538297910161226,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048588970963013307,
+      "loss": 17.0178,
+      "step": 57640
+    },
+    {
+      "epoch": 0.08541260539554064,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004858847706951255,
+      "loss": 16.9128,
+      "step": 57660
+    },
+    {
+      "epoch": 0.08544223168946903,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004858798317601179,
+      "loss": 17.0329,
+      "step": 57680
+    },
+    {
+      "epoch": 0.08547185798339743,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004858748928251103,
+      "loss": 17.0416,
+      "step": 57700
+    },
+    {
+      "epoch": 0.08550148427732582,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004858699538901028,
+      "loss": 17.0423,
+      "step": 57720
+    },
+    {
+      "epoch": 0.0855311105712542,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004858650149550952,
+      "loss": 16.9665,
+      "step": 57740
+    },
+    {
+      "epoch": 0.08556073686518259,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048586007602008765,
+      "loss": 16.9616,
+      "step": 57760
+    },
+    {
+      "epoch": 0.08559036315911098,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.00048585513708508004,
+      "loss": 16.9745,
+      "step": 57780
+    },
+    {
+      "epoch": 0.08561998945303936,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048585019815007254,
+      "loss": 16.9302,
+      "step": 57800
+    },
+    {
+      "epoch": 0.08564961574696775,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048584525921506493,
+      "loss": 16.9713,
+      "step": 57820
+    },
+    {
+      "epoch": 0.08567924204089614,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004858403202800574,
+      "loss": 16.9939,
+      "step": 57840
+    },
+    {
+      "epoch": 0.08570886833482452,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004858353813450498,
+      "loss": 16.9307,
+      "step": 57860
+    },
+    {
+      "epoch": 0.08573849462875291,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004858304424100423,
+      "loss": 17.044,
+      "step": 57880
+    },
+    {
+      "epoch": 0.0857681209226813,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048582550347503467,
+      "loss": 17.0308,
+      "step": 57900
+    },
+    {
+      "epoch": 0.08579774721660968,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004858205645400271,
+      "loss": 17.0287,
+      "step": 57920
+    },
+    {
+      "epoch": 0.08582737351053807,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.00048581562560501957,
+      "loss": 17.018,
+      "step": 57940
+    },
+    {
+      "epoch": 0.08585699980446646,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.000485810686670012,
+      "loss": 16.9714,
+      "step": 57960
+    },
+    {
+      "epoch": 0.08588662609839484,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004858057477350044,
+      "loss": 16.9836,
+      "step": 57980
+    },
+    {
+      "epoch": 0.08591625239232323,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048580080879999686,
+      "loss": 16.9677,
+      "step": 58000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 4.264201071083966e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null