Training in progress, step 71000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a07616faa045066ec0c6bc3a39f81fd70daf70607d96d0e781dd6c34dcb93bac
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:757633efe84a53c5ec97a90a7f4675f908dbeafb070171c08276f4ceae89bf82
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9a037d3fe4119e06bc9061da9083521b60e6c6995c76fa79acf6ccf4d47db48
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c319382408e536debfaba9985144c2b85aedc267f1adb41fa2fcd682a710d69
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce52a0fe6f5e9f18a4b09c87839e49a605cf4ac6c8b60ff37506c33748b93356
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b55180ad5c333f626bc6ef839beda747e8f0633fdb8a2329d1af0642155fcad0
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3330dae2d90b27ab31e1b3a875fe5cc81976ac373bbcaffe36a7fd41e6b0b4f7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fb9e669a1e66d6084675ac17f9361f1d66f6538870dda5d62bb9fedf0717021
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c261298b41c9253298dd0b7fe9d0a70e7ec2c12dc3d995355a08c0fc31994d03
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:93c86e46203b6a91184b0093d776c5c5cbb5568a55f409f62928f5b11605d793
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1a0b78acdb08786028f54a8bd9831bbf3ab754e9fc46f50ba80121f99f2998b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:57649dd5fae41007b8326ad8bceda3664e8263c16462c398827f7c60518777a9
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a426a703a44cb3e7bbfa24a198521fd4285b0c71838a0067a8a121cdfc2dd80
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:065fc078fd1aeeb645695c18fb1eff98c533b26302779a57f06b17d1e0565e6a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10369202874935562,
   "eval_steps": 500,
-  "global_step": 70000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24508,6 +24508,356 @@
       "learning_rate": 0.000482837447795457,
       "loss": 16.578,
       "step": 70000
     }
   ],
   "logging_steps": 20,
@@ -24527,7 +24877,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.146634442721408e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10517334344577499,
   "eval_steps": 500,
+  "global_step": 71000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000482837447795457,
       "loss": 16.578,
       "step": 70000
+    },
+    {
+      "epoch": 0.10372165504328401,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004828325088604494,
+      "loss": 16.562,
+      "step": 70020
+    },
+    {
+      "epoch": 0.1037512813372124,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048282756992544187,
+      "loss": 16.5095,
+      "step": 70040
+    },
+    {
+      "epoch": 0.10378090763114078,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.00048282263099043427,
+      "loss": 16.5207,
+      "step": 70060
+    },
+    {
+      "epoch": 0.10381053392506917,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004828176920554267,
+      "loss": 16.5471,
+      "step": 70080
+    },
+    {
+      "epoch": 0.10384016021899757,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048281275312041916,
+      "loss": 16.524,
+      "step": 70100
+    },
+    {
+      "epoch": 0.10386978651292596,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004828078141854116,
+      "loss": 16.4794,
+      "step": 70120
+    },
+    {
+      "epoch": 0.10389941280685434,
+      "grad_norm": 6.75,
+      "learning_rate": 0.000482802875250404,
+      "loss": 16.5229,
+      "step": 70140
+    },
+    {
+      "epoch": 0.10392903910078273,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048279793631539645,
+      "loss": 16.5296,
+      "step": 70160
+    },
+    {
+      "epoch": 0.10395866539471112,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004827929973803889,
+      "loss": 16.5783,
+      "step": 70180
+    },
+    {
+      "epoch": 0.1039882916886395,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004827880584453813,
+      "loss": 16.5537,
+      "step": 70200
+    },
+    {
+      "epoch": 0.10401791798256789,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048278311951037374,
+      "loss": 16.5815,
+      "step": 70220
+    },
+    {
+      "epoch": 0.10404754427649628,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004827781805753662,
+      "loss": 16.5732,
+      "step": 70240
+    },
+    {
+      "epoch": 0.10407717057042466,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048277324164035864,
+      "loss": 16.5605,
+      "step": 70260
+    },
+    {
+      "epoch": 0.10410679686435305,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048276830270535103,
+      "loss": 16.5224,
+      "step": 70280
+    },
+    {
+      "epoch": 0.10413642315828144,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004827633637703435,
+      "loss": 16.5452,
+      "step": 70300
+    },
+    {
+      "epoch": 0.10416604945220982,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004827584248353359,
+      "loss": 16.5098,
+      "step": 70320
+    },
+    {
+      "epoch": 0.10419567574613821,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004827534859003284,
+      "loss": 16.5676,
+      "step": 70340
+    },
+    {
+      "epoch": 0.1042253020400666,
+      "grad_norm": 6.875,
+      "learning_rate": 0.00048274854696532077,
+      "loss": 16.4514,
+      "step": 70360
+    },
+    {
+      "epoch": 0.10425492833399498,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004827436080303132,
+      "loss": 16.5532,
+      "step": 70380
+    },
+    {
+      "epoch": 0.10428455462792338,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048273866909530566,
+      "loss": 16.5367,
+      "step": 70400
+    },
+    {
+      "epoch": 0.10431418092185177,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004827337301602981,
+      "loss": 16.5784,
+      "step": 70420
+    },
+    {
+      "epoch": 0.10434380721578015,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004827287912252905,
+      "loss": 16.5085,
+      "step": 70440
+    },
+    {
+      "epoch": 0.10437343350970854,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048272385229028295,
+      "loss": 16.5486,
+      "step": 70460
+    },
+    {
+      "epoch": 0.10440305980363693,
+      "grad_norm": 5.75,
+      "learning_rate": 0.0004827189133552754,
+      "loss": 16.4874,
+      "step": 70480
+    },
+    {
+      "epoch": 0.10443268609756531,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048271397442026785,
+      "loss": 16.4957,
+      "step": 70500
+    },
+    {
+      "epoch": 0.1044623123914937,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048270903548526024,
+      "loss": 16.5765,
+      "step": 70520
+    },
+    {
+      "epoch": 0.10449193868542209,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048270409655025263,
+      "loss": 16.5136,
+      "step": 70540
+    },
+    {
+      "epoch": 0.10452156497935047,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048269915761524514,
+      "loss": 16.5299,
+      "step": 70560
+    },
+    {
+      "epoch": 0.10455119127327886,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048269421868023753,
+      "loss": 16.4576,
+      "step": 70580
+    },
+    {
+      "epoch": 0.10458081756720725,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048268927974523,
+      "loss": 16.4926,
+      "step": 70600
+    },
+    {
+      "epoch": 0.10461044386113563,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004826843408102224,
+      "loss": 16.4703,
+      "step": 70620
+    },
+    {
+      "epoch": 0.10464007015506402,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004826794018752149,
+      "loss": 16.5149,
+      "step": 70640
+    },
+    {
+      "epoch": 0.1046696964489924,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048267446294020727,
+      "loss": 16.4792,
+      "step": 70660
+    },
+    {
+      "epoch": 0.10469932274292079,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004826695240051997,
+      "loss": 16.5295,
+      "step": 70680
+    },
+    {
+      "epoch": 0.10472894903684918,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048266458507019216,
+      "loss": 16.5405,
+      "step": 70700
+    },
+    {
+      "epoch": 0.10475857533077758,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004826596461351846,
+      "loss": 16.543,
+      "step": 70720
+    },
+    {
+      "epoch": 0.10478820162470596,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.000482654707200177,
+      "loss": 16.5194,
+      "step": 70740
+    },
+    {
+      "epoch": 0.10481782791863435,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048264976826516945,
+      "loss": 16.5217,
+      "step": 70760
+    },
+    {
+      "epoch": 0.10484745421256274,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004826448293301619,
+      "loss": 16.5735,
+      "step": 70780
+    },
+    {
+      "epoch": 0.10487708050649112,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048263989039515435,
+      "loss": 16.4886,
+      "step": 70800
+    },
+    {
+      "epoch": 0.10490670680041951,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048263495146014674,
+      "loss": 16.4874,
+      "step": 70820
+    },
+    {
+      "epoch": 0.1049363330943479,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048263001252513913,
+      "loss": 16.5424,
+      "step": 70840
+    },
+    {
+      "epoch": 0.10496595938827628,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048262507359013164,
+      "loss": 16.5108,
+      "step": 70860
+    },
+    {
+      "epoch": 0.10499558568220467,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048262013465512403,
+      "loss": 16.4778,
+      "step": 70880
+    },
+    {
+      "epoch": 0.10502521197613306,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004826151957201165,
+      "loss": 16.4908,
+      "step": 70900
+    },
+    {
+      "epoch": 0.10505483827006144,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004826102567851089,
+      "loss": 16.4564,
+      "step": 70920
+    },
+    {
+      "epoch": 0.10508446456398983,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004826053178501014,
+      "loss": 16.5258,
+      "step": 70940
+    },
+    {
+      "epoch": 0.10511409085791822,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048260037891509377,
+      "loss": 16.5403,
+      "step": 70960
+    },
+    {
+      "epoch": 0.1051437171518466,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004825954399800862,
+      "loss": 16.575,
+      "step": 70980
+    },
+    {
+      "epoch": 0.10517334344577499,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048259050104507866,
+      "loss": 16.5599,
+      "step": 71000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.220171364156257e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null