Training in progress, step 1794, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +277 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a01d5cb64f0c537a59a0cdfcf1bc3a560ff1761f3abd4cf499bed94982d2222b
 size 389074464

 version https://git-lfs.github.com/spec/v1
+oid sha256:442d270b50645e8920121f8bcb1e3642bfd469619b17f2c672139a95fd23af56
 size 389074464

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e7686aa5ab2d44d69c5f4408fe4db2b21f4858bcadc7a6fc0ce0e75707e1a34
 size 198016005

 version https://git-lfs.github.com/spec/v1
+oid sha256:186f1f434e7d3e152023d123ef21c94c4a0d6ce9718d70926a1817fd851c08fe
 size 198016005

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4a99307857adad062a03c20182e56acb1143f23b345fb9eaeaec2099e02e31a
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:4453f8a4437d3cd9972effeb19da458043df1075458e51d17d709e5b85678e59
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.3411371237458196,
   "eval_steps": 500,
-  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -988,6 +988,279 @@
       "learning_rate": 2.3107863556921237e-05,
       "loss": 3.587228012084961,
       "step": 1400
     }
   ],
   "logging_steps": 10,
@@ -1002,12 +1275,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6052200791938826e+18,
   "train_batch_size": 42,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.0,
   "eval_steps": 500,
+  "global_step": 1794,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 2.3107863556921237e-05,
       "loss": 3.587228012084961,
       "step": 1400
+    },
+    {
+      "epoch": 2.3578595317725752,
+      "grad_norm": 0.21534579992294312,
+      "learning_rate": 2.199705098358459e-05,
+      "loss": 3.6128841400146485,
+      "step": 1410
+    },
+    {
+      "epoch": 2.374581939799331,
+      "grad_norm": 0.19469194114208221,
+      "learning_rate": 2.091029194607431e-05,
+      "loss": 3.612522506713867,
+      "step": 1420
+    },
+    {
+      "epoch": 2.391304347826087,
+      "grad_norm": 0.19594796001911163,
+      "learning_rate": 1.984792156502072e-05,
+      "loss": 3.612636184692383,
+      "step": 1430
+    },
+    {
+      "epoch": 2.408026755852843,
+      "grad_norm": 0.19416235387325287,
+      "learning_rate": 1.8810267440397246e-05,
+      "loss": 3.611737823486328,
+      "step": 1440
+    },
+    {
+      "epoch": 2.4247491638795986,
+      "grad_norm": 0.20568251609802246,
+      "learning_rate": 1.779764955049925e-05,
+      "loss": 3.6048515319824217,
+      "step": 1450
+    },
+    {
+      "epoch": 2.4414715719063547,
+      "grad_norm": 0.21181504428386688,
+      "learning_rate": 1.6810380153273362e-05,
+      "loss": 3.60155029296875,
+      "step": 1460
+    },
+    {
+      "epoch": 2.4581939799331103,
+      "grad_norm": 0.20696775615215302,
+      "learning_rate": 1.584876369002751e-05,
+      "loss": 3.623727035522461,
+      "step": 1470
+    },
+    {
+      "epoch": 2.4749163879598663,
+      "grad_norm": 0.20903262495994568,
+      "learning_rate": 1.4913096691551077e-05,
+      "loss": 3.623518371582031,
+      "step": 1480
+    },
+    {
+      "epoch": 2.491638795986622,
+      "grad_norm": 0.22404134273529053,
+      "learning_rate": 1.4003667686674793e-05,
+      "loss": 3.6152099609375,
+      "step": 1490
+    },
+    {
+      "epoch": 2.508361204013378,
+      "grad_norm": 0.2037034034729004,
+      "learning_rate": 1.3120757113297777e-05,
+      "loss": 3.612331771850586,
+      "step": 1500
+    },
+    {
+      "epoch": 2.5250836120401337,
+      "grad_norm": 0.21633440256118774,
+      "learning_rate": 1.226463723190987e-05,
+      "loss": 3.601060485839844,
+      "step": 1510
+    },
+    {
+      "epoch": 2.5418060200668897,
+      "grad_norm": 0.20832663774490356,
+      "learning_rate": 1.1435572041635489e-05,
+      "loss": 3.6438526153564452,
+      "step": 1520
+    },
+    {
+      "epoch": 2.5585284280936453,
+      "grad_norm": 0.21630828082561493,
+      "learning_rate": 1.0633817198824858e-05,
+      "loss": 3.6141563415527345,
+      "step": 1530
+    },
+    {
+      "epoch": 2.5752508361204014,
+      "grad_norm": 0.21327731013298035,
+      "learning_rate": 9.859619938218222e-06,
+      "loss": 3.5744644165039063,
+      "step": 1540
+    },
+    {
+      "epoch": 2.591973244147157,
+      "grad_norm": 0.20320357382297516,
+      "learning_rate": 9.113218996706651e-06,
+      "loss": 3.6112804412841797,
+      "step": 1550
+    },
+    {
+      "epoch": 2.608695652173913,
+      "grad_norm": 0.20723755657672882,
+      "learning_rate": 8.394844539713587e-06,
+      "loss": 3.6093166351318358,
+      "step": 1560
+    },
+    {
+      "epoch": 2.625418060200669,
+      "grad_norm": 0.20720350742340088,
+      "learning_rate": 7.704718090219299e-06,
+      "loss": 3.6185359954833984,
+      "step": 1570
+    },
+    {
+      "epoch": 2.6421404682274248,
+      "grad_norm": 0.20689421892166138,
+      "learning_rate": 7.043052460450595e-06,
+      "loss": 3.5807472229003907,
+      "step": 1580
+    },
+    {
+      "epoch": 2.6588628762541804,
+      "grad_norm": 0.1947193443775177,
+      "learning_rate": 6.410051686256524e-06,
+      "loss": 3.63294677734375,
+      "step": 1590
+    },
+    {
+      "epoch": 2.6755852842809364,
+      "grad_norm": 0.20829661190509796,
+      "learning_rate": 5.805910964190464e-06,
+      "loss": 3.5655914306640626,
+      "step": 1600
+    },
+    {
+      "epoch": 2.6923076923076925,
+      "grad_norm": 0.21496719121932983,
+      "learning_rate": 5.2308165913179e-06,
+      "loss": 3.6056419372558595,
+      "step": 1610
+    },
+    {
+      "epoch": 2.709030100334448,
+      "grad_norm": 0.21569029986858368,
+      "learning_rate": 4.684945907768623e-06,
+      "loss": 3.63220329284668,
+      "step": 1620
+    },
+    {
+      "epoch": 2.7257525083612038,
+      "grad_norm": 0.21249784529209137,
+      "learning_rate": 4.168467242050822e-06,
+      "loss": 3.6161312103271483,
+      "step": 1630
+    },
+    {
+      "epoch": 2.74247491638796,
+      "grad_norm": 0.19498836994171143,
+      "learning_rate": 3.6815398591441676e-06,
+      "loss": 3.6304805755615233,
+      "step": 1640
+    },
+    {
+      "epoch": 2.759197324414716,
+      "grad_norm": 0.19185300171375275,
+      "learning_rate": 3.224313911387755e-06,
+      "loss": 3.610300064086914,
+      "step": 1650
+    },
+    {
+      "epoch": 2.7759197324414715,
+      "grad_norm": 0.2097301483154297,
+      "learning_rate": 2.79693039217801e-06,
+      "loss": 3.6425819396972656,
+      "step": 1660
+    },
+    {
+      "epoch": 2.7926421404682276,
+      "grad_norm": 0.2074955701828003,
+      "learning_rate": 2.399521092491075e-06,
+      "loss": 3.5936614990234377,
+      "step": 1670
+    },
+    {
+      "epoch": 2.809364548494983,
+      "grad_norm": 0.20003236830234528,
+      "learning_rate": 2.032208560242732e-06,
+      "loss": 3.5973114013671874,
+      "step": 1680
+    },
+    {
+      "epoch": 2.8260869565217392,
+      "grad_norm": 0.20352588593959808,
+      "learning_rate": 1.695106062498708e-06,
+      "loss": 3.6302867889404298,
+      "step": 1690
+    },
+    {
+      "epoch": 2.842809364548495,
+      "grad_norm": 0.20454245805740356,
+      "learning_rate": 1.3883175505468693e-06,
+      "loss": 3.614506149291992,
+      "step": 1700
+    },
+    {
+      "epoch": 2.859531772575251,
+      "grad_norm": 0.20201674103736877,
+      "learning_rate": 1.11193762784203e-06,
+      "loss": 3.5982948303222657,
+      "step": 1710
+    },
+    {
+      "epoch": 2.8762541806020065,
+      "grad_norm": 0.20037053525447845,
+      "learning_rate": 8.660515208334108e-07,
+      "loss": 3.6015445709228517,
+      "step": 1720
+    },
+    {
+      "epoch": 2.8929765886287626,
+      "grad_norm": 0.21147583425045013,
+      "learning_rate": 6.507350526835709e-07,
+      "loss": 3.5722988128662108,
+      "step": 1730
+    },
+    {
+      "epoch": 2.9096989966555182,
+      "grad_norm": 0.20878112316131592,
+      "learning_rate": 4.6605461988707965e-07,
+      "loss": 3.6185012817382813,
+      "step": 1740
+    },
+    {
+      "epoch": 2.9264214046822743,
+      "grad_norm": 0.19872544705867767,
+      "learning_rate": 3.1206717179601554e-07,
+      "loss": 3.6195068359375,
+      "step": 1750
+    },
+    {
+      "epoch": 2.94314381270903,
+      "grad_norm": 0.21033529937267303,
+      "learning_rate": 1.8882019305866972e-07,
+      "loss": 3.6103542327880858,
+      "step": 1760
+    },
+    {
+      "epoch": 2.959866220735786,
+      "grad_norm": 0.20153765380382538,
+      "learning_rate": 9.635168897684787e-08,
+      "loss": 3.585107421875,
+      "step": 1770
+    },
+    {
+      "epoch": 2.976588628762542,
+      "grad_norm": 0.21875017881393433,
+      "learning_rate": 3.4690173786255945e-08,
+      "loss": 3.6015293121337892,
+      "step": 1780
+    },
+    {
+      "epoch": 2.9933110367892977,
+      "grad_norm": 0.2030608206987381,
+      "learning_rate": 3.8546618637225196e-09,
+      "loss": 3.6415565490722654,
+      "step": 1790
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.058402188770345e+18,
   "train_batch_size": 42,
   "trial_name": null,
   "trial_params": null