Training in progress, step 3564, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +130 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16b6a4a294a1cc7668de259fbd48b70aa442eeff2115186962274aec99084529
 size 2950734544

 version https://git-lfs.github.com/spec/v1
+oid sha256:0713a48c012baea1fd0fe85458998ff2d3f30ea39f3a397f2c2c05a6c662d181
 size 2950734544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:055b521fe84aaa03f833218cef3a750a0edce67c730dd29402f719d25a5c68f7
 size 5357408

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3b243449f31e422589ead2437362b30eedba441a1287618227cb835db3b1b87
 size 5357408

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:792acc32e247e4fae3661c67d51f5f956241a846a12afb588fa3e1e4cdaec8cc
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:29117602da5370738e1e6b1d075a348793d2f8f1bc194b48c87fa9e2c883da52
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ccef800644988b261b844e19a8cab522da84de3a3b5e2c5a0e38ca72aeb92c9
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb2fbc646b980ba4517f5e7af3567931794b062e20b71d2610c9a2b1554d1be1
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 3200,
   "best_metric": 9.882343832146034e-09,
   "best_model_checkpoint": "./humanizer-ckpt/checkpoint-3200",
-  "epoch": 2.6938947368421053,
   "eval_steps": 400,
-  "global_step": 3200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1192,6 +1192,132 @@
       "eval_samples_per_second": 96.598,
       "eval_steps_per_second": 12.075,
       "step": 3200
     }
   ],
   "logging_steps": 20,
@@ -1206,12 +1332,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.5407890399232e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 3200,
   "best_metric": 9.882343832146034e-09,
   "best_model_checkpoint": "./humanizer-ckpt/checkpoint-3200",
+  "epoch": 3.0,
   "eval_steps": 400,
+  "global_step": 3564,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 96.598,
       "eval_steps_per_second": 12.075,
       "step": 3200
+    },
+    {
+      "epoch": 2.710736842105263,
+      "grad_norm": 0.00035652011865749955,
+      "learning_rate": 7.282783880194681e-06,
+      "loss": 6.31846342002973e-06,
+      "step": 3220
+    },
+    {
+      "epoch": 2.7275789473684213,
+      "grad_norm": 6.81265300954692e-05,
+      "learning_rate": 6.468825834761165e-06,
+      "loss": 1.4050155004952104e-05,
+      "step": 3240
+    },
+    {
+      "epoch": 2.744421052631579,
+      "grad_norm": 0.00012173700815765187,
+      "learning_rate": 5.7020891286328915e-06,
+      "loss": 5.758084807894193e-06,
+      "step": 3260
+    },
+    {
+      "epoch": 2.761263157894737,
+      "grad_norm": 0.0001060321883414872,
+      "learning_rate": 4.9828260159586795e-06,
+      "loss": 7.252940849866718e-06,
+      "step": 3280
+    },
+    {
+      "epoch": 2.7781052631578946,
+      "grad_norm": 0.00028356886468827724,
+      "learning_rate": 4.311273132212895e-06,
+      "loss": 6.122983177192509e-06,
+      "step": 3300
+    },
+    {
+      "epoch": 2.7949473684210524,
+      "grad_norm": 0.00015243480447679758,
+      "learning_rate": 3.6876514163432904e-06,
+      "loss": 6.343883433146402e-06,
+      "step": 3320
+    },
+    {
+      "epoch": 2.8117894736842106,
+      "grad_norm": 0.00012169565161457285,
+      "learning_rate": 3.1121660380825864e-06,
+      "loss": 5.4682153859175745e-06,
+      "step": 3340
+    },
+    {
+      "epoch": 2.8286315789473684,
+      "grad_norm": 6.715493509545922e-05,
+      "learning_rate": 2.5850063304482804e-06,
+      "loss": 5.410446465248242e-06,
+      "step": 3360
+    },
+    {
+      "epoch": 2.845473684210526,
+      "grad_norm": 5.5883188906591386e-05,
+      "learning_rate": 2.106345727452691e-06,
+      "loss": 7.158593507483601e-06,
+      "step": 3380
+    },
+    {
+      "epoch": 2.8623157894736844,
+      "grad_norm": 0.00011430613085394725,
+      "learning_rate": 1.6763417070435326e-06,
+      "loss": 1.355467684334144e-05,
+      "step": 3400
+    },
+    {
+      "epoch": 2.879157894736842,
+      "grad_norm": 0.00011039682431146502,
+      "learning_rate": 1.2951357392942618e-06,
+      "loss": 5.743457586504519e-06,
+      "step": 3420
+    },
+    {
+      "epoch": 2.896,
+      "grad_norm": 0.000167473524925299,
+      "learning_rate": 9.628532398607892e-07,
+      "loss": 8.994613745016978e-06,
+      "step": 3440
+    },
+    {
+      "epoch": 2.9128421052631577,
+      "grad_norm": 0.0004898277693428099,
+      "learning_rate": 6.796035287200186e-07,
+      "loss": 6.411921640392393e-06,
+      "step": 3460
+    },
+    {
+      "epoch": 2.929684210526316,
+      "grad_norm": 0.0002149147039745003,
+      "learning_rate": 4.4547979420403444e-07,
+      "loss": 5.693230923498049e-06,
+      "step": 3480
+    },
+    {
+      "epoch": 2.9465263157894737,
+      "grad_norm": 0.00029297475703060627,
+      "learning_rate": 2.605590623412923e-07,
+      "loss": 5.634667832055129e-06,
+      "step": 3500
+    },
+    {
+      "epoch": 2.9633684210526314,
+      "grad_norm": 0.00044662103755399585,
+      "learning_rate": 1.2490217151532312e-07,
+      "loss": 6.381121784215793e-06,
+      "step": 3520
+    },
+    {
+      "epoch": 2.9802105263157896,
+      "grad_norm": 0.00033745335531421006,
+      "learning_rate": 3.8553752449110854e-08,
+      "loss": 5.156782572157681e-06,
+      "step": 3540
+    },
+    {
+      "epoch": 2.9970526315789474,
+      "grad_norm": 0.00013488579133991152,
+      "learning_rate": 1.5422135216547338e-09,
+      "loss": 5.085239899926819e-06,
+      "step": 3560
     }
   ],
   "logging_steps": 20,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6.1703847936e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null