Training in progress, epoch 2, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +69 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f73b33dedf56ea8e1aa20dd84acf052b46c8e2b35c744ad7fc8688e94dcb9d1b
 size 1713050034

 version https://git-lfs.github.com/spec/v1
+oid sha256:219be41537bbb98fca70ca3f58664027f22ac8b626937fc54ed0c58b1a583287
 size 1713050034

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ed7e3994da7109f1b4357e9e48212e35f2244f7521ab00b5fa1cdeffd035a87
 size 816721594

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c557980c0526bbe9b748ef020546f7bc8e22ad8fcbd68d484140a76b913f895
 size 816721594

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e80ecfd4bb3e12f16f0fadd4143e7efcd2344334f82d3a7d112c1a118bf729c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f5f4c3829d3c61d60d4aa81f39b1ae90c914023d099d2c2879c131506416ca01
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:957618c4a816fe27d3be89c8df199dd30cef92286611cdb093e42cb95779a12f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:81c3e6445408f158b3d87cbf8d2d8e36840ad25379ff7117c45407306acac4e6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 6.921035674167797e-05,
-  "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-7944",
-  "epoch": 1.9998741267543583,
   "eval_steps": 500,
-  "global_step": 7944,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -128,6 +128,70 @@
       "eval_samples_per_second": 15.24,
       "eval_steps_per_second": 0.952,
       "step": 7944
     }
   ],
   "logging_steps": 500,
@@ -156,7 +220,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.334668285654661e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 6.317481165751815e-05,
+  "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-11916",
+  "epoch": 2.9998111901315374,
   "eval_steps": 500,
+  "global_step": 11916,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.24,
       "eval_steps_per_second": 0.952,
       "step": 7944
+    },
+    {
+      "epoch": 2.013971930266222,
+      "grad_norm": 0.003448486328125,
+      "learning_rate": 1.877715273687297e-05,
+      "loss": 0.0002,
+      "step": 8000
+    },
+    {
+      "epoch": 2.139845175907861,
+      "grad_norm": 0.01324462890625,
+      "learning_rate": 1.857005206448375e-05,
+      "loss": 0.0002,
+      "step": 8500
+    },
+    {
+      "epoch": 2.2657184215495,
+      "grad_norm": 0.0032806396484375,
+      "learning_rate": 1.8348091451167224e-05,
+      "loss": 0.0002,
+      "step": 9000
+    },
+    {
+      "epoch": 2.3915916671911384,
+      "grad_norm": 0.00182342529296875,
+      "learning_rate": 1.8111655762916885e-05,
+      "loss": 0.0002,
+      "step": 9500
+    },
+    {
+      "epoch": 2.5174649128327773,
+      "grad_norm": 0.007781982421875,
+      "learning_rate": 1.786115496461207e-05,
+      "loss": 0.0002,
+      "step": 10000
+    },
+    {
+      "epoch": 2.643338158474416,
+      "grad_norm": 0.00250244140625,
+      "learning_rate": 1.759702340916418e-05,
+      "loss": 0.0002,
+      "step": 10500
+    },
+    {
+      "epoch": 2.769211404116055,
+      "grad_norm": 0.00185394287109375,
+      "learning_rate": 1.7319719084375556e-05,
+      "loss": 0.0001,
+      "step": 11000
+    },
+    {
+      "epoch": 2.895084649757694,
+      "grad_norm": 0.00445556640625,
+      "learning_rate": 1.702972281881693e-05,
+      "loss": 0.0001,
+      "step": 11500
+    },
+    {
+      "epoch": 2.9998111901315374,
+      "eval_loss": 6.317481165751815e-05,
+      "eval_runtime": 16782.274,
+      "eval_samples_per_second": 15.248,
+      "eval_steps_per_second": 0.953,
+      "step": 11916
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 8.002002428481992e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null