Training in progress, epoch 1, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +69 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:178530fd64f5986b87c68d4d25287bb190e9dc948cfcc39ffd1fb87d818d0756
 size 1713050034

 version https://git-lfs.github.com/spec/v1
+oid sha256:f73b33dedf56ea8e1aa20dd84acf052b46c8e2b35c744ad7fc8688e94dcb9d1b
 size 1713050034

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11c0457a86b221612d25748e0aa0dda1614f71fb98d3fe0ccd5424e96cd0f7ee
 size 816721594

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ed7e3994da7109f1b4357e9e48212e35f2244f7521ab00b5fa1cdeffd035a87
 size 816721594

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:557d974c659fd0bb715c14aad20461250be107b6c6982f3edba5bffe93c03d67
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e80ecfd4bb3e12f16f0fadd4143e7efcd2344334f82d3a7d112c1a118bf729c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f243675b1550a6e1c2b2f39823155e548c4797f1bca6374367a6b4525499792
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:957618c4a816fe27d3be89c8df199dd30cef92286611cdb093e42cb95779a12f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 9.923595644067973e-05,
-  "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-3972",
-  "epoch": 0.9999370633771791,
   "eval_steps": 500,
-  "global_step": 3972,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -64,6 +64,70 @@
       "eval_samples_per_second": 15.226,
       "eval_steps_per_second": 0.952,
       "step": 3972
     }
   ],
   "logging_steps": 500,
@@ -92,7 +156,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.6673341428273306e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 6.921035674167797e-05,
+  "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-7944",
+  "epoch": 1.9998741267543583,
   "eval_steps": 500,
+  "global_step": 7944,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.226,
       "eval_steps_per_second": 0.952,
       "step": 3972
+    },
+    {
+      "epoch": 1.006985965133111,
+      "grad_norm": 0.0023651123046875,
+      "learning_rate": 1.9861585387115228e-05,
+      "loss": 0.0002,
+      "step": 4000
+    },
+    {
+      "epoch": 1.13285921077475,
+      "grad_norm": 0.005157470703125,
+      "learning_rate": 1.9784008444561692e-05,
+      "loss": 0.0002,
+      "step": 4500
+    },
+    {
+      "epoch": 1.2587324564163886,
+      "grad_norm": 0.009765625,
+      "learning_rate": 1.9689466635701106e-05,
+      "loss": 0.0002,
+      "step": 5000
+    },
+    {
+      "epoch": 1.3846057020580276,
+      "grad_norm": 0.006011962890625,
+      "learning_rate": 1.9578123890190405e-05,
+      "loss": 0.0002,
+      "step": 5500
+    },
+    {
+      "epoch": 1.5104789476996663,
+      "grad_norm": 0.01397705078125,
+      "learning_rate": 1.9450173269472915e-05,
+      "loss": 0.0002,
+      "step": 6000
+    },
+    {
+      "epoch": 1.6363521933413052,
+      "grad_norm": 0.0035247802734375,
+      "learning_rate": 1.9305836632021744e-05,
+      "loss": 0.0002,
+      "step": 6500
+    },
+    {
+      "epoch": 1.7622254389829441,
+      "grad_norm": 0.004669189453125,
+      "learning_rate": 1.9145364248650892e-05,
+      "loss": 0.0002,
+      "step": 7000
+    },
+    {
+      "epoch": 1.888098684624583,
+      "grad_norm": 0.0034332275390625,
+      "learning_rate": 1.8969034368561105e-05,
+      "loss": 0.0002,
+      "step": 7500
+    },
+    {
+      "epoch": 1.9998741267543583,
+      "eval_loss": 6.921035674167797e-05,
+      "eval_runtime": 16790.6132,
+      "eval_samples_per_second": 15.24,
+      "eval_steps_per_second": 0.952,
+      "step": 7944
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 5.334668285654661e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null