Training in progress, step 65000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6a123f0f4c5a11cb72b99972714ce78828980a28225a86c5c52efa0dfd4317d
 size 409608164

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3b254b4a3687ef7287ef183ae3baa563e0f62da93dadce76976959d35745f8b
 size 409608164

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15ee547f4d1e2effb0117ee97acb3ccd85c719ade258b1abcb9b39428c1fc01e
 size 814647162

 version https://git-lfs.github.com/spec/v1
+oid sha256:689e556e337a0cb773e690dd67bccbca34493c708d822c4bcef13f0068cadc46
 size 814647162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3146ea4f58eeba70ef991619520fba1220f5a6969c7140f3e94b740993fab985
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:14af7f8b0e4bfa0614ea002967064faf2fe92f8684ee222995eaceafe28eaba1
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8abde5ea6d7508710178712fe236e737431b4a593f0d18f03c2b4dfe63cb7155
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ef1f0d7258e92b9f9871d81d52ebcc946c37081ef596ae367fa8c041eca7a05
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5477160056657224,
   "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine-opt25/checkpoint-10000",
-  "epoch": 5.918327086210298,
   "eval_steps": 2500,
-  "global_step": 60000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -973,6 +973,76 @@
       "learning_rate": 1.7313466151363385e-05,
       "loss": 0.0004,
       "step": 60000
     }
   ],
   "logging_steps": 500,
@@ -992,7 +1062,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.1976814397455616e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.5477160056657224,
   "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine-opt25/checkpoint-10000",
+  "epoch": 6.411521010061156,
   "eval_steps": 2500,
+  "global_step": 65000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.7313466151363385e-05,
       "loss": 0.0004,
       "step": 60000
+    },
+    {
+      "epoch": 5.967646478595384,
+      "grad_norm": 0.005268739070743322,
+      "learning_rate": 1.6940380719660764e-05,
+      "loss": 0.0003,
+      "step": 60500
+    },
+    {
+      "epoch": 6.01696587098047,
+      "grad_norm": 0.006257560569792986,
+      "learning_rate": 1.6569287861489484e-05,
+      "loss": 0.0003,
+      "step": 61000
+    },
+    {
+      "epoch": 6.066285263365556,
+      "grad_norm": 0.012183616869151592,
+      "learning_rate": 1.620027932185401e-05,
+      "loss": 0.0003,
+      "step": 61500
+    },
+    {
+      "epoch": 6.115604655750641,
+      "grad_norm": 0.005832422524690628,
+      "learning_rate": 1.5834177764868226e-05,
+      "loss": 0.0003,
+      "step": 62000
+    },
+    {
+      "epoch": 6.164924048135727,
+      "grad_norm": 0.004268107004463673,
+      "learning_rate": 1.5469606390923247e-05,
+      "loss": 0.0003,
+      "step": 62500
+    },
+    {
+      "epoch": 6.2142434405208125,
+      "grad_norm": 0.004789270460605621,
+      "learning_rate": 1.5107391208925827e-05,
+      "loss": 0.0003,
+      "step": 63000
+    },
+    {
+      "epoch": 6.263562832905898,
+      "grad_norm": 0.012517180293798447,
+      "learning_rate": 1.474762176905931e-05,
+      "loss": 0.0003,
+      "step": 63500
+    },
+    {
+      "epoch": 6.312882225290984,
+      "grad_norm": 0.01253846287727356,
+      "learning_rate": 1.439038701684809e-05,
+      "loss": 0.0003,
+      "step": 64000
+    },
+    {
+      "epoch": 6.36220161767607,
+      "grad_norm": 0.014298639260232449,
+      "learning_rate": 1.4036481818516661e-05,
+      "loss": 0.0003,
+      "step": 64500
+    },
+    {
+      "epoch": 6.411521010061156,
+      "grad_norm": 0.0054347398690879345,
+      "learning_rate": 1.3684575241351546e-05,
+      "loss": 0.0003,
+      "step": 65000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 5.631143303584635e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null