Training in progress, step 20000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a35269a5a8059bfd512074147e599e5b7e945f087e51b4650e79de2a05a4a09
 size 409608164

 version https://git-lfs.github.com/spec/v1
+oid sha256:4970b6682a1c930084e40f6b66854251c8db1858d69b870ca1e393e61e954fbb
 size 409608164

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a6cc5d9868b146b49a14990265e2c212f954144903aee3dcd2a71130540a0c1
 size 814647162

 version https://git-lfs.github.com/spec/v1
+oid sha256:849764d845446bd3d7144aa94ca0667e5c1b39746e443c98875eb8b189928c2b
 size 814647162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be0e325f2af75a29829ef526b8ddefc6d5a64a5dea8cae2897b367771fbababa
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:99e606ce353d140175992ac0b0aff4bee3d8fbdc73c16a9db59465a42c097743
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:402d40be6478192cb7e39b159f4802f15784c74e0f734155de53ea867af97ad4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb3093310f6b86f18bebb2d480e5705bfd30aebf684148c9e831e07105b40020
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5462110481586402,
   "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine_restarts-opt25/checkpoint-10000",
-  "epoch": 1.4795817715525745,
   "eval_steps": 2500,
-  "global_step": 15000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -343,6 +343,76 @@
       "learning_rate": 4.005099547419455e-05,
       "loss": 0.0034,
       "step": 15000
     }
   ],
   "logging_steps": 500,
@@ -362,7 +432,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2998395396121242e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.5462110481586402,
   "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine_restarts-opt25/checkpoint-10000",
+  "epoch": 1.9727756954034326,
   "eval_steps": 2500,
+  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.005099547419455e-05,
       "loss": 0.0034,
       "step": 15000
+    },
+    {
+      "epoch": 1.5289011639376602,
+      "grad_norm": 0.060984883457422256,
+      "learning_rate": 2.92545439850333e-05,
+      "loss": 0.0025,
+      "step": 15500
+    },
+    {
+      "epoch": 1.578220556322746,
+      "grad_norm": 0.027485696598887444,
+      "learning_rate": 1.751592192566606e-05,
+      "loss": 0.0019,
+      "step": 16000
+    },
+    {
+      "epoch": 1.627539948707832,
+      "grad_norm": 0.027819139882922173,
+      "learning_rate": 7.434652130505285e-06,
+      "loss": 0.0015,
+      "step": 16500
+    },
+    {
+      "epoch": 1.6768593410929178,
+      "grad_norm": 0.02593580074608326,
+      "learning_rate": 1.2432360720283142e-06,
+      "loss": 0.0013,
+      "step": 17000
+    },
+    {
+      "epoch": 1.7261787334780037,
+      "grad_norm": 0.1803756207227707,
+      "learning_rate": 4.9687234537299765e-05,
+      "loss": 0.0028,
+      "step": 17500
+    },
+    {
+      "epoch": 1.7754981258630895,
+      "grad_norm": 0.0707838386297226,
+      "learning_rate": 4.5150706586350105e-05,
+      "loss": 0.0046,
+      "step": 18000
+    },
+    {
+      "epoch": 1.8248175182481752,
+      "grad_norm": 0.04294075071811676,
+      "learning_rate": 3.6151796114194655e-05,
+      "loss": 0.0022,
+      "step": 18500
+    },
+    {
+      "epoch": 1.874136910633261,
+      "grad_norm": 0.033499088138341904,
+      "learning_rate": 2.4683315662109008e-05,
+      "loss": 0.0017,
+      "step": 19000
+    },
+    {
+      "epoch": 1.9234563030183467,
+      "grad_norm": 0.028059741482138634,
+      "learning_rate": 1.3284965090982499e-05,
+      "loss": 0.0013,
+      "step": 19500
+    },
+    {
+      "epoch": 1.9727756954034326,
+      "grad_norm": 0.016390886157751083,
+      "learning_rate": 4.480913969818085e-06,
+      "loss": 0.0011,
+      "step": 20000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 1.7329296562502707e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null