ribesstefano committed on
Commit
be0f6de
·
verified ·
1 Parent(s): 0637211

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a35269a5a8059bfd512074147e599e5b7e945f087e51b4650e79de2a05a4a09
3
  size 409608164
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4970b6682a1c930084e40f6b66854251c8db1858d69b870ca1e393e61e954fbb
3
  size 409608164
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a6cc5d9868b146b49a14990265e2c212f954144903aee3dcd2a71130540a0c1
3
  size 814647162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:849764d845446bd3d7144aa94ca0667e5c1b39746e443c98875eb8b189928c2b
3
  size 814647162
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be0e325f2af75a29829ef526b8ddefc6d5a64a5dea8cae2897b367771fbababa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e606ce353d140175992ac0b0aff4bee3d8fbdc73c16a9db59465a42c097743
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:402d40be6478192cb7e39b159f4802f15784c74e0f734155de53ea867af97ad4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3093310f6b86f18bebb2d480e5705bfd30aebf684148c9e831e07105b40020
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.5462110481586402,
3
  "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine_restarts-opt25/checkpoint-10000",
4
- "epoch": 1.4795817715525745,
5
  "eval_steps": 2500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -343,6 +343,76 @@
343
  "learning_rate": 4.005099547419455e-05,
344
  "loss": 0.0034,
345
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  }
347
  ],
348
  "logging_steps": 500,
@@ -362,7 +432,7 @@
362
  "attributes": {}
363
  }
364
  },
365
- "total_flos": 1.2998395396121242e+17,
366
  "train_batch_size": 128,
367
  "trial_name": null,
368
  "trial_params": null
 
1
  {
2
  "best_metric": 0.5462110481586402,
3
  "best_model_checkpoint": "/mimer/NOBACKUP/groups/naiss2023-6-290/stefano/models//PROTAC-Splitter-EncoderDecoder-lr_cosine_restarts-opt25/checkpoint-10000",
4
+ "epoch": 1.9727756954034326,
5
  "eval_steps": 2500,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
343
  "learning_rate": 4.005099547419455e-05,
344
  "loss": 0.0034,
345
  "step": 15000
346
+ },
347
+ {
348
+ "epoch": 1.5289011639376602,
349
+ "grad_norm": 0.060984883457422256,
350
+ "learning_rate": 2.92545439850333e-05,
351
+ "loss": 0.0025,
352
+ "step": 15500
353
+ },
354
+ {
355
+ "epoch": 1.578220556322746,
356
+ "grad_norm": 0.027485696598887444,
357
+ "learning_rate": 1.751592192566606e-05,
358
+ "loss": 0.0019,
359
+ "step": 16000
360
+ },
361
+ {
362
+ "epoch": 1.627539948707832,
363
+ "grad_norm": 0.027819139882922173,
364
+ "learning_rate": 7.434652130505285e-06,
365
+ "loss": 0.0015,
366
+ "step": 16500
367
+ },
368
+ {
369
+ "epoch": 1.6768593410929178,
370
+ "grad_norm": 0.02593580074608326,
371
+ "learning_rate": 1.2432360720283142e-06,
372
+ "loss": 0.0013,
373
+ "step": 17000
374
+ },
375
+ {
376
+ "epoch": 1.7261787334780037,
377
+ "grad_norm": 0.1803756207227707,
378
+ "learning_rate": 4.9687234537299765e-05,
379
+ "loss": 0.0028,
380
+ "step": 17500
381
+ },
382
+ {
383
+ "epoch": 1.7754981258630895,
384
+ "grad_norm": 0.0707838386297226,
385
+ "learning_rate": 4.5150706586350105e-05,
386
+ "loss": 0.0046,
387
+ "step": 18000
388
+ },
389
+ {
390
+ "epoch": 1.8248175182481752,
391
+ "grad_norm": 0.04294075071811676,
392
+ "learning_rate": 3.6151796114194655e-05,
393
+ "loss": 0.0022,
394
+ "step": 18500
395
+ },
396
+ {
397
+ "epoch": 1.874136910633261,
398
+ "grad_norm": 0.033499088138341904,
399
+ "learning_rate": 2.4683315662109008e-05,
400
+ "loss": 0.0017,
401
+ "step": 19000
402
+ },
403
+ {
404
+ "epoch": 1.9234563030183467,
405
+ "grad_norm": 0.028059741482138634,
406
+ "learning_rate": 1.3284965090982499e-05,
407
+ "loss": 0.0013,
408
+ "step": 19500
409
+ },
410
+ {
411
+ "epoch": 1.9727756954034326,
412
+ "grad_norm": 0.016390886157751083,
413
+ "learning_rate": 4.480913969818085e-06,
414
+ "loss": 0.0011,
415
+ "step": 20000
416
  }
417
  ],
418
  "logging_steps": 500,
 
432
  "attributes": {}
433
  }
434
  },
435
+ "total_flos": 1.7329296562502707e+17,
436
  "train_batch_size": 128,
437
  "trial_name": null,
438
  "trial_params": null