racheltong committed on
Commit
f4d1afa
·
verified ·
1 Parent(s): 9dd54c4

Training in progress, step 2972, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6108d5fc15eac39666d7d8946b105849460a794e2612d980177b45493ae9c867
3
  size 430662508
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8515efda803ef8bef233e3cfd356ff1989d857b0323229b36dd021c1f637e0c
3
  size 430662508
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:773e19e06a2d5b30e8fb0f8ba5ce6a5c64eaf57fbb2eb993e0a5c74e5c9fa482
3
  size 861448011
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec343eaf3ec90dcfa2cb5c146d36ebdef964ddfb3f32a26819b696032d54450
3
  size 861448011
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de713ad836ac2a6b95daa8ebda70a2ab44ed35690af2f2553b0f07bee69a7d75
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c247a1c27ea92d0f9537e5dc2de3c2133cbca7edc87c27620b9ddf470aa12e3d
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b395f36c1065f9c23984017cce241f708faf612ff255e494dd1099cec47d815
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c3205a8951467b50ad397fe93754303c66d8710b2259939d6728fb38517dec
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66888136d67deff3e973c8956f772d3dd35e158601562f8a1fb2cfb066f1ff1c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9586b5907a02d2d6a0b1232c6bac387f9d2ff5619761451c685a8b534a0534e5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 2400,
3
  "best_metric": 0.9193424851234314,
4
  "best_model_checkpoint": "/content/drive/MyDrive/my_works/Work/CSA/models/comparison/BioLinkBERT/checkpoints/checkpoint-2400",
5
- "epoch": 1.8172332547963648,
6
  "eval_steps": 300,
7
- "global_step": 2700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -593,6 +593,41 @@
593
  "eval_samples_per_second": 293.839,
594
  "eval_steps_per_second": 9.235,
595
  "step": 2700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  }
597
  ],
598
  "logging_steps": 50,
@@ -607,12 +642,12 @@
607
  "should_evaluate": false,
608
  "should_log": false,
609
  "should_save": true,
610
- "should_training_stop": false
611
  },
612
  "attributes": {}
613
  }
614
  },
615
- "total_flos": 5643905456973312.0,
616
  "train_batch_size": 16,
617
  "trial_name": null,
618
  "trial_params": null
 
2
  "best_global_step": 2400,
3
  "best_metric": 0.9193424851234314,
4
  "best_model_checkpoint": "/content/drive/MyDrive/my_works/Work/CSA/models/comparison/BioLinkBERT/checkpoints/checkpoint-2400",
5
+ "epoch": 2.0,
6
  "eval_steps": 300,
7
+ "global_step": 2972,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
593
  "eval_samples_per_second": 293.839,
594
  "eval_steps_per_second": 9.235,
595
  "step": 2700
596
+ },
597
+ {
598
+ "epoch": 1.850891955570515,
599
+ "grad_norm": 0.19244159758090973,
600
+ "learning_rate": 3.412487186003988e-07,
601
+ "loss": 0.025279722213745116,
602
+ "step": 2750
603
+ },
604
+ {
605
+ "epoch": 1.8845506563446652,
606
+ "grad_norm": 0.2358558177947998,
607
+ "learning_rate": 2.058465195282e-07,
608
+ "loss": 0.022235231399536134,
609
+ "step": 2800
610
+ },
611
+ {
612
+ "epoch": 1.9182093571188152,
613
+ "grad_norm": 0.4195547103881836,
614
+ "learning_rate": 1.0423204634579354e-07,
615
+ "loss": 0.025367400646209716,
616
+ "step": 2850
617
+ },
618
+ {
619
+ "epoch": 1.9518680578929652,
620
+ "grad_norm": 0.17993882298469543,
621
+ "learning_rate": 3.6755847160593327e-08,
622
+ "loss": 0.02375290870666504,
623
+ "step": 2900
624
+ },
625
+ {
626
+ "epoch": 1.9855267586671155,
627
+ "grad_norm": 0.07394929975271225,
628
+ "learning_rate": 3.6507003670482875e-09,
629
+ "loss": 0.03331618070602417,
630
+ "step": 2950
631
  }
632
  ],
633
  "logging_steps": 50,
 
642
  "should_evaluate": false,
643
  "should_log": false,
644
  "should_save": true,
645
+ "should_training_stop": true
646
  },
647
  "attributes": {}
648
  }
649
  },
650
+ "total_flos": 6211419091749888.0,
651
  "train_batch_size": 16,
652
  "trial_name": null,
653
  "trial_params": null