alessandronascimento commited on
Commit
79a2646
·
verified ·
1 Parent(s): 7bd85ac

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:178530fd64f5986b87c68d4d25287bb190e9dc948cfcc39ffd1fb87d818d0756
3
  size 1713050034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f73b33dedf56ea8e1aa20dd84acf052b46c8e2b35c744ad7fc8688e94dcb9d1b
3
  size 1713050034
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11c0457a86b221612d25748e0aa0dda1614f71fb98d3fe0ccd5424e96cd0f7ee
3
  size 816721594
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed7e3994da7109f1b4357e9e48212e35f2244f7521ab00b5fa1cdeffd035a87
3
  size 816721594
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:557d974c659fd0bb715c14aad20461250be107b6c6982f3edba5bffe93c03d67
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e80ecfd4bb3e12f16f0fadd4143e7efcd2344334f82d3a7d112c1a118bf729c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f243675b1550a6e1c2b2f39823155e548c4797f1bca6374367a6b4525499792
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:957618c4a816fe27d3be89c8df199dd30cef92286611cdb093e42cb95779a12f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 9.923595644067973e-05,
3
- "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-3972",
4
- "epoch": 0.9999370633771791,
5
  "eval_steps": 500,
6
- "global_step": 3972,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -64,6 +64,70 @@
64
  "eval_samples_per_second": 15.226,
65
  "eval_steps_per_second": 0.952,
66
  "step": 3972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  }
68
  ],
69
  "logging_steps": 500,
@@ -92,7 +156,7 @@
92
  "attributes": {}
93
  }
94
  },
95
- "total_flos": 2.6673341428273306e+18,
96
  "train_batch_size": 32,
97
  "trial_name": null,
98
  "trial_params": null
 
1
  {
2
+ "best_metric": 6.921035674167797e-05,
3
+ "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-7944",
4
+ "epoch": 1.9998741267543583,
5
  "eval_steps": 500,
6
+ "global_step": 7944,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
64
  "eval_samples_per_second": 15.226,
65
  "eval_steps_per_second": 0.952,
66
  "step": 3972
67
+ },
68
+ {
69
+ "epoch": 1.006985965133111,
70
+ "grad_norm": 0.0023651123046875,
71
+ "learning_rate": 1.9861585387115228e-05,
72
+ "loss": 0.0002,
73
+ "step": 4000
74
+ },
75
+ {
76
+ "epoch": 1.13285921077475,
77
+ "grad_norm": 0.005157470703125,
78
+ "learning_rate": 1.9784008444561692e-05,
79
+ "loss": 0.0002,
80
+ "step": 4500
81
+ },
82
+ {
83
+ "epoch": 1.2587324564163886,
84
+ "grad_norm": 0.009765625,
85
+ "learning_rate": 1.9689466635701106e-05,
86
+ "loss": 0.0002,
87
+ "step": 5000
88
+ },
89
+ {
90
+ "epoch": 1.3846057020580276,
91
+ "grad_norm": 0.006011962890625,
92
+ "learning_rate": 1.9578123890190405e-05,
93
+ "loss": 0.0002,
94
+ "step": 5500
95
+ },
96
+ {
97
+ "epoch": 1.5104789476996663,
98
+ "grad_norm": 0.01397705078125,
99
+ "learning_rate": 1.9450173269472915e-05,
100
+ "loss": 0.0002,
101
+ "step": 6000
102
+ },
103
+ {
104
+ "epoch": 1.6363521933413052,
105
+ "grad_norm": 0.0035247802734375,
106
+ "learning_rate": 1.9305836632021744e-05,
107
+ "loss": 0.0002,
108
+ "step": 6500
109
+ },
110
+ {
111
+ "epoch": 1.7622254389829441,
112
+ "grad_norm": 0.004669189453125,
113
+ "learning_rate": 1.9145364248650892e-05,
114
+ "loss": 0.0002,
115
+ "step": 7000
116
+ },
117
+ {
118
+ "epoch": 1.888098684624583,
119
+ "grad_norm": 0.0034332275390625,
120
+ "learning_rate": 1.8969034368561105e-05,
121
+ "loss": 0.0002,
122
+ "step": 7500
123
+ },
124
+ {
125
+ "epoch": 1.9998741267543583,
126
+ "eval_loss": 6.921035674167797e-05,
127
+ "eval_runtime": 16790.6132,
128
+ "eval_samples_per_second": 15.24,
129
+ "eval_steps_per_second": 0.952,
130
+ "step": 7944
131
  }
132
  ],
133
  "logging_steps": 500,
 
156
  "attributes": {}
157
  }
158
  },
159
+ "total_flos": 5.334668285654661e+18,
160
  "train_batch_size": 32,
161
  "trial_name": null,
162
  "trial_params": null