alessandronascimento commited on
Commit
52a1859
·
verified ·
1 Parent(s): a86a3d5

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f73b33dedf56ea8e1aa20dd84acf052b46c8e2b35c744ad7fc8688e94dcb9d1b
3
  size 1713050034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:219be41537bbb98fca70ca3f58664027f22ac8b626937fc54ed0c58b1a583287
3
  size 1713050034
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ed7e3994da7109f1b4357e9e48212e35f2244f7521ab00b5fa1cdeffd035a87
3
  size 816721594
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c557980c0526bbe9b748ef020546f7bc8e22ad8fcbd68d484140a76b913f895
3
  size 816721594
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e80ecfd4bb3e12f16f0fadd4143e7efcd2344334f82d3a7d112c1a118bf729c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5f4c3829d3c61d60d4aa81f39b1ae90c914023d099d2c2879c131506416ca01
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:957618c4a816fe27d3be89c8df199dd30cef92286611cdb093e42cb95779a12f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81c3e6445408f158b3d87cbf8d2d8e36840ad25379ff7117c45407306acac4e6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 6.921035674167797e-05,
3
- "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-7944",
4
- "epoch": 1.9998741267543583,
5
  "eval_steps": 500,
6
- "global_step": 7944,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -128,6 +128,70 @@
128
  "eval_samples_per_second": 15.24,
129
  "eval_steps_per_second": 0.952,
130
  "step": 7944
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 500,
@@ -156,7 +220,7 @@
156
  "attributes": {}
157
  }
158
  },
159
- "total_flos": 5.334668285654661e+18,
160
  "train_batch_size": 32,
161
  "trial_name": null,
162
  "trial_params": null
 
1
  {
2
+ "best_metric": 6.317481165751815e-05,
3
+ "best_model_checkpoint": "ProtChem_ESM2_MolGen_Decoder/checkpoint-11916",
4
+ "epoch": 2.9998111901315374,
5
  "eval_steps": 500,
6
+ "global_step": 11916,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 15.24,
129
  "eval_steps_per_second": 0.952,
130
  "step": 7944
131
+ },
132
+ {
133
+ "epoch": 2.013971930266222,
134
+ "grad_norm": 0.003448486328125,
135
+ "learning_rate": 1.877715273687297e-05,
136
+ "loss": 0.0002,
137
+ "step": 8000
138
+ },
139
+ {
140
+ "epoch": 2.139845175907861,
141
+ "grad_norm": 0.01324462890625,
142
+ "learning_rate": 1.857005206448375e-05,
143
+ "loss": 0.0002,
144
+ "step": 8500
145
+ },
146
+ {
147
+ "epoch": 2.2657184215495,
148
+ "grad_norm": 0.0032806396484375,
149
+ "learning_rate": 1.8348091451167224e-05,
150
+ "loss": 0.0002,
151
+ "step": 9000
152
+ },
153
+ {
154
+ "epoch": 2.3915916671911384,
155
+ "grad_norm": 0.00182342529296875,
156
+ "learning_rate": 1.8111655762916885e-05,
157
+ "loss": 0.0002,
158
+ "step": 9500
159
+ },
160
+ {
161
+ "epoch": 2.5174649128327773,
162
+ "grad_norm": 0.007781982421875,
163
+ "learning_rate": 1.786115496461207e-05,
164
+ "loss": 0.0002,
165
+ "step": 10000
166
+ },
167
+ {
168
+ "epoch": 2.643338158474416,
169
+ "grad_norm": 0.00250244140625,
170
+ "learning_rate": 1.759702340916418e-05,
171
+ "loss": 0.0002,
172
+ "step": 10500
173
+ },
174
+ {
175
+ "epoch": 2.769211404116055,
176
+ "grad_norm": 0.00185394287109375,
177
+ "learning_rate": 1.7319719084375556e-05,
178
+ "loss": 0.0001,
179
+ "step": 11000
180
+ },
181
+ {
182
+ "epoch": 2.895084649757694,
183
+ "grad_norm": 0.00445556640625,
184
+ "learning_rate": 1.702972281881693e-05,
185
+ "loss": 0.0001,
186
+ "step": 11500
187
+ },
188
+ {
189
+ "epoch": 2.9998111901315374,
190
+ "eval_loss": 6.317481165751815e-05,
191
+ "eval_runtime": 16782.274,
192
+ "eval_samples_per_second": 15.248,
193
+ "eval_steps_per_second": 0.953,
194
+ "step": 11916
195
  }
196
  ],
197
  "logging_steps": 500,
 
220
  "attributes": {}
221
  }
222
  },
223
+ "total_flos": 8.002002428481992e+18,
224
  "train_batch_size": 32,
225
  "trial_name": null,
226
  "trial_params": null