mgh6 committed on
Commit
b79c73b
·
verified ·
1 Parent(s): a738494

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22fd646964f7141c8e110ca73e687d637ac5908b5938b0a42a9af0bcbf299f16
3
  size 2610104820
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb1ac3363635a3b0090068f8794f7a3fff0ed98d9564aa75664f20978996f6a
3
  size 2610104820
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d40b56fa86399430acdee58a523d5e295b660277e1c6794d579ae6a32135e4d8
3
  size 5210004271
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44cc8995b543f42db2626d1ca4565368b0ca96d47a236ece7b6e6f2efb38e27c
3
  size 5210004271
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df0987ff7a6cbc2809c803560f955306ddcf16489adf408fc06355caaa080ce5
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7964ea9b27b8ec2d253f8e37dd1b85f1110775e8fe1122f0a165ddc0fdee12d
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7daf508bc85526f69b8f3b71377a6a6256f8d93d4aaf541b8c9a6b631bc6c8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:863cf4a7956086b9fd348589f422e8af87ea5d78b5c7d0e595c32a5aaf7e77e5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9972065305108946,
5
  "eval_steps": 50,
6
- "global_step": 502,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,81 @@
157
  "eval_samples_per_second": 31.354,
158
  "eval_steps_per_second": 15.677,
159
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 50,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9972065305108946,
5
  "eval_steps": 50,
6
+ "global_step": 753,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_samples_per_second": 31.354,
158
  "eval_steps_per_second": 15.677,
159
  "step": 500
160
+ },
161
+ {
162
+ "epoch": 2.190700850456267,
163
+ "grad_norm": 0.20531371235847473,
164
+ "learning_rate": 7.808764940239044e-05,
165
+ "loss": 0.1764,
166
+ "step": 550
167
+ },
168
+ {
169
+ "epoch": 2.190700850456267,
170
+ "eval_loss": 0.20692695677280426,
171
+ "eval_runtime": 27.0503,
172
+ "eval_samples_per_second": 31.349,
173
+ "eval_steps_per_second": 15.675,
174
+ "step": 550
175
+ },
176
+ {
177
+ "epoch": 2.3893475696815445,
178
+ "grad_norm": 0.20478622615337372,
179
+ "learning_rate": 7.609561752988048e-05,
180
+ "loss": 0.1489,
181
+ "step": 600
182
+ },
183
+ {
184
+ "epoch": 2.3893475696815445,
185
+ "eval_loss": 0.19011318683624268,
186
+ "eval_runtime": 27.0394,
187
+ "eval_samples_per_second": 31.362,
188
+ "eval_steps_per_second": 15.681,
189
+ "step": 600
190
+ },
191
+ {
192
+ "epoch": 2.587994288906822,
193
+ "grad_norm": 0.20455621182918549,
194
+ "learning_rate": 7.410358565737052e-05,
195
+ "loss": 0.1389,
196
+ "step": 650
197
+ },
198
+ {
199
+ "epoch": 2.587994288906822,
200
+ "eval_loss": 0.17714831233024597,
201
+ "eval_runtime": 27.0003,
202
+ "eval_samples_per_second": 31.407,
203
+ "eval_steps_per_second": 15.704,
204
+ "step": 650
205
+ },
206
+ {
207
+ "epoch": 2.7866410081321,
208
+ "grad_norm": 0.2362823486328125,
209
+ "learning_rate": 7.211155378486057e-05,
210
+ "loss": 0.1276,
211
+ "step": 700
212
+ },
213
+ {
214
+ "epoch": 2.7866410081321,
215
+ "eval_loss": 0.16439422965049744,
216
+ "eval_runtime": 27.0429,
217
+ "eval_samples_per_second": 31.358,
218
+ "eval_steps_per_second": 15.679,
219
+ "step": 700
220
+ },
221
+ {
222
+ "epoch": 2.985287727357378,
223
+ "grad_norm": 0.19321954250335693,
224
+ "learning_rate": 7.01195219123506e-05,
225
+ "loss": 0.1208,
226
+ "step": 750
227
+ },
228
+ {
229
+ "epoch": 2.985287727357378,
230
+ "eval_loss": 0.15227381885051727,
231
+ "eval_runtime": 27.0597,
232
+ "eval_samples_per_second": 31.338,
233
+ "eval_steps_per_second": 15.669,
234
+ "step": 750
235
  }
236
  ],
237
  "logging_steps": 50,