ljcamargo commited on
Commit
5cbb9d9
·
verified ·
1 Parent(s): 19be349

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12de36f02475ba36424b6cbbc78a99fb5d247b1f59b0671ec136b90196dbc42e
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f80a24ef27cb8d65adc7ffae4be6c2732033a5e9141dfd66815315f593ae70b8
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71a973f442004b75157ae01481531805c844e77a68190e59a5218c09d8d6df94
3
  size 2479123301
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8da859acf7ac6600d1453a6ad96ad20e3865a29e481bed11e5055dfb249272fa
3
  size 2479123301
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc1a0da602f8abf4bf342932694d528cc1f0baa4d5027de58ad34f4d9855d085
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f0a73f4374b5e05c786b790825aa9c223b53f3ee4551182bcb497e6cf37794f
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38dd85de4e747e5477e492c54af5b212cebc40d19045c2dfc5361392de0ed8a7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e8d05c49d91a8f0b7f5e1dfd9876ebcb94dbdc34961d432fb69d2b41418c55
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.12,
6
  "eval_steps": 500,
7
- "global_step": 300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -218,6 +218,76 @@
218
  "learning_rate": 4.4168336673346694e-05,
219
  "loss": 1.4183,
220
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
  ],
223
  "logging_steps": 10,
@@ -237,7 +307,7 @@
237
  "attributes": {}
238
  }
239
  },
240
- "total_flos": 5404828786993152.0,
241
  "train_batch_size": 2,
242
  "trial_name": null,
243
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.16,
6
  "eval_steps": 500,
7
+ "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
218
  "learning_rate": 4.4168336673346694e-05,
219
  "loss": 1.4183,
220
  "step": 300
221
+ },
222
+ {
223
+ "epoch": 0.124,
224
+ "grad_norm": 22.46800422668457,
225
+ "learning_rate": 4.3967935871743486e-05,
226
+ "loss": 1.1226,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 0.128,
231
+ "grad_norm": 24.424856185913086,
232
+ "learning_rate": 4.3767535070140284e-05,
233
+ "loss": 1.3413,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 0.132,
238
+ "grad_norm": 14.698283195495605,
239
+ "learning_rate": 4.3567134268537076e-05,
240
+ "loss": 1.2009,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 0.136,
245
+ "grad_norm": 23.5820369720459,
246
+ "learning_rate": 4.336673346693387e-05,
247
+ "loss": 1.0799,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 0.14,
252
+ "grad_norm": 26.510631561279297,
253
+ "learning_rate": 4.316633266533066e-05,
254
+ "loss": 1.0801,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 0.144,
259
+ "grad_norm": 18.498275756835938,
260
+ "learning_rate": 4.296593186372745e-05,
261
+ "loss": 1.1631,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 0.148,
266
+ "grad_norm": 35.2937126159668,
267
+ "learning_rate": 4.2765531062124256e-05,
268
+ "loss": 1.3777,
269
+ "step": 370
270
+ },
271
+ {
272
+ "epoch": 0.152,
273
+ "grad_norm": 40.05356216430664,
274
+ "learning_rate": 4.256513026052105e-05,
275
+ "loss": 1.1189,
276
+ "step": 380
277
+ },
278
+ {
279
+ "epoch": 0.156,
280
+ "grad_norm": 18.918344497680664,
281
+ "learning_rate": 4.236472945891784e-05,
282
+ "loss": 1.0721,
283
+ "step": 390
284
+ },
285
+ {
286
+ "epoch": 0.16,
287
+ "grad_norm": 20.29583168029785,
288
+ "learning_rate": 4.216432865731463e-05,
289
+ "loss": 0.9337,
290
+ "step": 400
291
  }
292
  ],
293
  "logging_steps": 10,
 
307
  "attributes": {}
308
  }
309
  },
310
+ "total_flos": 7213993175126016.0,
311
  "train_batch_size": 2,
312
  "trial_name": null,
313
  "trial_params": null