ljcamargo commited on
Commit
110ceeb
·
verified ·
1 Parent(s): f838cfd

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f80a24ef27cb8d65adc7ffae4be6c2732033a5e9141dfd66815315f593ae70b8
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b7bdfa9432865811566d801973f8536379093191662a7081831a5e5a10ab7ea
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8da859acf7ac6600d1453a6ad96ad20e3865a29e481bed11e5055dfb249272fa
3
- size 2479123301
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98b8806bdda72b37431cd7a1864b5e940d2adca924c0255d88519f24424d35ce
3
+ size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f0a73f4374b5e05c786b790825aa9c223b53f3ee4551182bcb497e6cf37794f
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff58b41c3672e659a0eb46d9ed11a0ca17415e7a2643a3ddfbaebb9f4e67f8f
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98e8d05c49d91a8f0b7f5e1dfd9876ebcb94dbdc34961d432fb69d2b41418c55
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04a7344de2b10f0c83d4bc41d4ac3ff0903cfb5c2b6ee1654e8dd79d5d353dd0
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.16,
6
  "eval_steps": 500,
7
- "global_step": 400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -288,6 +288,76 @@
288
  "learning_rate": 4.216432865731463e-05,
289
  "loss": 0.9337,
290
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  }
292
  ],
293
  "logging_steps": 10,
@@ -307,7 +377,7 @@
307
  "attributes": {}
308
  }
309
  },
310
- "total_flos": 7213993175126016.0,
311
  "train_batch_size": 2,
312
  "trial_name": null,
313
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.2,
6
  "eval_steps": 500,
7
+ "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
288
  "learning_rate": 4.216432865731463e-05,
289
  "loss": 0.9337,
290
  "step": 400
291
+ },
292
+ {
293
+ "epoch": 0.164,
294
+ "grad_norm": 19.447803497314453,
295
+ "learning_rate": 4.2081993569131834e-05,
296
+ "loss": 1.2047,
297
+ "step": 410
298
+ },
299
+ {
300
+ "epoch": 0.168,
301
+ "grad_norm": 26.82716178894043,
302
+ "learning_rate": 4.188102893890675e-05,
303
+ "loss": 1.1294,
304
+ "step": 420
305
+ },
306
+ {
307
+ "epoch": 0.172,
308
+ "grad_norm": 12.995594024658203,
309
+ "learning_rate": 4.168006430868168e-05,
310
+ "loss": 1.0033,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 0.176,
315
+ "grad_norm": 21.796598434448242,
316
+ "learning_rate": 4.14790996784566e-05,
317
+ "loss": 0.8864,
318
+ "step": 440
319
+ },
320
+ {
321
+ "epoch": 0.18,
322
+ "grad_norm": 13.911988258361816,
323
+ "learning_rate": 4.1278135048231516e-05,
324
+ "loss": 0.8974,
325
+ "step": 450
326
+ },
327
+ {
328
+ "epoch": 0.184,
329
+ "grad_norm": 25.945011138916016,
330
+ "learning_rate": 4.1077170418006434e-05,
331
+ "loss": 1.261,
332
+ "step": 460
333
+ },
334
+ {
335
+ "epoch": 0.188,
336
+ "grad_norm": 19.943857192993164,
337
+ "learning_rate": 4.087620578778135e-05,
338
+ "loss": 1.0262,
339
+ "step": 470
340
+ },
341
+ {
342
+ "epoch": 0.192,
343
+ "grad_norm": 23.558696746826172,
344
+ "learning_rate": 4.067524115755627e-05,
345
+ "loss": 0.9572,
346
+ "step": 480
347
+ },
348
+ {
349
+ "epoch": 0.196,
350
+ "grad_norm": 42.70231628417969,
351
+ "learning_rate": 4.047427652733119e-05,
352
+ "loss": 1.044,
353
+ "step": 490
354
+ },
355
+ {
356
+ "epoch": 0.2,
357
+ "grad_norm": 16.41856575012207,
358
+ "learning_rate": 4.027331189710611e-05,
359
+ "loss": 1.0562,
360
+ "step": 500
361
  }
362
  ],
363
  "logging_steps": 10,
 
377
  "attributes": {}
378
  }
379
  },
380
+ "total_flos": 9020891125518336.0,
381
  "train_batch_size": 2,
382
  "trial_name": null,
383
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:494a633dff0a175cc686333ee75294c5676e89507a1657f43bb2a04f0d770f70
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a61a9de7250209bad41a9d2af614e0cacba19d7115d8d918ae2499b5ad24f2b6
3
  size 6289