ljcamargo commited on
Commit
53eeb4a
·
verified ·
1 Parent(s): 196800d

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8887655314c6a7fc55cd02e4ec047c0048e2272c7fd352777bb7dacb970f435
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e9c89c5afa1545ed2abce6fb61b801899049ea15f9cad45a8a50d3ec0f9d3c1
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4544c40f9c86f284c54a215c2c1aec6b854bdd8ff99351fe18c83fabae909708
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b0b1b8a92e051537f0d9657a16e5d51d9f5dddc753f3ecef5357cb38fad4fca
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e35963fbe17703d43e57c264c8bf401c049828d6ea5abe6c269f936eebec007
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d166d430557d2569c721c8dec1c8ddf3bfe3fec272b03dceb3e3268be418ae2c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dacf7348dc62d0b10f0ec94c4cde6e6d272f324c59483c8d9d8548880fb6b96
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55fdec1914c1bee37a4826392246117bd3062dd019b2d4c1b1f435e39b62b9ce
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.24,
6
  "eval_steps": 500,
7
- "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -428,6 +428,76 @@
428
  "learning_rate": 3.826366559485531e-05,
429
  "loss": 0.9308,
430
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
431
  }
432
  ],
433
  "logging_steps": 10,
@@ -447,7 +517,7 @@
447
  "attributes": {}
448
  }
449
  },
450
- "total_flos": 1.081989562033152e+16,
451
  "train_batch_size": 2,
452
  "trial_name": null,
453
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.28,
6
  "eval_steps": 500,
7
+ "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
428
  "learning_rate": 3.826366559485531e-05,
429
  "loss": 0.9308,
430
  "step": 600
431
+ },
432
+ {
433
+ "epoch": 0.244,
434
+ "grad_norm": 20.76468849182129,
435
+ "learning_rate": 3.806270096463023e-05,
436
+ "loss": 0.8735,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 0.248,
441
+ "grad_norm": 26.41815757751465,
442
+ "learning_rate": 3.786173633440515e-05,
443
+ "loss": 1.2878,
444
+ "step": 620
445
+ },
446
+ {
447
+ "epoch": 0.252,
448
+ "grad_norm": 41.02421951293945,
449
+ "learning_rate": 3.7660771704180066e-05,
450
+ "loss": 1.1299,
451
+ "step": 630
452
+ },
453
+ {
454
+ "epoch": 0.256,
455
+ "grad_norm": 17.34744644165039,
456
+ "learning_rate": 3.7459807073954985e-05,
457
+ "loss": 0.8315,
458
+ "step": 640
459
+ },
460
+ {
461
+ "epoch": 0.26,
462
+ "grad_norm": 14.293941497802734,
463
+ "learning_rate": 3.725884244372991e-05,
464
+ "loss": 0.8405,
465
+ "step": 650
466
+ },
467
+ {
468
+ "epoch": 0.264,
469
+ "grad_norm": 15.149956703186035,
470
+ "learning_rate": 3.705787781350483e-05,
471
+ "loss": 1.0297,
472
+ "step": 660
473
+ },
474
+ {
475
+ "epoch": 0.268,
476
+ "grad_norm": 17.754810333251953,
477
+ "learning_rate": 3.685691318327975e-05,
478
+ "loss": 0.9322,
479
+ "step": 670
480
+ },
481
+ {
482
+ "epoch": 0.272,
483
+ "grad_norm": 21.743669509887695,
484
+ "learning_rate": 3.6655948553054666e-05,
485
+ "loss": 0.9069,
486
+ "step": 680
487
+ },
488
+ {
489
+ "epoch": 0.276,
490
+ "grad_norm": 29.161598205566406,
491
+ "learning_rate": 3.6454983922829585e-05,
492
+ "loss": 0.8633,
493
+ "step": 690
494
+ },
495
+ {
496
+ "epoch": 0.28,
497
+ "grad_norm": 16.16539192199707,
498
+ "learning_rate": 3.6254019292604503e-05,
499
+ "loss": 0.7662,
500
+ "step": 700
501
  }
502
  ],
503
  "logging_steps": 10,
 
517
  "attributes": {}
518
  }
519
  },
520
+ "total_flos": 1.2633123965792256e+16,
521
  "train_batch_size": 2,
522
  "trial_name": null,
523
  "trial_params": null