JacobLinCool commited on
Commit
510cf6b
·
verified ·
1 Parent(s): 94c74ae

Training in progress, step 70, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:042e0c3a918c14b727dcfc1ea925b2d3c7674a5622906b94464180e5e38cf6fe
3
  size 8668296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66f6660bd73672b3675091a0d67850217bcf7e44f530e1d6bb4f07c7bc2ae294
3
  size 8668296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec58794edaff89de4b0e55040b97e67e16b429077cbe7d7491978d263b5897d8
3
  size 17405562
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365a83e2b9244e8cf01476b8783cea8987932fb83387898b4d10f00e6f768206
3
  size 17405562
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4aea8230fc4c9acbc31d25294372a7c4056d3b0a0b7e5461bb93deed7a35475b
3
  size 13990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23cc361cb1ce03d818b88d94dcf4157d4e7dfe0dd7c27d3f1c597b16cda9f983
3
  size 13990
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ebe7f3850f9be52d6f93fc9f544af2e9ed508950333a07fde53b974c0f7a69
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e79f9d4e7265c13ec8562f770a4b1f0d931af3562e2beb00601dee97b33f105c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 8.267716535433072,
3
  "best_model_checkpoint": "./exp/wft-test-model/checkpoint-60",
4
- "epoch": 3.03,
5
  "eval_steps": 10,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -480,6 +480,7 @@
480
  },
481
  {
482
  "epoch": 2.13,
 
483
  "grad_norm": 2.5712523460388184,
484
  "learning_rate": 0.00049,
485
  "loss": 0.4704,
@@ -487,6 +488,7 @@
487
  },
488
  {
489
  "epoch": 2.14,
 
490
  "grad_norm": 2.026331901550293,
491
  "learning_rate": 0.00048,
492
  "loss": 0.2717,
@@ -494,6 +496,7 @@
494
  },
495
  {
496
  "epoch": 2.15,
 
497
  "grad_norm": 1.7977256774902344,
498
  "learning_rate": 0.00047,
499
  "loss": 0.5672,
@@ -501,6 +504,7 @@
501
  },
502
  {
503
  "epoch": 2.16,
 
504
  "grad_norm": 2.375070810317993,
505
  "learning_rate": 0.00046,
506
  "loss": 0.4817,
@@ -508,6 +512,7 @@
508
  },
509
  {
510
  "epoch": 2.17,
 
511
  "grad_norm": 3.009258985519409,
512
  "learning_rate": 0.00045000000000000004,
513
  "loss": 0.533,
@@ -515,6 +520,7 @@
515
  },
516
  {
517
  "epoch": 2.18,
 
518
  "grad_norm": 2.3523738384246826,
519
  "learning_rate": 0.00044,
520
  "loss": 0.4127,
@@ -522,6 +528,7 @@
522
  },
523
  {
524
  "epoch": 2.19,
 
525
  "grad_norm": 5.932104110717773,
526
  "learning_rate": 0.00043,
527
  "loss": 0.3008,
@@ -529,6 +536,7 @@
529
  },
530
  {
531
  "epoch": 3.01,
 
532
  "grad_norm": 1.4854482412338257,
533
  "learning_rate": 0.00042,
534
  "loss": 0.3457,
@@ -536,6 +544,7 @@
536
  },
537
  {
538
  "epoch": 3.02,
 
539
  "grad_norm": 2.2406277656555176,
540
  "learning_rate": 0.00041,
541
  "loss": 0.3722,
@@ -543,6 +552,7 @@
543
  },
544
  {
545
  "epoch": 3.03,
 
546
  "grad_norm": 1.1445629596710205,
547
  "learning_rate": 0.0004,
548
  "loss": 0.2341,
@@ -561,6 +571,90 @@
561
  "eval_wer": 8.267716535433072,
562
  "eval_wer_time": 0.0059778690338134766,
563
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  }
565
  ],
566
  "logging_steps": 1,
@@ -580,7 +674,7 @@
580
  "attributes": {}
581
  }
582
  },
583
- "total_flos": 6406358999040000.0,
584
  "train_batch_size": 4,
585
  "trial_name": null,
586
  "trial_params": null
 
1
  {
2
  "best_metric": 8.267716535433072,
3
  "best_model_checkpoint": "./exp/wft-test-model/checkpoint-60",
4
+ "epoch": 3.13,
5
  "eval_steps": 10,
6
+ "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
480
  },
481
  {
482
  "epoch": 2.13,
483
+ "eval_pred": null,
484
  "grad_norm": 2.5712523460388184,
485
  "learning_rate": 0.00049,
486
  "loss": 0.4704,
 
488
  },
489
  {
490
  "epoch": 2.14,
491
+ "eval_pred": null,
492
  "grad_norm": 2.026331901550293,
493
  "learning_rate": 0.00048,
494
  "loss": 0.2717,
 
496
  },
497
  {
498
  "epoch": 2.15,
499
+ "eval_pred": null,
500
  "grad_norm": 1.7977256774902344,
501
  "learning_rate": 0.00047,
502
  "loss": 0.5672,
 
504
  },
505
  {
506
  "epoch": 2.16,
507
+ "eval_pred": null,
508
  "grad_norm": 2.375070810317993,
509
  "learning_rate": 0.00046,
510
  "loss": 0.4817,
 
512
  },
513
  {
514
  "epoch": 2.17,
515
+ "eval_pred": null,
516
  "grad_norm": 3.009258985519409,
517
  "learning_rate": 0.00045000000000000004,
518
  "loss": 0.533,
 
520
  },
521
  {
522
  "epoch": 2.18,
523
+ "eval_pred": null,
524
  "grad_norm": 2.3523738384246826,
525
  "learning_rate": 0.00044,
526
  "loss": 0.4127,
 
528
  },
529
  {
530
  "epoch": 2.19,
531
+ "eval_pred": null,
532
  "grad_norm": 5.932104110717773,
533
  "learning_rate": 0.00043,
534
  "loss": 0.3008,
 
536
  },
537
  {
538
  "epoch": 3.01,
539
+ "eval_pred": null,
540
  "grad_norm": 1.4854482412338257,
541
  "learning_rate": 0.00042,
542
  "loss": 0.3457,
 
544
  },
545
  {
546
  "epoch": 3.02,
547
+ "eval_pred": null,
548
  "grad_norm": 2.2406277656555176,
549
  "learning_rate": 0.00041,
550
  "loss": 0.3722,
 
552
  },
553
  {
554
  "epoch": 3.03,
555
+ "eval_pred": null,
556
  "grad_norm": 1.1445629596710205,
557
  "learning_rate": 0.0004,
558
  "loss": 0.2341,
 
571
  "eval_wer": 8.267716535433072,
572
  "eval_wer_time": 0.0059778690338134766,
573
  "step": 60
574
+ },
575
+ {
576
+ "epoch": 3.04,
577
+ "grad_norm": 1.4139453172683716,
578
+ "learning_rate": 0.00039000000000000005,
579
+ "loss": 0.2319,
580
+ "step": 61
581
+ },
582
+ {
583
+ "epoch": 3.05,
584
+ "grad_norm": 1.311193823814392,
585
+ "learning_rate": 0.00038,
586
+ "loss": 0.235,
587
+ "step": 62
588
+ },
589
+ {
590
+ "epoch": 3.06,
591
+ "grad_norm": 1.1820735931396484,
592
+ "learning_rate": 0.00037,
593
+ "loss": 0.1914,
594
+ "step": 63
595
+ },
596
+ {
597
+ "epoch": 3.07,
598
+ "grad_norm": 1.2226531505584717,
599
+ "learning_rate": 0.00035999999999999997,
600
+ "loss": 0.3147,
601
+ "step": 64
602
+ },
603
+ {
604
+ "epoch": 3.08,
605
+ "grad_norm": 1.2730989456176758,
606
+ "learning_rate": 0.00035,
607
+ "loss": 0.2505,
608
+ "step": 65
609
+ },
610
+ {
611
+ "epoch": 3.09,
612
+ "grad_norm": 1.0159199237823486,
613
+ "learning_rate": 0.00034,
614
+ "loss": 0.15,
615
+ "step": 66
616
+ },
617
+ {
618
+ "epoch": 3.1,
619
+ "grad_norm": 1.2265311479568481,
620
+ "learning_rate": 0.00033,
621
+ "loss": 0.1765,
622
+ "step": 67
623
+ },
624
+ {
625
+ "epoch": 3.11,
626
+ "grad_norm": 0.8005520105361938,
627
+ "learning_rate": 0.00032,
628
+ "loss": 0.0663,
629
+ "step": 68
630
+ },
631
+ {
632
+ "epoch": 3.12,
633
+ "grad_norm": 1.507002353668213,
634
+ "learning_rate": 0.00031,
635
+ "loss": 0.1685,
636
+ "step": 69
637
+ },
638
+ {
639
+ "epoch": 3.13,
640
+ "grad_norm": 1.3489922285079956,
641
+ "learning_rate": 0.0003,
642
+ "loss": 0.2233,
643
+ "step": 70
644
+ },
645
+ {
646
+ "epoch": 3.13,
647
+ "eval_cer": 83.52272727272727,
648
+ "eval_cer_time": 0.0032608509063720703,
649
+ "eval_decode_time": 0.5359196662902832,
650
+ "eval_loss": 0.20264121890068054,
651
+ "eval_pred": "| i | Label | Prediction |\n| --- | --- | --- |\n| 0 | MISTER QUILTER IS THE APOSTLE OF THE MIDDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPEL | MISTER QUILTER IS THE APOSTLE OF THE MDLE CLASSES AND WE ARE GLAD TO WELCOME HIS GOSPELLLTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH-\"-\"-\"TH-\"TH-\"-\"THTHTH-\"\"-\"-THTHTHTHTHTH\"-\"-\"-\"-\"-\"-\"-\"-\"-\"--\"\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"--\"\"-\"-\"--\"\"-=\"--\"-\"-\"\"-\"-\"--\"\"-\"- |\n| 1 | NOR IS MISTER QUILTER'S MANNER LESS INTERESTING THAN HIS MATTER | NOR IS MISTER QUILTER'S MANNER LESS INTERESTING THAN HIS MATTERMTHMMMMMTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH-\"-\"\"-\"-\"-TH\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"- |\n| 2 | HE TELLS US THAT AT THIS FESTIVE SEASON OF THE YEAR WITH CHRISTMAS AND ROAST BEEF LOOMING BEFORE US SIMILES DRAWN FROM EATING AND ITS RESULTS OCCUR MOST READILY TO THE MIND | HE TELLS US THAT AT THIS FESTIVE SEASON OF THE YEAR WITH CHRISTMAS AND ROAST BEEF LOOMING BEFORE US SIMILES DRAWN FROM EATING AND ITS RESULTS OCCUR MOST READILY TO THE MINDMMMTHMTHMMMMTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH%%THTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH%%%%%%%%%%%%%% |\n| 3 | HE HAS GRAVE DOUBTS WHETHER SIR FREDERICK LEIGHTON'S WORK IS REALLY GREEK AFTER ALL AND CAN DISCOVER IN IT BUT LITTLE OF ROCKY ITHACA | HE HAS GRAVE DBSTS WHETHER SIR FDERICK LITTON'S WORK IS REALLY GREE AF AFTER ALL AND CAN DISCOVER IN IT BUT LITTLE OF ROCKY ITHAKATHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH |\n| 4 | LINNELL'S PICTURES ARE A SORT OF UP GUARDS AND AT EM PAINTINGS AND MASON'S EXQUISITE IDYLLS ARE AS NATIONAL AS A JINGO POEM MISTER BIRKET FOSTER'S LANDSCAPES SMILE AT ONE MUCH IN THE SAME WAY THAT MISTER CARKER USED TO FLASH HIS TEETH AND MISTER JOHN COLLIER GIVES HIS SITTER A CHEERFUL SLAP ON THE BACK BEFORE HE SAYS LIKE A SHAMPOOER IN A TURKISH BATH NEXT MAN | LINNELL'S PICTURES ARE A SORT OF UP GUARDS AND AT EM PAINTINGS AND MASON'S EXQUISATE IDIDLLS ARE AS NATIONAL AS A JINGO POEM MISTER BIRKET FOSTER'S LANDSCAPES SMILE AT ONE MUCH IN THE SAME WAY THAT MISTER CARKER USED TO FLASH HIS TEETH AND MISTER JOHN COLLIER GIVES HIS SITTER A CHEERFUL SLAP ON THE BACK BEFORE HE SAYS LIKE A SHAMPOOER IN A TURKISH BATH NEXT MAN |\n| 5 | IT IS OBVIOUSLY UNNECESSARY FOR US TO POINT OUT HOW LUMINOUS THESE CRITICISMS ARE HOW DELICATE IN EXPRESSION | IT IS OBVIOUSLY UNNESSESSARY FOR US TO POINT OUT HOW LUMINUS THESE CRITICISMS ARE HOW DALIICATE IN EXPRESSIONTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTHTH-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\" |\n| 6 | ON THE GENERAL PRINCIPLES OF ART MISTER QUILTER WRITES WITH EQUAL LUCIDITY | ON THE GENERAL PRINCIPLES OF ART MISTER QUILTER WRITES WITH EQUAL LUCIDITYEEEEEETHETHETH \u266aD \u266aEDE======\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"-\"- |\n| 7 | PAINTING HE TELLS US IS OF A DIFFERENT QUALITY TO MATHEMATICS AND FINISH IN ART IS ADDING MORE FACT | PAINTING HE TELLS US IS OF A DIFFERENT QUALITY TO MATHEMATICS AND FINISH IN ART IS ADDING MORE FACT%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\"-\"-\"-\"-%%%\"-\"-\"-\"-\"-%\"-\"-%\"-\"-\"-\"-\"-\"-\"-\"-\"-\"- |\n| 8 | AS FOR ETCHINGS THEY ARE OF TWO KINDS BRITISH AND FOREIGN | AS FOR ETCHINGS THEY ARE OF TWO KINDS BRITISH AND FOREIGNBTHTHTHTHTHTHTHTHTH |\n| 9 | HE LAMENTS MOST BITTERLY THE DIVORCE THAT HAS BEEN MADE BETWEEN DECORATIVE ART AND WHAT WE USUALLY CALL PICTURES MAKES THE CUSTOMARY APPEAL TO THE LAST JUDGMENT AND REMINDS US THAT IN THE GREAT DAYS OF ART MICHAEL ANGELO WAS THE FURNISHING UPHOLSTERER | HE LAMENTS MOST BITTERLY THE DIVORCE THAT HAS BEEN MADE BETWEEN DECORATIVE ART AND WHAT WE USUALLY CALL PICTURES MAKES THE CUSTOMARY APPEAL TO THE LAST JGMENT AND REMINDS US THAT IN THE GREAT DAYS OF ART M ANGELO WAS THE FURNISHING UPHOLSTERER |\n",
652
+ "eval_runtime": 1.3431,
653
+ "eval_samples_per_second": 7.445,
654
+ "eval_steps_per_second": 1.489,
655
+ "eval_wer": 9.05511811023622,
656
+ "eval_wer_time": 0.006304025650024414,
657
+ "step": 70
658
  }
659
  ],
660
  "logging_steps": 1,
 
674
  "attributes": {}
675
  }
676
  },
677
+ "total_flos": 7515685232640000.0,
678
  "train_batch_size": 4,
679
  "trial_name": null,
680
  "trial_params": null