euler03 commited on
Commit
7c9f5d1
·
verified ·
1 Parent(s): 4d1611b

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a10a2ecaff875b9c46ad2bbd2fed17c2a0a46c72399b0499d9bca795a82b01a
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d52b5f85bae69987c6e3f333d957b4a00ee3c2c2c4e07e3f04dbb474f7a3832
3
  size 267832560
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ffdf6481862c29fc9f519ed97553ae9c619649345ac1473ff2b63f00a952157
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35a22e202003692875f84b686ed8ca24482f192a3e635d1468082f0bb00792ff
3
  size 535727290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e4a426a8b73c74a38f2a3b1243f7c773cf4681b425c8731e354a12e8672e330
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2847d9d71bf9a8c602c37034ef9e075859f3ed69e4a00df162b9a4a32971121
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cbe50d058b46466dc5c0d3a5f85c97b4ca24f57c286062ca922883cd2d25c9c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34672d517f99ba83bdc5002baecfd9b3f2d2fcacea6cdddb384020730eba75c1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0,
3
  "best_model_checkpoint": "./results/checkpoint-500",
4
- "epoch": 0.17094017094017094,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -370,6 +370,732 @@
370
  "eval_samples_per_second": 319.453,
371
  "eval_steps_per_second": 19.988,
372
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
  }
374
  ],
375
  "logging_steps": 10,
@@ -384,7 +1110,7 @@
384
  "early_stopping_threshold": 0.001
385
  },
386
  "attributes": {
387
- "early_stopping_patience_counter": 0
388
  }
389
  },
390
  "TrainerControl": {
@@ -398,7 +1124,7 @@
398
  "attributes": {}
399
  }
400
  },
401
- "total_flos": 264934797312000.0,
402
  "train_batch_size": 16,
403
  "trial_name": null,
404
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0,
3
  "best_model_checkpoint": "./results/checkpoint-500",
4
+ "epoch": 0.5128205128205128,
5
  "eval_steps": 500,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
370
  "eval_samples_per_second": 319.453,
371
  "eval_steps_per_second": 19.988,
372
  "step": 500
373
+ },
374
+ {
375
+ "epoch": 0.17435897435897435,
376
+ "grad_norm": 0.4656302034854889,
377
+ "learning_rate": 4.709401709401709e-05,
378
+ "loss": 0.6506,
379
+ "step": 510
380
+ },
381
+ {
382
+ "epoch": 0.17777777777777778,
383
+ "grad_norm": 0.6288455724716187,
384
+ "learning_rate": 4.703703703703704e-05,
385
+ "loss": 0.6422,
386
+ "step": 520
387
+ },
388
+ {
389
+ "epoch": 0.1811965811965812,
390
+ "grad_norm": 0.39913907647132874,
391
+ "learning_rate": 4.698005698005698e-05,
392
+ "loss": 0.6146,
393
+ "step": 530
394
+ },
395
+ {
396
+ "epoch": 0.18461538461538463,
397
+ "grad_norm": 0.40889817476272583,
398
+ "learning_rate": 4.692307692307693e-05,
399
+ "loss": 0.6272,
400
+ "step": 540
401
+ },
402
+ {
403
+ "epoch": 0.18803418803418803,
404
+ "grad_norm": 0.9223109483718872,
405
+ "learning_rate": 4.686609686609687e-05,
406
+ "loss": 0.6391,
407
+ "step": 550
408
+ },
409
+ {
410
+ "epoch": 0.19145299145299147,
411
+ "grad_norm": 0.43170908093452454,
412
+ "learning_rate": 4.680911680911681e-05,
413
+ "loss": 0.6613,
414
+ "step": 560
415
+ },
416
+ {
417
+ "epoch": 0.19487179487179487,
418
+ "grad_norm": 0.6207427978515625,
419
+ "learning_rate": 4.675213675213676e-05,
420
+ "loss": 0.6471,
421
+ "step": 570
422
+ },
423
+ {
424
+ "epoch": 0.19829059829059828,
425
+ "grad_norm": 0.7672275304794312,
426
+ "learning_rate": 4.66951566951567e-05,
427
+ "loss": 0.6629,
428
+ "step": 580
429
+ },
430
+ {
431
+ "epoch": 0.20170940170940171,
432
+ "grad_norm": 0.4669424891471863,
433
+ "learning_rate": 4.6638176638176636e-05,
434
+ "loss": 0.6588,
435
+ "step": 590
436
+ },
437
+ {
438
+ "epoch": 0.20512820512820512,
439
+ "grad_norm": 0.6726049184799194,
440
+ "learning_rate": 4.6581196581196586e-05,
441
+ "loss": 0.6258,
442
+ "step": 600
443
+ },
444
+ {
445
+ "epoch": 0.20854700854700856,
446
+ "grad_norm": 0.7948060035705566,
447
+ "learning_rate": 4.652421652421652e-05,
448
+ "loss": 0.5705,
449
+ "step": 610
450
+ },
451
+ {
452
+ "epoch": 0.21196581196581196,
453
+ "grad_norm": 0.419849693775177,
454
+ "learning_rate": 4.646723646723647e-05,
455
+ "loss": 0.6468,
456
+ "step": 620
457
+ },
458
+ {
459
+ "epoch": 0.2153846153846154,
460
+ "grad_norm": 1.0143113136291504,
461
+ "learning_rate": 4.6410256410256415e-05,
462
+ "loss": 0.6297,
463
+ "step": 630
464
+ },
465
+ {
466
+ "epoch": 0.2188034188034188,
467
+ "grad_norm": 0.7109899520874023,
468
+ "learning_rate": 4.635327635327635e-05,
469
+ "loss": 0.673,
470
+ "step": 640
471
+ },
472
+ {
473
+ "epoch": 0.2222222222222222,
474
+ "grad_norm": 0.760080099105835,
475
+ "learning_rate": 4.62962962962963e-05,
476
+ "loss": 0.6227,
477
+ "step": 650
478
+ },
479
+ {
480
+ "epoch": 0.22564102564102564,
481
+ "grad_norm": 0.7442237138748169,
482
+ "learning_rate": 4.6239316239316244e-05,
483
+ "loss": 0.5715,
484
+ "step": 660
485
+ },
486
+ {
487
+ "epoch": 0.22905982905982905,
488
+ "grad_norm": 0.39145609736442566,
489
+ "learning_rate": 4.618233618233619e-05,
490
+ "loss": 0.6727,
491
+ "step": 670
492
+ },
493
+ {
494
+ "epoch": 0.23247863247863249,
495
+ "grad_norm": 0.868276059627533,
496
+ "learning_rate": 4.612535612535613e-05,
497
+ "loss": 0.6344,
498
+ "step": 680
499
+ },
500
+ {
501
+ "epoch": 0.2358974358974359,
502
+ "grad_norm": 0.6120406985282898,
503
+ "learning_rate": 4.6068376068376066e-05,
504
+ "loss": 0.5954,
505
+ "step": 690
506
+ },
507
+ {
508
+ "epoch": 0.23931623931623933,
509
+ "grad_norm": 0.5536867380142212,
510
+ "learning_rate": 4.6011396011396016e-05,
511
+ "loss": 0.6476,
512
+ "step": 700
513
+ },
514
+ {
515
+ "epoch": 0.24273504273504273,
516
+ "grad_norm": 0.4315416216850281,
517
+ "learning_rate": 4.595441595441596e-05,
518
+ "loss": 0.6215,
519
+ "step": 710
520
+ },
521
+ {
522
+ "epoch": 0.24615384615384617,
523
+ "grad_norm": 0.517528235912323,
524
+ "learning_rate": 4.5897435897435895e-05,
525
+ "loss": 0.6258,
526
+ "step": 720
527
+ },
528
+ {
529
+ "epoch": 0.24957264957264957,
530
+ "grad_norm": 1.3188592195510864,
531
+ "learning_rate": 4.5840455840455844e-05,
532
+ "loss": 0.6469,
533
+ "step": 730
534
+ },
535
+ {
536
+ "epoch": 0.252991452991453,
537
+ "grad_norm": 1.2717797756195068,
538
+ "learning_rate": 4.578347578347579e-05,
539
+ "loss": 0.5683,
540
+ "step": 740
541
+ },
542
+ {
543
+ "epoch": 0.2564102564102564,
544
+ "grad_norm": 1.0561293363571167,
545
+ "learning_rate": 4.572649572649573e-05,
546
+ "loss": 0.6769,
547
+ "step": 750
548
+ },
549
+ {
550
+ "epoch": 0.25982905982905985,
551
+ "grad_norm": 1.4157183170318604,
552
+ "learning_rate": 4.566951566951567e-05,
553
+ "loss": 0.6901,
554
+ "step": 760
555
+ },
556
+ {
557
+ "epoch": 0.26324786324786326,
558
+ "grad_norm": 0.4029109477996826,
559
+ "learning_rate": 4.5612535612535616e-05,
560
+ "loss": 0.593,
561
+ "step": 770
562
+ },
563
+ {
564
+ "epoch": 0.26666666666666666,
565
+ "grad_norm": 1.0039498805999756,
566
+ "learning_rate": 4.555555555555556e-05,
567
+ "loss": 0.6798,
568
+ "step": 780
569
+ },
570
+ {
571
+ "epoch": 0.27008547008547007,
572
+ "grad_norm": 0.6905536651611328,
573
+ "learning_rate": 4.54985754985755e-05,
574
+ "loss": 0.6352,
575
+ "step": 790
576
+ },
577
+ {
578
+ "epoch": 0.27350427350427353,
579
+ "grad_norm": 0.8582714796066284,
580
+ "learning_rate": 4.544159544159544e-05,
581
+ "loss": 0.6438,
582
+ "step": 800
583
+ },
584
+ {
585
+ "epoch": 0.27692307692307694,
586
+ "grad_norm": 0.4063926339149475,
587
+ "learning_rate": 4.538461538461539e-05,
588
+ "loss": 0.6503,
589
+ "step": 810
590
+ },
591
+ {
592
+ "epoch": 0.28034188034188035,
593
+ "grad_norm": 1.0651031732559204,
594
+ "learning_rate": 4.532763532763533e-05,
595
+ "loss": 0.6296,
596
+ "step": 820
597
+ },
598
+ {
599
+ "epoch": 0.28376068376068375,
600
+ "grad_norm": 0.618545651435852,
601
+ "learning_rate": 4.5270655270655274e-05,
602
+ "loss": 0.6695,
603
+ "step": 830
604
+ },
605
+ {
606
+ "epoch": 0.28717948717948716,
607
+ "grad_norm": 1.4270812273025513,
608
+ "learning_rate": 4.521367521367522e-05,
609
+ "loss": 0.588,
610
+ "step": 840
611
+ },
612
+ {
613
+ "epoch": 0.2905982905982906,
614
+ "grad_norm": 1.277422547340393,
615
+ "learning_rate": 4.515669515669516e-05,
616
+ "loss": 0.6822,
617
+ "step": 850
618
+ },
619
+ {
620
+ "epoch": 0.294017094017094,
621
+ "grad_norm": 0.44470494985580444,
622
+ "learning_rate": 4.50997150997151e-05,
623
+ "loss": 0.6401,
624
+ "step": 860
625
+ },
626
+ {
627
+ "epoch": 0.29743589743589743,
628
+ "grad_norm": 0.6381728053092957,
629
+ "learning_rate": 4.5042735042735046e-05,
630
+ "loss": 0.693,
631
+ "step": 870
632
+ },
633
+ {
634
+ "epoch": 0.30085470085470084,
635
+ "grad_norm": 0.4355703294277191,
636
+ "learning_rate": 4.498575498575499e-05,
637
+ "loss": 0.6083,
638
+ "step": 880
639
+ },
640
+ {
641
+ "epoch": 0.30427350427350425,
642
+ "grad_norm": 1.0187709331512451,
643
+ "learning_rate": 4.492877492877493e-05,
644
+ "loss": 0.5236,
645
+ "step": 890
646
+ },
647
+ {
648
+ "epoch": 0.3076923076923077,
649
+ "grad_norm": 0.7143679261207581,
650
+ "learning_rate": 4.4871794871794874e-05,
651
+ "loss": 0.6413,
652
+ "step": 900
653
+ },
654
+ {
655
+ "epoch": 0.3111111111111111,
656
+ "grad_norm": 1.0808229446411133,
657
+ "learning_rate": 4.481481481481482e-05,
658
+ "loss": 0.6026,
659
+ "step": 910
660
+ },
661
+ {
662
+ "epoch": 0.3145299145299145,
663
+ "grad_norm": 0.796187698841095,
664
+ "learning_rate": 4.475783475783476e-05,
665
+ "loss": 0.6812,
666
+ "step": 920
667
+ },
668
+ {
669
+ "epoch": 0.31794871794871793,
670
+ "grad_norm": 0.5163740515708923,
671
+ "learning_rate": 4.47008547008547e-05,
672
+ "loss": 0.6537,
673
+ "step": 930
674
+ },
675
+ {
676
+ "epoch": 0.3213675213675214,
677
+ "grad_norm": 0.7213220596313477,
678
+ "learning_rate": 4.4643874643874646e-05,
679
+ "loss": 0.6765,
680
+ "step": 940
681
+ },
682
+ {
683
+ "epoch": 0.3247863247863248,
684
+ "grad_norm": 0.44362661242485046,
685
+ "learning_rate": 4.458689458689459e-05,
686
+ "loss": 0.6249,
687
+ "step": 950
688
+ },
689
+ {
690
+ "epoch": 0.3282051282051282,
691
+ "grad_norm": 0.4917695224285126,
692
+ "learning_rate": 4.452991452991453e-05,
693
+ "loss": 0.63,
694
+ "step": 960
695
+ },
696
+ {
697
+ "epoch": 0.3316239316239316,
698
+ "grad_norm": 0.709846556186676,
699
+ "learning_rate": 4.4472934472934475e-05,
700
+ "loss": 0.5544,
701
+ "step": 970
702
+ },
703
+ {
704
+ "epoch": 0.335042735042735,
705
+ "grad_norm": 1.065099835395813,
706
+ "learning_rate": 4.441595441595442e-05,
707
+ "loss": 0.6338,
708
+ "step": 980
709
+ },
710
+ {
711
+ "epoch": 0.3384615384615385,
712
+ "grad_norm": 0.42223694920539856,
713
+ "learning_rate": 4.435897435897436e-05,
714
+ "loss": 0.5828,
715
+ "step": 990
716
+ },
717
+ {
718
+ "epoch": 0.3418803418803419,
719
+ "grad_norm": 1.5173028707504272,
720
+ "learning_rate": 4.4301994301994304e-05,
721
+ "loss": 0.6229,
722
+ "step": 1000
723
+ },
724
+ {
725
+ "epoch": 0.3418803418803419,
726
+ "eval_accuracy": 0.661082143772972,
727
+ "eval_f1": 0.0,
728
+ "eval_loss": 0.6458322405815125,
729
+ "eval_precision": 0.0,
730
+ "eval_recall": 0.0,
731
+ "eval_roc_auc": 0.5011399036892176,
732
+ "eval_runtime": 36.5197,
733
+ "eval_samples_per_second": 320.347,
734
+ "eval_steps_per_second": 20.044,
735
+ "step": 1000
736
+ },
737
+ {
738
+ "epoch": 0.3452991452991453,
739
+ "grad_norm": 0.8043766617774963,
740
+ "learning_rate": 4.424501424501425e-05,
741
+ "loss": 0.6463,
742
+ "step": 1010
743
+ },
744
+ {
745
+ "epoch": 0.3487179487179487,
746
+ "grad_norm": 0.6817493438720703,
747
+ "learning_rate": 4.418803418803419e-05,
748
+ "loss": 0.6266,
749
+ "step": 1020
750
+ },
751
+ {
752
+ "epoch": 0.35213675213675216,
753
+ "grad_norm": 0.6765307784080505,
754
+ "learning_rate": 4.413105413105413e-05,
755
+ "loss": 0.6203,
756
+ "step": 1030
757
+ },
758
+ {
759
+ "epoch": 0.35555555555555557,
760
+ "grad_norm": 0.6116905808448792,
761
+ "learning_rate": 4.4074074074074076e-05,
762
+ "loss": 0.5933,
763
+ "step": 1040
764
+ },
765
+ {
766
+ "epoch": 0.358974358974359,
767
+ "grad_norm": 0.3634931445121765,
768
+ "learning_rate": 4.401709401709402e-05,
769
+ "loss": 0.6612,
770
+ "step": 1050
771
+ },
772
+ {
773
+ "epoch": 0.3623931623931624,
774
+ "grad_norm": 0.8377366065979004,
775
+ "learning_rate": 4.396011396011396e-05,
776
+ "loss": 0.6933,
777
+ "step": 1060
778
+ },
779
+ {
780
+ "epoch": 0.3658119658119658,
781
+ "grad_norm": 0.7808057069778442,
782
+ "learning_rate": 4.3903133903133905e-05,
783
+ "loss": 0.6101,
784
+ "step": 1070
785
+ },
786
+ {
787
+ "epoch": 0.36923076923076925,
788
+ "grad_norm": 0.5020534992218018,
789
+ "learning_rate": 4.384615384615385e-05,
790
+ "loss": 0.6333,
791
+ "step": 1080
792
+ },
793
+ {
794
+ "epoch": 0.37264957264957266,
795
+ "grad_norm": 0.9217988848686218,
796
+ "learning_rate": 4.378917378917379e-05,
797
+ "loss": 0.652,
798
+ "step": 1090
799
+ },
800
+ {
801
+ "epoch": 0.37606837606837606,
802
+ "grad_norm": 0.426917165517807,
803
+ "learning_rate": 4.3732193732193733e-05,
804
+ "loss": 0.6776,
805
+ "step": 1100
806
+ },
807
+ {
808
+ "epoch": 0.37948717948717947,
809
+ "grad_norm": 1.00786292552948,
810
+ "learning_rate": 4.3675213675213676e-05,
811
+ "loss": 0.6308,
812
+ "step": 1110
813
+ },
814
+ {
815
+ "epoch": 0.38290598290598293,
816
+ "grad_norm": 0.5222122669219971,
817
+ "learning_rate": 4.361823361823362e-05,
818
+ "loss": 0.5881,
819
+ "step": 1120
820
+ },
821
+ {
822
+ "epoch": 0.38632478632478634,
823
+ "grad_norm": 1.309751272201538,
824
+ "learning_rate": 4.356125356125356e-05,
825
+ "loss": 0.6988,
826
+ "step": 1130
827
+ },
828
+ {
829
+ "epoch": 0.38974358974358975,
830
+ "grad_norm": 0.5627844929695129,
831
+ "learning_rate": 4.3504273504273505e-05,
832
+ "loss": 0.6396,
833
+ "step": 1140
834
+ },
835
+ {
836
+ "epoch": 0.39316239316239315,
837
+ "grad_norm": 0.40362900495529175,
838
+ "learning_rate": 4.344729344729345e-05,
839
+ "loss": 0.639,
840
+ "step": 1150
841
+ },
842
+ {
843
+ "epoch": 0.39658119658119656,
844
+ "grad_norm": 0.632331371307373,
845
+ "learning_rate": 4.339031339031339e-05,
846
+ "loss": 0.6187,
847
+ "step": 1160
848
+ },
849
+ {
850
+ "epoch": 0.4,
851
+ "grad_norm": 1.1355897188186646,
852
+ "learning_rate": 4.3333333333333334e-05,
853
+ "loss": 0.6317,
854
+ "step": 1170
855
+ },
856
+ {
857
+ "epoch": 0.40341880341880343,
858
+ "grad_norm": 0.8610725998878479,
859
+ "learning_rate": 4.327635327635328e-05,
860
+ "loss": 0.631,
861
+ "step": 1180
862
+ },
863
+ {
864
+ "epoch": 0.40683760683760684,
865
+ "grad_norm": 0.6825465559959412,
866
+ "learning_rate": 4.321937321937322e-05,
867
+ "loss": 0.6825,
868
+ "step": 1190
869
+ },
870
+ {
871
+ "epoch": 0.41025641025641024,
872
+ "grad_norm": 1.3887457847595215,
873
+ "learning_rate": 4.316239316239317e-05,
874
+ "loss": 0.6221,
875
+ "step": 1200
876
+ },
877
+ {
878
+ "epoch": 0.41367521367521365,
879
+ "grad_norm": 0.5809090733528137,
880
+ "learning_rate": 4.3105413105413106e-05,
881
+ "loss": 0.6117,
882
+ "step": 1210
883
+ },
884
+ {
885
+ "epoch": 0.4170940170940171,
886
+ "grad_norm": 0.4157603681087494,
887
+ "learning_rate": 4.304843304843305e-05,
888
+ "loss": 0.613,
889
+ "step": 1220
890
+ },
891
+ {
892
+ "epoch": 0.4205128205128205,
893
+ "grad_norm": 0.4386206269264221,
894
+ "learning_rate": 4.2991452991453e-05,
895
+ "loss": 0.6458,
896
+ "step": 1230
897
+ },
898
+ {
899
+ "epoch": 0.4239316239316239,
900
+ "grad_norm": 1.4249426126480103,
901
+ "learning_rate": 4.2934472934472935e-05,
902
+ "loss": 0.66,
903
+ "step": 1240
904
+ },
905
+ {
906
+ "epoch": 0.42735042735042733,
907
+ "grad_norm": 1.3717528581619263,
908
+ "learning_rate": 4.287749287749288e-05,
909
+ "loss": 0.6497,
910
+ "step": 1250
911
+ },
912
+ {
913
+ "epoch": 0.4307692307692308,
914
+ "grad_norm": 0.6880800724029541,
915
+ "learning_rate": 4.282051282051282e-05,
916
+ "loss": 0.6231,
917
+ "step": 1260
918
+ },
919
+ {
920
+ "epoch": 0.4341880341880342,
921
+ "grad_norm": 0.9455773234367371,
922
+ "learning_rate": 4.2763532763532764e-05,
923
+ "loss": 0.6524,
924
+ "step": 1270
925
+ },
926
+ {
927
+ "epoch": 0.4376068376068376,
928
+ "grad_norm": 1.2795006036758423,
929
+ "learning_rate": 4.270655270655271e-05,
930
+ "loss": 0.6039,
931
+ "step": 1280
932
+ },
933
+ {
934
+ "epoch": 0.441025641025641,
935
+ "grad_norm": 0.4846753776073456,
936
+ "learning_rate": 4.264957264957265e-05,
937
+ "loss": 0.6066,
938
+ "step": 1290
939
+ },
940
+ {
941
+ "epoch": 0.4444444444444444,
942
+ "grad_norm": 0.49425560235977173,
943
+ "learning_rate": 4.259259259259259e-05,
944
+ "loss": 0.6545,
945
+ "step": 1300
946
+ },
947
+ {
948
+ "epoch": 0.4478632478632479,
949
+ "grad_norm": 0.924453854560852,
950
+ "learning_rate": 4.253561253561254e-05,
951
+ "loss": 0.6406,
952
+ "step": 1310
953
+ },
954
+ {
955
+ "epoch": 0.4512820512820513,
956
+ "grad_norm": 0.46777766942977905,
957
+ "learning_rate": 4.247863247863248e-05,
958
+ "loss": 0.6275,
959
+ "step": 1320
960
+ },
961
+ {
962
+ "epoch": 0.4547008547008547,
963
+ "grad_norm": 0.7829861044883728,
964
+ "learning_rate": 4.242165242165243e-05,
965
+ "loss": 0.6445,
966
+ "step": 1330
967
+ },
968
+ {
969
+ "epoch": 0.4581196581196581,
970
+ "grad_norm": 0.6596978306770325,
971
+ "learning_rate": 4.2364672364672364e-05,
972
+ "loss": 0.648,
973
+ "step": 1340
974
+ },
975
+ {
976
+ "epoch": 0.46153846153846156,
977
+ "grad_norm": 0.9732853770256042,
978
+ "learning_rate": 4.230769230769231e-05,
979
+ "loss": 0.6738,
980
+ "step": 1350
981
+ },
982
+ {
983
+ "epoch": 0.46495726495726497,
984
+ "grad_norm": 0.4845993220806122,
985
+ "learning_rate": 4.225071225071226e-05,
986
+ "loss": 0.6464,
987
+ "step": 1360
988
+ },
989
+ {
990
+ "epoch": 0.4683760683760684,
991
+ "grad_norm": 0.40009310841560364,
992
+ "learning_rate": 4.219373219373219e-05,
993
+ "loss": 0.6193,
994
+ "step": 1370
995
+ },
996
+ {
997
+ "epoch": 0.4717948717948718,
998
+ "grad_norm": 1.296000361442566,
999
+ "learning_rate": 4.2136752136752136e-05,
1000
+ "loss": 0.608,
1001
+ "step": 1380
1002
+ },
1003
+ {
1004
+ "epoch": 0.4752136752136752,
1005
+ "grad_norm": 0.3851681351661682,
1006
+ "learning_rate": 4.2079772079772086e-05,
1007
+ "loss": 0.636,
1008
+ "step": 1390
1009
+ },
1010
+ {
1011
+ "epoch": 0.47863247863247865,
1012
+ "grad_norm": 1.5586471557617188,
1013
+ "learning_rate": 4.202279202279202e-05,
1014
+ "loss": 0.652,
1015
+ "step": 1400
1016
+ },
1017
+ {
1018
+ "epoch": 0.48205128205128206,
1019
+ "grad_norm": 1.1093754768371582,
1020
+ "learning_rate": 4.196581196581197e-05,
1021
+ "loss": 0.6397,
1022
+ "step": 1410
1023
+ },
1024
+ {
1025
+ "epoch": 0.48547008547008547,
1026
+ "grad_norm": 0.6494556665420532,
1027
+ "learning_rate": 4.190883190883191e-05,
1028
+ "loss": 0.6691,
1029
+ "step": 1420
1030
+ },
1031
+ {
1032
+ "epoch": 0.4888888888888889,
1033
+ "grad_norm": 0.6842040419578552,
1034
+ "learning_rate": 4.185185185185185e-05,
1035
+ "loss": 0.653,
1036
+ "step": 1430
1037
+ },
1038
+ {
1039
+ "epoch": 0.49230769230769234,
1040
+ "grad_norm": 0.39208441972732544,
1041
+ "learning_rate": 4.17948717948718e-05,
1042
+ "loss": 0.6303,
1043
+ "step": 1440
1044
+ },
1045
+ {
1046
+ "epoch": 0.49572649572649574,
1047
+ "grad_norm": 0.3755127787590027,
1048
+ "learning_rate": 4.1737891737891737e-05,
1049
+ "loss": 0.6619,
1050
+ "step": 1450
1051
+ },
1052
+ {
1053
+ "epoch": 0.49914529914529915,
1054
+ "grad_norm": 0.3358234167098999,
1055
+ "learning_rate": 4.168091168091168e-05,
1056
+ "loss": 0.6782,
1057
+ "step": 1460
1058
+ },
1059
+ {
1060
+ "epoch": 0.5025641025641026,
1061
+ "grad_norm": 0.30498063564300537,
1062
+ "learning_rate": 4.162393162393163e-05,
1063
+ "loss": 0.6582,
1064
+ "step": 1470
1065
+ },
1066
+ {
1067
+ "epoch": 0.505982905982906,
1068
+ "grad_norm": 0.7140593528747559,
1069
+ "learning_rate": 4.1566951566951565e-05,
1070
+ "loss": 0.6749,
1071
+ "step": 1480
1072
+ },
1073
+ {
1074
+ "epoch": 0.5094017094017094,
1075
+ "grad_norm": 0.4288971424102783,
1076
+ "learning_rate": 4.1509971509971515e-05,
1077
+ "loss": 0.6355,
1078
+ "step": 1490
1079
+ },
1080
+ {
1081
+ "epoch": 0.5128205128205128,
1082
+ "grad_norm": 0.8717936277389526,
1083
+ "learning_rate": 4.145299145299146e-05,
1084
+ "loss": 0.6258,
1085
+ "step": 1500
1086
+ },
1087
+ {
1088
+ "epoch": 0.5128205128205128,
1089
+ "eval_accuracy": 0.661082143772972,
1090
+ "eval_f1": 0.0,
1091
+ "eval_loss": 0.6402843594551086,
1092
+ "eval_precision": 0.0,
1093
+ "eval_recall": 0.0,
1094
+ "eval_roc_auc": 0.5119639749280213,
1095
+ "eval_runtime": 35.8008,
1096
+ "eval_samples_per_second": 326.78,
1097
+ "eval_steps_per_second": 20.446,
1098
+ "step": 1500
1099
  }
1100
  ],
1101
  "logging_steps": 10,
 
1110
  "early_stopping_threshold": 0.001
1111
  },
1112
  "attributes": {
1113
+ "early_stopping_patience_counter": 2
1114
  }
1115
  },
1116
  "TrainerControl": {
 
1124
  "attributes": {}
1125
  }
1126
  },
1127
+ "total_flos": 794804391936000.0,
1128
  "train_batch_size": 16,
1129
  "trial_name": null,
1130
  "trial_params": null