End of training
Browse files
all_results.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
"eval_loss": 0.8710034489631653,
|
| 4 |
-
"eval_runtime":
|
| 5 |
-
"eval_samples_per_second": 1.
|
| 6 |
-
"eval_steps_per_second": 0.
|
| 7 |
"eval_wer": 60.05599273607748,
|
| 8 |
-
"train_loss": 0.
|
| 9 |
-
"train_runtime":
|
| 10 |
-
"train_samples_per_second":
|
| 11 |
-
"train_steps_per_second": 0.
|
| 12 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 30.0,
|
| 3 |
"eval_loss": 0.8710034489631653,
|
| 4 |
+
"eval_runtime": 337.4912,
|
| 5 |
+
"eval_samples_per_second": 1.517,
|
| 6 |
+
"eval_steps_per_second": 0.047,
|
| 7 |
"eval_wer": 60.05599273607748,
|
| 8 |
+
"train_loss": 0.04157245059808095,
|
| 9 |
+
"train_runtime": 1332.7289,
|
| 10 |
+
"train_samples_per_second": 57.626,
|
| 11 |
+
"train_steps_per_second": 0.9
|
| 12 |
}
|
eval_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
"eval_loss": 0.8710034489631653,
|
| 4 |
-
"eval_runtime":
|
| 5 |
-
"eval_samples_per_second": 1.
|
| 6 |
-
"eval_steps_per_second": 0.
|
| 7 |
"eval_wer": 60.05599273607748
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 30.0,
|
| 3 |
"eval_loss": 0.8710034489631653,
|
| 4 |
+
"eval_runtime": 337.4912,
|
| 5 |
+
"eval_samples_per_second": 1.517,
|
| 6 |
+
"eval_steps_per_second": 0.047,
|
| 7 |
"eval_wer": 60.05599273607748
|
| 8 |
}
|
runs/Dec20_19-12-39_129-146-32-172/events.out.tfevents.1671565316.129-146-32-172.141824.2
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec56a28eb4e411c0adfb0babd31b28a184b5ee3604d6eb7ab0a277729f38d64b
|
| 3 |
+
size 358
|
train_results.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
-
"train_samples_per_second":
|
| 6 |
-
"train_steps_per_second": 0.
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 30.0,
|
| 3 |
+
"train_loss": 0.04157245059808095,
|
| 4 |
+
"train_runtime": 1332.7289,
|
| 5 |
+
"train_samples_per_second": 57.626,
|
| 6 |
+
"train_steps_per_second": 0.9
|
| 7 |
}
|
trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.8710034489631653,
|
| 3 |
"best_model_checkpoint": "./checkpoint-400",
|
| 4 |
-
"epoch":
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -698,18 +698,156 @@
|
|
| 698 |
"step": 1000
|
| 699 |
},
|
| 700 |
{
|
| 701 |
-
"epoch": 25.
|
| 702 |
-
"
|
| 703 |
-
"
|
| 704 |
-
"
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
}
|
| 709 |
],
|
| 710 |
-
"max_steps":
|
| 711 |
-
"num_train_epochs":
|
| 712 |
-
"total_flos": 4.
|
| 713 |
"trial_name": null,
|
| 714 |
"trial_params": null
|
| 715 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.8710034489631653,
|
| 3 |
"best_model_checkpoint": "./checkpoint-400",
|
| 4 |
+
"epoch": 30.0,
|
| 5 |
+
"global_step": 1200,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 698 |
"step": 1000
|
| 699 |
},
|
| 700 |
{
|
| 701 |
+
"epoch": 25.25,
|
| 702 |
+
"learning_rate": 1.6581196581196582e-06,
|
| 703 |
+
"loss": 0.2704,
|
| 704 |
+
"step": 1010
|
| 705 |
+
},
|
| 706 |
+
{
|
| 707 |
+
"epoch": 25.5,
|
| 708 |
+
"learning_rate": 1.5726495726495727e-06,
|
| 709 |
+
"loss": 0.2582,
|
| 710 |
+
"step": 1020
|
| 711 |
+
},
|
| 712 |
+
{
|
| 713 |
+
"epoch": 25.75,
|
| 714 |
+
"learning_rate": 1.4871794871794873e-06,
|
| 715 |
+
"loss": 0.2634,
|
| 716 |
+
"step": 1030
|
| 717 |
+
},
|
| 718 |
+
{
|
| 719 |
+
"epoch": 26.0,
|
| 720 |
+
"learning_rate": 1.401709401709402e-06,
|
| 721 |
+
"loss": 0.2451,
|
| 722 |
+
"step": 1040
|
| 723 |
+
},
|
| 724 |
+
{
|
| 725 |
+
"epoch": 26.25,
|
| 726 |
+
"learning_rate": 1.3162393162393163e-06,
|
| 727 |
+
"loss": 0.2546,
|
| 728 |
+
"step": 1050
|
| 729 |
+
},
|
| 730 |
+
{
|
| 731 |
+
"epoch": 26.5,
|
| 732 |
+
"learning_rate": 1.230769230769231e-06,
|
| 733 |
+
"loss": 0.2604,
|
| 734 |
+
"step": 1060
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 26.75,
|
| 738 |
+
"learning_rate": 1.1452991452991454e-06,
|
| 739 |
+
"loss": 0.2459,
|
| 740 |
+
"step": 1070
|
| 741 |
+
},
|
| 742 |
+
{
|
| 743 |
+
"epoch": 27.0,
|
| 744 |
+
"learning_rate": 1.0598290598290598e-06,
|
| 745 |
+
"loss": 0.255,
|
| 746 |
+
"step": 1080
|
| 747 |
+
},
|
| 748 |
+
{
|
| 749 |
+
"epoch": 27.25,
|
| 750 |
+
"learning_rate": 9.743589743589745e-07,
|
| 751 |
+
"loss": 0.2505,
|
| 752 |
+
"step": 1090
|
| 753 |
+
},
|
| 754 |
+
{
|
| 755 |
+
"epoch": 27.5,
|
| 756 |
+
"learning_rate": 8.88888888888889e-07,
|
| 757 |
+
"loss": 0.2454,
|
| 758 |
+
"step": 1100
|
| 759 |
+
},
|
| 760 |
+
{
|
| 761 |
+
"epoch": 27.5,
|
| 762 |
+
"eval_loss": 0.9438697099685669,
|
| 763 |
+
"eval_runtime": 297.6469,
|
| 764 |
+
"eval_samples_per_second": 1.72,
|
| 765 |
+
"eval_steps_per_second": 0.054,
|
| 766 |
+
"eval_wer": 59.193401937046005,
|
| 767 |
+
"step": 1100
|
| 768 |
+
},
|
| 769 |
+
{
|
| 770 |
+
"epoch": 27.75,
|
| 771 |
+
"learning_rate": 8.034188034188035e-07,
|
| 772 |
+
"loss": 0.2506,
|
| 773 |
+
"step": 1110
|
| 774 |
+
},
|
| 775 |
+
{
|
| 776 |
+
"epoch": 28.0,
|
| 777 |
+
"learning_rate": 7.179487179487179e-07,
|
| 778 |
+
"loss": 0.2497,
|
| 779 |
+
"step": 1120
|
| 780 |
+
},
|
| 781 |
+
{
|
| 782 |
+
"epoch": 28.25,
|
| 783 |
+
"learning_rate": 6.324786324786325e-07,
|
| 784 |
+
"loss": 0.2477,
|
| 785 |
+
"step": 1130
|
| 786 |
+
},
|
| 787 |
+
{
|
| 788 |
+
"epoch": 28.5,
|
| 789 |
+
"learning_rate": 5.470085470085471e-07,
|
| 790 |
+
"loss": 0.2489,
|
| 791 |
+
"step": 1140
|
| 792 |
+
},
|
| 793 |
+
{
|
| 794 |
+
"epoch": 28.75,
|
| 795 |
+
"learning_rate": 4.615384615384616e-07,
|
| 796 |
+
"loss": 0.2428,
|
| 797 |
+
"step": 1150
|
| 798 |
+
},
|
| 799 |
+
{
|
| 800 |
+
"epoch": 29.0,
|
| 801 |
+
"learning_rate": 3.760683760683761e-07,
|
| 802 |
+
"loss": 0.2395,
|
| 803 |
+
"step": 1160
|
| 804 |
+
},
|
| 805 |
+
{
|
| 806 |
+
"epoch": 29.25,
|
| 807 |
+
"learning_rate": 2.905982905982906e-07,
|
| 808 |
+
"loss": 0.2438,
|
| 809 |
+
"step": 1170
|
| 810 |
+
},
|
| 811 |
+
{
|
| 812 |
+
"epoch": 29.5,
|
| 813 |
+
"learning_rate": 2.0512820512820514e-07,
|
| 814 |
+
"loss": 0.2403,
|
| 815 |
+
"step": 1180
|
| 816 |
+
},
|
| 817 |
+
{
|
| 818 |
+
"epoch": 29.75,
|
| 819 |
+
"learning_rate": 1.1965811965811967e-07,
|
| 820 |
+
"loss": 0.2469,
|
| 821 |
+
"step": 1190
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 30.0,
|
| 825 |
+
"learning_rate": 3.418803418803419e-08,
|
| 826 |
+
"loss": 0.2297,
|
| 827 |
+
"step": 1200
|
| 828 |
+
},
|
| 829 |
+
{
|
| 830 |
+
"epoch": 30.0,
|
| 831 |
+
"eval_loss": 0.9485259652137756,
|
| 832 |
+
"eval_runtime": 355.4454,
|
| 833 |
+
"eval_samples_per_second": 1.44,
|
| 834 |
+
"eval_steps_per_second": 0.045,
|
| 835 |
+
"eval_wer": 59.042070217917676,
|
| 836 |
+
"step": 1200
|
| 837 |
+
},
|
| 838 |
+
{
|
| 839 |
+
"epoch": 30.0,
|
| 840 |
+
"step": 1200,
|
| 841 |
+
"total_flos": 4.8586623123456e+18,
|
| 842 |
+
"train_loss": 0.04157245059808095,
|
| 843 |
+
"train_runtime": 1332.7289,
|
| 844 |
+
"train_samples_per_second": 57.626,
|
| 845 |
+
"train_steps_per_second": 0.9
|
| 846 |
}
|
| 847 |
],
|
| 848 |
+
"max_steps": 1200,
|
| 849 |
+
"num_train_epochs": 30,
|
| 850 |
+
"total_flos": 4.8586623123456e+18,
|
| 851 |
"trial_name": null,
|
| 852 |
"trial_params": null
|
| 853 |
}
|