EYEDOL commited on
Commit
23b5905
·
verified ·
1 Parent(s): 58a3c54

Upload folder using huggingface_hub

Browse files
checkpoint-200/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": null,
10
+ "bos_token_id": 50257,
11
+ "classifier_proj_size": 256,
12
+ "d_model": 768,
13
+ "decoder_attention_heads": 12,
14
+ "decoder_ffn_dim": 3072,
15
+ "decoder_layerdrop": 0.0,
16
+ "decoder_layers": 12,
17
+ "decoder_start_token_id": 50258,
18
+ "dropout": 0.0,
19
+ "encoder_attention_heads": 12,
20
+ "encoder_ffn_dim": 3072,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 12,
23
+ "eos_token_id": 50257,
24
+ "forced_decoder_ids": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "mask_feature_length": 10,
28
+ "mask_feature_min_masks": 0,
29
+ "mask_feature_prob": 0.0,
30
+ "mask_time_length": 10,
31
+ "mask_time_min_masks": 2,
32
+ "mask_time_prob": 0.05,
33
+ "max_length": null,
34
+ "max_source_positions": 1500,
35
+ "max_target_positions": 448,
36
+ "median_filter_width": 7,
37
+ "model_type": "whisper",
38
+ "num_hidden_layers": 12,
39
+ "num_mel_bins": 80,
40
+ "pad_token_id": 50257,
41
+ "scale_embedding": false,
42
+ "torch_dtype": "float32",
43
+ "transformers_version": "4.51.3",
44
+ "use_cache": false,
45
+ "use_weighted_layer_sum": false,
46
+ "vocab_size": 51865
47
+ }
checkpoint-200/generation_config.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 5,
5
+ 3
6
+ ],
7
+ [
8
+ 5,
9
+ 9
10
+ ],
11
+ [
12
+ 8,
13
+ 0
14
+ ],
15
+ [
16
+ 8,
17
+ 4
18
+ ],
19
+ [
20
+ 8,
21
+ 7
22
+ ],
23
+ [
24
+ 8,
25
+ 8
26
+ ],
27
+ [
28
+ 9,
29
+ 0
30
+ ],
31
+ [
32
+ 9,
33
+ 7
34
+ ],
35
+ [
36
+ 9,
37
+ 9
38
+ ],
39
+ [
40
+ 10,
41
+ 5
42
+ ]
43
+ ],
44
+ "begin_suppress_tokens": [
45
+ 220,
46
+ 50257
47
+ ],
48
+ "bos_token_id": 50257,
49
+ "decoder_start_token_id": 50258,
50
+ "eos_token_id": 50257,
51
+ "forced_decoder_ids": [
52
+ [
53
+ 1,
54
+ null
55
+ ],
56
+ [
57
+ 2,
58
+ 50359
59
+ ]
60
+ ],
61
+ "is_multilingual": true,
62
+ "lang_to_id": {
63
+ "<|af|>": 50327,
64
+ "<|am|>": 50334,
65
+ "<|ar|>": 50272,
66
+ "<|as|>": 50350,
67
+ "<|az|>": 50304,
68
+ "<|ba|>": 50355,
69
+ "<|be|>": 50330,
70
+ "<|bg|>": 50292,
71
+ "<|bn|>": 50302,
72
+ "<|bo|>": 50347,
73
+ "<|br|>": 50309,
74
+ "<|bs|>": 50315,
75
+ "<|ca|>": 50270,
76
+ "<|cs|>": 50283,
77
+ "<|cy|>": 50297,
78
+ "<|da|>": 50285,
79
+ "<|de|>": 50261,
80
+ "<|el|>": 50281,
81
+ "<|en|>": 50259,
82
+ "<|es|>": 50262,
83
+ "<|et|>": 50307,
84
+ "<|eu|>": 50310,
85
+ "<|fa|>": 50300,
86
+ "<|fi|>": 50277,
87
+ "<|fo|>": 50338,
88
+ "<|fr|>": 50265,
89
+ "<|gl|>": 50319,
90
+ "<|gu|>": 50333,
91
+ "<|haw|>": 50352,
92
+ "<|ha|>": 50354,
93
+ "<|he|>": 50279,
94
+ "<|hi|>": 50276,
95
+ "<|hr|>": 50291,
96
+ "<|ht|>": 50339,
97
+ "<|hu|>": 50286,
98
+ "<|hy|>": 50312,
99
+ "<|id|>": 50275,
100
+ "<|is|>": 50311,
101
+ "<|it|>": 50274,
102
+ "<|ja|>": 50266,
103
+ "<|jw|>": 50356,
104
+ "<|ka|>": 50329,
105
+ "<|kk|>": 50316,
106
+ "<|km|>": 50323,
107
+ "<|kn|>": 50306,
108
+ "<|ko|>": 50264,
109
+ "<|la|>": 50294,
110
+ "<|lb|>": 50345,
111
+ "<|ln|>": 50353,
112
+ "<|lo|>": 50336,
113
+ "<|lt|>": 50293,
114
+ "<|lv|>": 50301,
115
+ "<|mg|>": 50349,
116
+ "<|mi|>": 50295,
117
+ "<|mk|>": 50308,
118
+ "<|ml|>": 50296,
119
+ "<|mn|>": 50314,
120
+ "<|mr|>": 50320,
121
+ "<|ms|>": 50282,
122
+ "<|mt|>": 50343,
123
+ "<|my|>": 50346,
124
+ "<|ne|>": 50313,
125
+ "<|nl|>": 50271,
126
+ "<|nn|>": 50342,
127
+ "<|no|>": 50288,
128
+ "<|oc|>": 50328,
129
+ "<|pa|>": 50321,
130
+ "<|pl|>": 50269,
131
+ "<|ps|>": 50340,
132
+ "<|pt|>": 50267,
133
+ "<|ro|>": 50284,
134
+ "<|ru|>": 50263,
135
+ "<|sa|>": 50344,
136
+ "<|sd|>": 50332,
137
+ "<|si|>": 50322,
138
+ "<|sk|>": 50298,
139
+ "<|sl|>": 50305,
140
+ "<|sn|>": 50324,
141
+ "<|so|>": 50326,
142
+ "<|sq|>": 50317,
143
+ "<|sr|>": 50303,
144
+ "<|su|>": 50357,
145
+ "<|sv|>": 50273,
146
+ "<|sw|>": 50318,
147
+ "<|ta|>": 50287,
148
+ "<|te|>": 50299,
149
+ "<|tg|>": 50331,
150
+ "<|th|>": 50289,
151
+ "<|tk|>": 50341,
152
+ "<|tl|>": 50348,
153
+ "<|tr|>": 50268,
154
+ "<|tt|>": 50351,
155
+ "<|uk|>": 50280,
156
+ "<|ur|>": 50290,
157
+ "<|uz|>": 50337,
158
+ "<|vi|>": 50278,
159
+ "<|yi|>": 50335,
160
+ "<|yo|>": 50325,
161
+ "<|zh|>": 50260
162
+ },
163
+ "max_initial_timestamp_index": 50,
164
+ "max_length": 448,
165
+ "no_timestamps_token_id": 50363,
166
+ "pad_token_id": 50257,
167
+ "prev_sot_token_id": 50361,
168
+ "return_timestamps": false,
169
+ "suppress_tokens": [],
170
+ "task_to_id": {
171
+ "transcribe": 50359,
172
+ "translate": 50358
173
+ },
174
+ "transformers_version": "4.51.3"
175
+ }
checkpoint-200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2892e2e8a768289512d80a4527ba6a28e97b8786cb115dc57ba64fb5dd1b35
3
+ size 966995080
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fee27de3f002951623132d14fb9111b69495b5382d4bd6cfe5b785e99689855
3
+ size 1925064044
checkpoint-200/preprocessor_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "dither": 0.0,
4
+ "feature_extractor_type": "WhisperFeatureExtractor",
5
+ "feature_size": 80,
6
+ "hop_length": 160,
7
+ "n_fft": 400,
8
+ "n_samples": 480000,
9
+ "nb_max_frames": 3000,
10
+ "padding_side": "right",
11
+ "padding_value": 0.0,
12
+ "processor_class": "WhisperProcessor",
13
+ "return_attention_mask": false,
14
+ "sampling_rate": 16000
15
+ }
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92ade350e84ed8edc73f8df3ecb0b81d4efe23f223f59b19ab9f901c5bd3f39f
3
+ size 14244
checkpoint-200/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:210384cc8aac9b5f42f3d07f3f98c31fe1102ba37532cf4f431d4379f4392fbb
3
+ size 988
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b551e44d36a93f06118444de4d46a48d68025b941ad3f20a21f7cd1e45875cd
3
+ size 1064
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 41.783249393769815,
4
+ "best_model_checkpoint": "./JUDIC/checkpoint-200",
5
+ "epoch": 2.3529411764705883,
6
+ "eval_steps": 200,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.23529411764705882,
14
+ "grad_norm": 39.55274200439453,
15
+ "learning_rate": 8.000000000000001e-07,
16
+ "loss": 2.8704,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.47058823529411764,
21
+ "grad_norm": 16.861303329467773,
22
+ "learning_rate": 1.8000000000000001e-06,
23
+ "loss": 2.4422,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.7058823529411765,
28
+ "grad_norm": 10.871281623840332,
29
+ "learning_rate": 2.8000000000000003e-06,
30
+ "loss": 1.9044,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.9411764705882353,
35
+ "grad_norm": 7.858098030090332,
36
+ "learning_rate": 3.8000000000000005e-06,
37
+ "loss": 1.4465,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 1.1764705882352942,
42
+ "grad_norm": 7.4879231452941895,
43
+ "learning_rate": 4.800000000000001e-06,
44
+ "loss": 1.3179,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 1.4117647058823528,
49
+ "grad_norm": 7.5675835609436035,
50
+ "learning_rate": 5.8e-06,
51
+ "loss": 1.0743,
52
+ "step": 120
53
+ },
54
+ {
55
+ "epoch": 1.6470588235294117,
56
+ "grad_norm": 6.898285865783691,
57
+ "learning_rate": 6.800000000000001e-06,
58
+ "loss": 1.0679,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 1.8823529411764706,
63
+ "grad_norm": 7.592809677124023,
64
+ "learning_rate": 7.800000000000002e-06,
65
+ "loss": 1.0381,
66
+ "step": 160
67
+ },
68
+ {
69
+ "epoch": 2.1176470588235294,
70
+ "grad_norm": 8.085546493530273,
71
+ "learning_rate": 8.8e-06,
72
+ "loss": 0.8211,
73
+ "step": 180
74
+ },
75
+ {
76
+ "epoch": 2.3529411764705883,
77
+ "grad_norm": 5.359922885894775,
78
+ "learning_rate": 9.800000000000001e-06,
79
+ "loss": 0.7182,
80
+ "step": 200
81
+ },
82
+ {
83
+ "epoch": 2.3529411764705883,
84
+ "eval_loss": 0.6759204864501953,
85
+ "eval_runtime": 326.8541,
86
+ "eval_samples_per_second": 2.083,
87
+ "eval_steps_per_second": 0.263,
88
+ "eval_wer": 41.783249393769815,
89
+ "step": 200
90
+ }
91
+ ],
92
+ "logging_steps": 20,
93
+ "max_steps": 425,
94
+ "num_input_tokens_seen": 0,
95
+ "num_train_epochs": 5,
96
+ "save_steps": 200,
97
+ "stateful_callbacks": {
98
+ "TrainerControl": {
99
+ "args": {
100
+ "should_epoch_stop": false,
101
+ "should_evaluate": false,
102
+ "should_log": false,
103
+ "should_save": true,
104
+ "should_training_stop": false
105
+ },
106
+ "attributes": {}
107
+ }
108
+ },
109
+ "total_flos": 1.846946562048e+18,
110
+ "train_batch_size": 16,
111
+ "trial_name": null,
112
+ "trial_params": null
113
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8560064d3c95b83f5ff13cbbf3f02ecfc5b06383d6bc753a071d8b592db8d83
3
+ size 5432
checkpoint-400/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": null,
10
+ "bos_token_id": 50257,
11
+ "classifier_proj_size": 256,
12
+ "d_model": 768,
13
+ "decoder_attention_heads": 12,
14
+ "decoder_ffn_dim": 3072,
15
+ "decoder_layerdrop": 0.0,
16
+ "decoder_layers": 12,
17
+ "decoder_start_token_id": 50258,
18
+ "dropout": 0.0,
19
+ "encoder_attention_heads": 12,
20
+ "encoder_ffn_dim": 3072,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 12,
23
+ "eos_token_id": 50257,
24
+ "forced_decoder_ids": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "mask_feature_length": 10,
28
+ "mask_feature_min_masks": 0,
29
+ "mask_feature_prob": 0.0,
30
+ "mask_time_length": 10,
31
+ "mask_time_min_masks": 2,
32
+ "mask_time_prob": 0.05,
33
+ "max_length": null,
34
+ "max_source_positions": 1500,
35
+ "max_target_positions": 448,
36
+ "median_filter_width": 7,
37
+ "model_type": "whisper",
38
+ "num_hidden_layers": 12,
39
+ "num_mel_bins": 80,
40
+ "pad_token_id": 50257,
41
+ "scale_embedding": false,
42
+ "torch_dtype": "float32",
43
+ "transformers_version": "4.51.3",
44
+ "use_cache": false,
45
+ "use_weighted_layer_sum": false,
46
+ "vocab_size": 51865
47
+ }
checkpoint-400/generation_config.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 5,
5
+ 3
6
+ ],
7
+ [
8
+ 5,
9
+ 9
10
+ ],
11
+ [
12
+ 8,
13
+ 0
14
+ ],
15
+ [
16
+ 8,
17
+ 4
18
+ ],
19
+ [
20
+ 8,
21
+ 7
22
+ ],
23
+ [
24
+ 8,
25
+ 8
26
+ ],
27
+ [
28
+ 9,
29
+ 0
30
+ ],
31
+ [
32
+ 9,
33
+ 7
34
+ ],
35
+ [
36
+ 9,
37
+ 9
38
+ ],
39
+ [
40
+ 10,
41
+ 5
42
+ ]
43
+ ],
44
+ "begin_suppress_tokens": [
45
+ 220,
46
+ 50257
47
+ ],
48
+ "bos_token_id": 50257,
49
+ "decoder_start_token_id": 50258,
50
+ "eos_token_id": 50257,
51
+ "forced_decoder_ids": [
52
+ [
53
+ 1,
54
+ null
55
+ ],
56
+ [
57
+ 2,
58
+ 50359
59
+ ]
60
+ ],
61
+ "is_multilingual": true,
62
+ "lang_to_id": {
63
+ "<|af|>": 50327,
64
+ "<|am|>": 50334,
65
+ "<|ar|>": 50272,
66
+ "<|as|>": 50350,
67
+ "<|az|>": 50304,
68
+ "<|ba|>": 50355,
69
+ "<|be|>": 50330,
70
+ "<|bg|>": 50292,
71
+ "<|bn|>": 50302,
72
+ "<|bo|>": 50347,
73
+ "<|br|>": 50309,
74
+ "<|bs|>": 50315,
75
+ "<|ca|>": 50270,
76
+ "<|cs|>": 50283,
77
+ "<|cy|>": 50297,
78
+ "<|da|>": 50285,
79
+ "<|de|>": 50261,
80
+ "<|el|>": 50281,
81
+ "<|en|>": 50259,
82
+ "<|es|>": 50262,
83
+ "<|et|>": 50307,
84
+ "<|eu|>": 50310,
85
+ "<|fa|>": 50300,
86
+ "<|fi|>": 50277,
87
+ "<|fo|>": 50338,
88
+ "<|fr|>": 50265,
89
+ "<|gl|>": 50319,
90
+ "<|gu|>": 50333,
91
+ "<|haw|>": 50352,
92
+ "<|ha|>": 50354,
93
+ "<|he|>": 50279,
94
+ "<|hi|>": 50276,
95
+ "<|hr|>": 50291,
96
+ "<|ht|>": 50339,
97
+ "<|hu|>": 50286,
98
+ "<|hy|>": 50312,
99
+ "<|id|>": 50275,
100
+ "<|is|>": 50311,
101
+ "<|it|>": 50274,
102
+ "<|ja|>": 50266,
103
+ "<|jw|>": 50356,
104
+ "<|ka|>": 50329,
105
+ "<|kk|>": 50316,
106
+ "<|km|>": 50323,
107
+ "<|kn|>": 50306,
108
+ "<|ko|>": 50264,
109
+ "<|la|>": 50294,
110
+ "<|lb|>": 50345,
111
+ "<|ln|>": 50353,
112
+ "<|lo|>": 50336,
113
+ "<|lt|>": 50293,
114
+ "<|lv|>": 50301,
115
+ "<|mg|>": 50349,
116
+ "<|mi|>": 50295,
117
+ "<|mk|>": 50308,
118
+ "<|ml|>": 50296,
119
+ "<|mn|>": 50314,
120
+ "<|mr|>": 50320,
121
+ "<|ms|>": 50282,
122
+ "<|mt|>": 50343,
123
+ "<|my|>": 50346,
124
+ "<|ne|>": 50313,
125
+ "<|nl|>": 50271,
126
+ "<|nn|>": 50342,
127
+ "<|no|>": 50288,
128
+ "<|oc|>": 50328,
129
+ "<|pa|>": 50321,
130
+ "<|pl|>": 50269,
131
+ "<|ps|>": 50340,
132
+ "<|pt|>": 50267,
133
+ "<|ro|>": 50284,
134
+ "<|ru|>": 50263,
135
+ "<|sa|>": 50344,
136
+ "<|sd|>": 50332,
137
+ "<|si|>": 50322,
138
+ "<|sk|>": 50298,
139
+ "<|sl|>": 50305,
140
+ "<|sn|>": 50324,
141
+ "<|so|>": 50326,
142
+ "<|sq|>": 50317,
143
+ "<|sr|>": 50303,
144
+ "<|su|>": 50357,
145
+ "<|sv|>": 50273,
146
+ "<|sw|>": 50318,
147
+ "<|ta|>": 50287,
148
+ "<|te|>": 50299,
149
+ "<|tg|>": 50331,
150
+ "<|th|>": 50289,
151
+ "<|tk|>": 50341,
152
+ "<|tl|>": 50348,
153
+ "<|tr|>": 50268,
154
+ "<|tt|>": 50351,
155
+ "<|uk|>": 50280,
156
+ "<|ur|>": 50290,
157
+ "<|uz|>": 50337,
158
+ "<|vi|>": 50278,
159
+ "<|yi|>": 50335,
160
+ "<|yo|>": 50325,
161
+ "<|zh|>": 50260
162
+ },
163
+ "max_initial_timestamp_index": 50,
164
+ "max_length": 448,
165
+ "no_timestamps_token_id": 50363,
166
+ "pad_token_id": 50257,
167
+ "prev_sot_token_id": 50361,
168
+ "return_timestamps": false,
169
+ "suppress_tokens": [],
170
+ "task_to_id": {
171
+ "transcribe": 50359,
172
+ "translate": 50358
173
+ },
174
+ "transformers_version": "4.51.3"
175
+ }
checkpoint-400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ccf6e0b18e00966cc3aa9593f5bc67a572fe68bf2f1c193e3a9f23842ef250c
3
+ size 966995080
checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31770e956f5864ecbeb44af1a8424c7ec91ab93053c2e7fc7840ec0c5aa72442
3
+ size 1925064044
checkpoint-400/preprocessor_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "dither": 0.0,
4
+ "feature_extractor_type": "WhisperFeatureExtractor",
5
+ "feature_size": 80,
6
+ "hop_length": 160,
7
+ "n_fft": 400,
8
+ "n_samples": 480000,
9
+ "nb_max_frames": 3000,
10
+ "padding_side": "right",
11
+ "padding_value": 0.0,
12
+ "processor_class": "WhisperProcessor",
13
+ "return_attention_mask": false,
14
+ "sampling_rate": 16000
15
+ }
checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3327069f4314534e1155a5e53b9e4445074a676d823a310baf6fdcb1c7c4b00a
3
+ size 14244
checkpoint-400/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:643802dd3517303465f449d0a6e794d1e1488b20ad8fa83264855b4ced649eac
3
+ size 988
checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c75408be126709cd3a95cfde165c89fc584aa0301e5e1c94cc95f4373b1d92
3
+ size 1064
checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 400,
3
+ "best_metric": 37.20698874588074,
4
+ "best_model_checkpoint": "./JUDIC/checkpoint-400",
5
+ "epoch": 4.705882352941177,
6
+ "eval_steps": 200,
7
+ "global_step": 400,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.23529411764705882,
14
+ "grad_norm": 39.55274200439453,
15
+ "learning_rate": 8.000000000000001e-07,
16
+ "loss": 2.8704,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.47058823529411764,
21
+ "grad_norm": 16.861303329467773,
22
+ "learning_rate": 1.8000000000000001e-06,
23
+ "loss": 2.4422,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.7058823529411765,
28
+ "grad_norm": 10.871281623840332,
29
+ "learning_rate": 2.8000000000000003e-06,
30
+ "loss": 1.9044,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.9411764705882353,
35
+ "grad_norm": 7.858098030090332,
36
+ "learning_rate": 3.8000000000000005e-06,
37
+ "loss": 1.4465,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 1.1764705882352942,
42
+ "grad_norm": 7.4879231452941895,
43
+ "learning_rate": 4.800000000000001e-06,
44
+ "loss": 1.3179,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 1.4117647058823528,
49
+ "grad_norm": 7.5675835609436035,
50
+ "learning_rate": 5.8e-06,
51
+ "loss": 1.0743,
52
+ "step": 120
53
+ },
54
+ {
55
+ "epoch": 1.6470588235294117,
56
+ "grad_norm": 6.898285865783691,
57
+ "learning_rate": 6.800000000000001e-06,
58
+ "loss": 1.0679,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 1.8823529411764706,
63
+ "grad_norm": 7.592809677124023,
64
+ "learning_rate": 7.800000000000002e-06,
65
+ "loss": 1.0381,
66
+ "step": 160
67
+ },
68
+ {
69
+ "epoch": 2.1176470588235294,
70
+ "grad_norm": 8.085546493530273,
71
+ "learning_rate": 8.8e-06,
72
+ "loss": 0.8211,
73
+ "step": 180
74
+ },
75
+ {
76
+ "epoch": 2.3529411764705883,
77
+ "grad_norm": 5.359922885894775,
78
+ "learning_rate": 9.800000000000001e-06,
79
+ "loss": 0.7182,
80
+ "step": 200
81
+ },
82
+ {
83
+ "epoch": 2.3529411764705883,
84
+ "eval_loss": 0.6759204864501953,
85
+ "eval_runtime": 326.8541,
86
+ "eval_samples_per_second": 2.083,
87
+ "eval_steps_per_second": 0.263,
88
+ "eval_wer": 41.783249393769815,
89
+ "step": 200
90
+ },
91
+ {
92
+ "epoch": 2.588235294117647,
93
+ "grad_norm": 3.346689462661743,
94
+ "learning_rate": 9.28888888888889e-06,
95
+ "loss": 0.7775,
96
+ "step": 220
97
+ },
98
+ {
99
+ "epoch": 2.8235294117647056,
100
+ "grad_norm": 4.038958549499512,
101
+ "learning_rate": 8.400000000000001e-06,
102
+ "loss": 0.6748,
103
+ "step": 240
104
+ },
105
+ {
106
+ "epoch": 3.0588235294117645,
107
+ "grad_norm": 3.325453758239746,
108
+ "learning_rate": 7.511111111111111e-06,
109
+ "loss": 0.66,
110
+ "step": 260
111
+ },
112
+ {
113
+ "epoch": 3.2941176470588234,
114
+ "grad_norm": 3.325045108795166,
115
+ "learning_rate": 6.6222222222222236e-06,
116
+ "loss": 0.5344,
117
+ "step": 280
118
+ },
119
+ {
120
+ "epoch": 3.5294117647058822,
121
+ "grad_norm": 3.9428045749664307,
122
+ "learning_rate": 5.733333333333334e-06,
123
+ "loss": 0.5228,
124
+ "step": 300
125
+ },
126
+ {
127
+ "epoch": 3.764705882352941,
128
+ "grad_norm": 3.552807092666626,
129
+ "learning_rate": 4.8444444444444446e-06,
130
+ "loss": 0.5771,
131
+ "step": 320
132
+ },
133
+ {
134
+ "epoch": 4.0,
135
+ "grad_norm": 4.557582378387451,
136
+ "learning_rate": 3.955555555555556e-06,
137
+ "loss": 0.4784,
138
+ "step": 340
139
+ },
140
+ {
141
+ "epoch": 4.235294117647059,
142
+ "grad_norm": 4.543866157531738,
143
+ "learning_rate": 3.066666666666667e-06,
144
+ "loss": 0.4485,
145
+ "step": 360
146
+ },
147
+ {
148
+ "epoch": 4.470588235294118,
149
+ "grad_norm": 3.4673140048980713,
150
+ "learning_rate": 2.1777777777777777e-06,
151
+ "loss": 0.4194,
152
+ "step": 380
153
+ },
154
+ {
155
+ "epoch": 4.705882352941177,
156
+ "grad_norm": 3.8203773498535156,
157
+ "learning_rate": 1.288888888888889e-06,
158
+ "loss": 0.3985,
159
+ "step": 400
160
+ },
161
+ {
162
+ "epoch": 4.705882352941177,
163
+ "eval_loss": 0.44990459084510803,
164
+ "eval_runtime": 307.4758,
165
+ "eval_samples_per_second": 2.215,
166
+ "eval_steps_per_second": 0.28,
167
+ "eval_wer": 37.20698874588074,
168
+ "step": 400
169
+ }
170
+ ],
171
+ "logging_steps": 20,
172
+ "max_steps": 425,
173
+ "num_input_tokens_seen": 0,
174
+ "num_train_epochs": 5,
175
+ "save_steps": 200,
176
+ "stateful_callbacks": {
177
+ "TrainerControl": {
178
+ "args": {
179
+ "should_epoch_stop": false,
180
+ "should_evaluate": false,
181
+ "should_log": false,
182
+ "should_save": true,
183
+ "should_training_stop": false
184
+ },
185
+ "attributes": {}
186
+ }
187
+ },
188
+ "total_flos": 3.693893124096e+18,
189
+ "train_batch_size": 16,
190
+ "trial_name": null,
191
+ "trial_params": null
192
+ }
checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8560064d3c95b83f5ff13cbbf3f02ecfc5b06383d6bc753a071d8b592db8d83
3
+ size 5432
checkpoint-425/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "activation_function": "gelu",
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "WhisperForConditionalGeneration"
7
+ ],
8
+ "attention_dropout": 0.0,
9
+ "begin_suppress_tokens": null,
10
+ "bos_token_id": 50257,
11
+ "classifier_proj_size": 256,
12
+ "d_model": 768,
13
+ "decoder_attention_heads": 12,
14
+ "decoder_ffn_dim": 3072,
15
+ "decoder_layerdrop": 0.0,
16
+ "decoder_layers": 12,
17
+ "decoder_start_token_id": 50258,
18
+ "dropout": 0.0,
19
+ "encoder_attention_heads": 12,
20
+ "encoder_ffn_dim": 3072,
21
+ "encoder_layerdrop": 0.0,
22
+ "encoder_layers": 12,
23
+ "eos_token_id": 50257,
24
+ "forced_decoder_ids": null,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "mask_feature_length": 10,
28
+ "mask_feature_min_masks": 0,
29
+ "mask_feature_prob": 0.0,
30
+ "mask_time_length": 10,
31
+ "mask_time_min_masks": 2,
32
+ "mask_time_prob": 0.05,
33
+ "max_length": null,
34
+ "max_source_positions": 1500,
35
+ "max_target_positions": 448,
36
+ "median_filter_width": 7,
37
+ "model_type": "whisper",
38
+ "num_hidden_layers": 12,
39
+ "num_mel_bins": 80,
40
+ "pad_token_id": 50257,
41
+ "scale_embedding": false,
42
+ "torch_dtype": "float32",
43
+ "transformers_version": "4.51.3",
44
+ "use_cache": false,
45
+ "use_weighted_layer_sum": false,
46
+ "vocab_size": 51865
47
+ }
checkpoint-425/generation_config.json ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 5,
5
+ 3
6
+ ],
7
+ [
8
+ 5,
9
+ 9
10
+ ],
11
+ [
12
+ 8,
13
+ 0
14
+ ],
15
+ [
16
+ 8,
17
+ 4
18
+ ],
19
+ [
20
+ 8,
21
+ 7
22
+ ],
23
+ [
24
+ 8,
25
+ 8
26
+ ],
27
+ [
28
+ 9,
29
+ 0
30
+ ],
31
+ [
32
+ 9,
33
+ 7
34
+ ],
35
+ [
36
+ 9,
37
+ 9
38
+ ],
39
+ [
40
+ 10,
41
+ 5
42
+ ]
43
+ ],
44
+ "begin_suppress_tokens": [
45
+ 220,
46
+ 50257
47
+ ],
48
+ "bos_token_id": 50257,
49
+ "decoder_start_token_id": 50258,
50
+ "eos_token_id": 50257,
51
+ "forced_decoder_ids": [
52
+ [
53
+ 1,
54
+ null
55
+ ],
56
+ [
57
+ 2,
58
+ 50359
59
+ ]
60
+ ],
61
+ "is_multilingual": true,
62
+ "lang_to_id": {
63
+ "<|af|>": 50327,
64
+ "<|am|>": 50334,
65
+ "<|ar|>": 50272,
66
+ "<|as|>": 50350,
67
+ "<|az|>": 50304,
68
+ "<|ba|>": 50355,
69
+ "<|be|>": 50330,
70
+ "<|bg|>": 50292,
71
+ "<|bn|>": 50302,
72
+ "<|bo|>": 50347,
73
+ "<|br|>": 50309,
74
+ "<|bs|>": 50315,
75
+ "<|ca|>": 50270,
76
+ "<|cs|>": 50283,
77
+ "<|cy|>": 50297,
78
+ "<|da|>": 50285,
79
+ "<|de|>": 50261,
80
+ "<|el|>": 50281,
81
+ "<|en|>": 50259,
82
+ "<|es|>": 50262,
83
+ "<|et|>": 50307,
84
+ "<|eu|>": 50310,
85
+ "<|fa|>": 50300,
86
+ "<|fi|>": 50277,
87
+ "<|fo|>": 50338,
88
+ "<|fr|>": 50265,
89
+ "<|gl|>": 50319,
90
+ "<|gu|>": 50333,
91
+ "<|haw|>": 50352,
92
+ "<|ha|>": 50354,
93
+ "<|he|>": 50279,
94
+ "<|hi|>": 50276,
95
+ "<|hr|>": 50291,
96
+ "<|ht|>": 50339,
97
+ "<|hu|>": 50286,
98
+ "<|hy|>": 50312,
99
+ "<|id|>": 50275,
100
+ "<|is|>": 50311,
101
+ "<|it|>": 50274,
102
+ "<|ja|>": 50266,
103
+ "<|jw|>": 50356,
104
+ "<|ka|>": 50329,
105
+ "<|kk|>": 50316,
106
+ "<|km|>": 50323,
107
+ "<|kn|>": 50306,
108
+ "<|ko|>": 50264,
109
+ "<|la|>": 50294,
110
+ "<|lb|>": 50345,
111
+ "<|ln|>": 50353,
112
+ "<|lo|>": 50336,
113
+ "<|lt|>": 50293,
114
+ "<|lv|>": 50301,
115
+ "<|mg|>": 50349,
116
+ "<|mi|>": 50295,
117
+ "<|mk|>": 50308,
118
+ "<|ml|>": 50296,
119
+ "<|mn|>": 50314,
120
+ "<|mr|>": 50320,
121
+ "<|ms|>": 50282,
122
+ "<|mt|>": 50343,
123
+ "<|my|>": 50346,
124
+ "<|ne|>": 50313,
125
+ "<|nl|>": 50271,
126
+ "<|nn|>": 50342,
127
+ "<|no|>": 50288,
128
+ "<|oc|>": 50328,
129
+ "<|pa|>": 50321,
130
+ "<|pl|>": 50269,
131
+ "<|ps|>": 50340,
132
+ "<|pt|>": 50267,
133
+ "<|ro|>": 50284,
134
+ "<|ru|>": 50263,
135
+ "<|sa|>": 50344,
136
+ "<|sd|>": 50332,
137
+ "<|si|>": 50322,
138
+ "<|sk|>": 50298,
139
+ "<|sl|>": 50305,
140
+ "<|sn|>": 50324,
141
+ "<|so|>": 50326,
142
+ "<|sq|>": 50317,
143
+ "<|sr|>": 50303,
144
+ "<|su|>": 50357,
145
+ "<|sv|>": 50273,
146
+ "<|sw|>": 50318,
147
+ "<|ta|>": 50287,
148
+ "<|te|>": 50299,
149
+ "<|tg|>": 50331,
150
+ "<|th|>": 50289,
151
+ "<|tk|>": 50341,
152
+ "<|tl|>": 50348,
153
+ "<|tr|>": 50268,
154
+ "<|tt|>": 50351,
155
+ "<|uk|>": 50280,
156
+ "<|ur|>": 50290,
157
+ "<|uz|>": 50337,
158
+ "<|vi|>": 50278,
159
+ "<|yi|>": 50335,
160
+ "<|yo|>": 50325,
161
+ "<|zh|>": 50260
162
+ },
163
+ "max_initial_timestamp_index": 50,
164
+ "max_length": 448,
165
+ "no_timestamps_token_id": 50363,
166
+ "pad_token_id": 50257,
167
+ "prev_sot_token_id": 50361,
168
+ "return_timestamps": false,
169
+ "suppress_tokens": [],
170
+ "task_to_id": {
171
+ "transcribe": 50359,
172
+ "translate": 50358
173
+ },
174
+ "transformers_version": "4.51.3"
175
+ }
checkpoint-425/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0580286b2265f3e3c6fca4171d309f706b3b965875e5ec6b26415a2773d337
3
+ size 966995080
checkpoint-425/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c33a446caff0c3f938fb4f078e3ce9835bc7dd1a206d454322d7c157072a2575
3
+ size 1925064044
checkpoint-425/preprocessor_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "dither": 0.0,
4
+ "feature_extractor_type": "WhisperFeatureExtractor",
5
+ "feature_size": 80,
6
+ "hop_length": 160,
7
+ "n_fft": 400,
8
+ "n_samples": 480000,
9
+ "nb_max_frames": 3000,
10
+ "padding_side": "right",
11
+ "padding_value": 0.0,
12
+ "processor_class": "WhisperProcessor",
13
+ "return_attention_mask": false,
14
+ "sampling_rate": 16000
15
+ }
checkpoint-425/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c56071df9f6ddfdf7711921b1c1816d748c20bacb6e7781b8852aa85b8e0a48e
3
+ size 14244
checkpoint-425/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ffebbfa87d047661b238e4e5058512285e9e08a971a086dfbc78a1c13249fe0
3
+ size 988
checkpoint-425/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9622f19ef82ce503b99901229c61b1c7b372a9080192b99b005bd384022fda
3
+ size 1064
checkpoint-425/trainer_state.json ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 400,
3
+ "best_metric": 37.20698874588074,
4
+ "best_model_checkpoint": "./JUDIC/checkpoint-400",
5
+ "epoch": 5.0,
6
+ "eval_steps": 200,
7
+ "global_step": 425,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.23529411764705882,
14
+ "grad_norm": 39.55274200439453,
15
+ "learning_rate": 8.000000000000001e-07,
16
+ "loss": 2.8704,
17
+ "step": 20
18
+ },
19
+ {
20
+ "epoch": 0.47058823529411764,
21
+ "grad_norm": 16.861303329467773,
22
+ "learning_rate": 1.8000000000000001e-06,
23
+ "loss": 2.4422,
24
+ "step": 40
25
+ },
26
+ {
27
+ "epoch": 0.7058823529411765,
28
+ "grad_norm": 10.871281623840332,
29
+ "learning_rate": 2.8000000000000003e-06,
30
+ "loss": 1.9044,
31
+ "step": 60
32
+ },
33
+ {
34
+ "epoch": 0.9411764705882353,
35
+ "grad_norm": 7.858098030090332,
36
+ "learning_rate": 3.8000000000000005e-06,
37
+ "loss": 1.4465,
38
+ "step": 80
39
+ },
40
+ {
41
+ "epoch": 1.1764705882352942,
42
+ "grad_norm": 7.4879231452941895,
43
+ "learning_rate": 4.800000000000001e-06,
44
+ "loss": 1.3179,
45
+ "step": 100
46
+ },
47
+ {
48
+ "epoch": 1.4117647058823528,
49
+ "grad_norm": 7.5675835609436035,
50
+ "learning_rate": 5.8e-06,
51
+ "loss": 1.0743,
52
+ "step": 120
53
+ },
54
+ {
55
+ "epoch": 1.6470588235294117,
56
+ "grad_norm": 6.898285865783691,
57
+ "learning_rate": 6.800000000000001e-06,
58
+ "loss": 1.0679,
59
+ "step": 140
60
+ },
61
+ {
62
+ "epoch": 1.8823529411764706,
63
+ "grad_norm": 7.592809677124023,
64
+ "learning_rate": 7.800000000000002e-06,
65
+ "loss": 1.0381,
66
+ "step": 160
67
+ },
68
+ {
69
+ "epoch": 2.1176470588235294,
70
+ "grad_norm": 8.085546493530273,
71
+ "learning_rate": 8.8e-06,
72
+ "loss": 0.8211,
73
+ "step": 180
74
+ },
75
+ {
76
+ "epoch": 2.3529411764705883,
77
+ "grad_norm": 5.359922885894775,
78
+ "learning_rate": 9.800000000000001e-06,
79
+ "loss": 0.7182,
80
+ "step": 200
81
+ },
82
+ {
83
+ "epoch": 2.3529411764705883,
84
+ "eval_loss": 0.6759204864501953,
85
+ "eval_runtime": 326.8541,
86
+ "eval_samples_per_second": 2.083,
87
+ "eval_steps_per_second": 0.263,
88
+ "eval_wer": 41.783249393769815,
89
+ "step": 200
90
+ },
91
+ {
92
+ "epoch": 2.588235294117647,
93
+ "grad_norm": 3.346689462661743,
94
+ "learning_rate": 9.28888888888889e-06,
95
+ "loss": 0.7775,
96
+ "step": 220
97
+ },
98
+ {
99
+ "epoch": 2.8235294117647056,
100
+ "grad_norm": 4.038958549499512,
101
+ "learning_rate": 8.400000000000001e-06,
102
+ "loss": 0.6748,
103
+ "step": 240
104
+ },
105
+ {
106
+ "epoch": 3.0588235294117645,
107
+ "grad_norm": 3.325453758239746,
108
+ "learning_rate": 7.511111111111111e-06,
109
+ "loss": 0.66,
110
+ "step": 260
111
+ },
112
+ {
113
+ "epoch": 3.2941176470588234,
114
+ "grad_norm": 3.325045108795166,
115
+ "learning_rate": 6.6222222222222236e-06,
116
+ "loss": 0.5344,
117
+ "step": 280
118
+ },
119
+ {
120
+ "epoch": 3.5294117647058822,
121
+ "grad_norm": 3.9428045749664307,
122
+ "learning_rate": 5.733333333333334e-06,
123
+ "loss": 0.5228,
124
+ "step": 300
125
+ },
126
+ {
127
+ "epoch": 3.764705882352941,
128
+ "grad_norm": 3.552807092666626,
129
+ "learning_rate": 4.8444444444444446e-06,
130
+ "loss": 0.5771,
131
+ "step": 320
132
+ },
133
+ {
134
+ "epoch": 4.0,
135
+ "grad_norm": 4.557582378387451,
136
+ "learning_rate": 3.955555555555556e-06,
137
+ "loss": 0.4784,
138
+ "step": 340
139
+ },
140
+ {
141
+ "epoch": 4.235294117647059,
142
+ "grad_norm": 4.543866157531738,
143
+ "learning_rate": 3.066666666666667e-06,
144
+ "loss": 0.4485,
145
+ "step": 360
146
+ },
147
+ {
148
+ "epoch": 4.470588235294118,
149
+ "grad_norm": 3.4673140048980713,
150
+ "learning_rate": 2.1777777777777777e-06,
151
+ "loss": 0.4194,
152
+ "step": 380
153
+ },
154
+ {
155
+ "epoch": 4.705882352941177,
156
+ "grad_norm": 3.8203773498535156,
157
+ "learning_rate": 1.288888888888889e-06,
158
+ "loss": 0.3985,
159
+ "step": 400
160
+ },
161
+ {
162
+ "epoch": 4.705882352941177,
163
+ "eval_loss": 0.44990459084510803,
164
+ "eval_runtime": 307.4758,
165
+ "eval_samples_per_second": 2.215,
166
+ "eval_steps_per_second": 0.28,
167
+ "eval_wer": 37.20698874588074,
168
+ "step": 400
169
+ },
170
+ {
171
+ "epoch": 4.9411764705882355,
172
+ "grad_norm": 3.171309471130371,
173
+ "learning_rate": 4.0000000000000003e-07,
174
+ "loss": 0.4031,
175
+ "step": 420
176
+ }
177
+ ],
178
+ "logging_steps": 20,
179
+ "max_steps": 425,
180
+ "num_input_tokens_seen": 0,
181
+ "num_train_epochs": 5,
182
+ "save_steps": 200,
183
+ "stateful_callbacks": {
184
+ "TrainerControl": {
185
+ "args": {
186
+ "should_epoch_stop": false,
187
+ "should_evaluate": false,
188
+ "should_log": false,
189
+ "should_save": true,
190
+ "should_training_stop": true
191
+ },
192
+ "attributes": {}
193
+ }
194
+ },
195
+ "total_flos": 3.924761444352e+18,
196
+ "train_batch_size": 16,
197
+ "trial_name": null,
198
+ "trial_params": null
199
+ }
checkpoint-425/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8560064d3c95b83f5ff13cbbf3f02ecfc5b06383d6bc753a071d8b592db8d83
3
+ size 5432
runs/May01_13-12-38_cffb3ce9e37b/events.out.tfevents.1746105160.cffb3ce9e37b.5237.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fb94a22f3ef5bb996b7c4f2db485884ca43e757284cb0128215d53e20f7ae1a
3
- size 10573
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fce9f0379404ebcfadd0f71547ca192f92d282c29cdf235e64fcafa4cd77d92
3
+ size 11138