sazzadul committed on
Commit
96c3bee
·
verified ·
1 Parent(s): 010ffd5

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 74,
3
+ "<s>": 73
4
+ }
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/w2v-bert-2.0",
3
+ "activation_dropout": 0.0,
4
+ "adapter_act": "relu",
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": true,
8
+ "apply_spec_augment": false,
9
+ "architectures": [
10
+ "Wav2Vec2BertForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 768,
15
+ "codevector_dim": 768,
16
+ "conformer_conv_dropout": 0.1,
17
+ "contrastive_logits_temperature": 0.1,
18
+ "conv_depthwise_kernel_size": 31,
19
+ "ctc_loss_reduction": "mean",
20
+ "ctc_zero_infinity": false,
21
+ "diversity_loss_weight": 0.1,
22
+ "eos_token_id": 2,
23
+ "feat_proj_dropout": 0.0,
24
+ "feat_quantizer_dropout": 0.0,
25
+ "feature_projection_input_dim": 160,
26
+ "final_dropout": 0.1,
27
+ "hidden_act": "swish",
28
+ "hidden_dropout": 0.0,
29
+ "hidden_size": 1024,
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 4096,
32
+ "layer_norm_eps": 1e-05,
33
+ "layerdrop": 0.0,
34
+ "left_max_position_embeddings": 64,
35
+ "mask_feature_length": 10,
36
+ "mask_feature_min_masks": 0,
37
+ "mask_feature_prob": 0.0,
38
+ "mask_time_length": 10,
39
+ "mask_time_min_masks": 2,
40
+ "mask_time_prob": 0.0,
41
+ "max_source_positions": 5000,
42
+ "model_type": "wav2vec2-bert",
43
+ "num_adapter_layers": 1,
44
+ "num_attention_heads": 16,
45
+ "num_codevector_groups": 2,
46
+ "num_codevectors_per_group": 320,
47
+ "num_hidden_layers": 24,
48
+ "num_negatives": 100,
49
+ "output_hidden_size": 1024,
50
+ "pad_token_id": 72,
51
+ "position_embeddings_type": "relative_key",
52
+ "proj_codevector_dim": 768,
53
+ "right_max_position_embeddings": 8,
54
+ "rotary_embedding_base": 10000,
55
+ "tdnn_dilation": [
56
+ 1,
57
+ 2,
58
+ 3,
59
+ 1,
60
+ 1
61
+ ],
62
+ "tdnn_dim": [
63
+ 512,
64
+ 512,
65
+ 512,
66
+ 512,
67
+ 1500
68
+ ],
69
+ "tdnn_kernel": [
70
+ 5,
71
+ 3,
72
+ 3,
73
+ 1,
74
+ 1
75
+ ],
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.48.3",
78
+ "use_intermediate_ffn_before_adapter": false,
79
+ "use_weighted_layer_sum": false,
80
+ "vocab_size": 75,
81
+ "xvector_output_dim": 512
82
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba47b902c490ecea192f4de9bac474e61f4444b40dba229ed076dd360d194e1
3
+ size 2423122060
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0ea9be1ff85c95bd65ad8207aff88b5bed0bdec9ef2883149703aa9a903553
3
+ size 4846704874
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
+ "feature_size": 80,
4
+ "num_mel_bins": 80,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000,
10
+ "stride": 2
11
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba132af3ea6dbc34b93312228f433796d2df55f6dafda6becc04dd290c05d59
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2388174ab4b674f5fb1397139663389caf82ede0c5cf18d7ea0722066e622a
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "71": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "72": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "73": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "74": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": false,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "extra_special_tokens": {},
41
+ "model_max_length": 1000000000000000019884624838656,
42
+ "pad_token": "[PAD]",
43
+ "processor_class": "Wav2Vec2BertProcessor",
44
+ "replace_word_delimiter_char": " ",
45
+ "target_lang": null,
46
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
47
+ "unk_token": "[UNK]",
48
+ "word_delimiter_token": "|"
49
+ }
trainer_state.json ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.9379360998898276,
5
+ "eval_steps": 1000,
6
+ "global_step": 8000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1836210062431142,
13
+ "grad_norm": 10.457475662231445,
14
+ "learning_rate": 4.970000000000001e-06,
15
+ "loss": 3.3985,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3672420124862284,
20
+ "grad_norm": 11.14360237121582,
21
+ "learning_rate": 9.950000000000001e-06,
22
+ "loss": 0.4592,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.3672420124862284,
27
+ "eval_loss": 0.3498600423336029,
28
+ "eval_runtime": 562.4062,
29
+ "eval_samples_per_second": 16.584,
30
+ "eval_steps_per_second": 2.073,
31
+ "eval_wer": 0.3556134972927898,
32
+ "step": 1000
33
+ },
34
+ {
35
+ "epoch": 0.5508630187293426,
36
+ "grad_norm": 5.7916131019592285,
37
+ "learning_rate": 9.312316920072535e-06,
38
+ "loss": 0.3134,
39
+ "step": 1500
40
+ },
41
+ {
42
+ "epoch": 0.7344840249724568,
43
+ "grad_norm": 9.311443328857422,
44
+ "learning_rate": 8.616264472032362e-06,
45
+ "loss": 0.2559,
46
+ "step": 2000
47
+ },
48
+ {
49
+ "epoch": 0.7344840249724568,
50
+ "eval_loss": 0.2333422601222992,
51
+ "eval_runtime": 480.3112,
52
+ "eval_samples_per_second": 19.419,
53
+ "eval_steps_per_second": 2.428,
54
+ "eval_wer": 0.2637562688653207,
55
+ "step": 2000
56
+ },
57
+ {
58
+ "epoch": 0.9181050312155711,
59
+ "grad_norm": 6.38863468170166,
60
+ "learning_rate": 7.918817129306738e-06,
61
+ "loss": 0.2296,
62
+ "step": 2500
63
+ },
64
+ {
65
+ "epoch": 1.1017260374586852,
66
+ "grad_norm": 2.376054525375366,
67
+ "learning_rate": 7.222764681266565e-06,
68
+ "loss": 0.2012,
69
+ "step": 3000
70
+ },
71
+ {
72
+ "epoch": 1.1017260374586852,
73
+ "eval_loss": 0.18642069399356842,
74
+ "eval_runtime": 502.0224,
75
+ "eval_samples_per_second": 18.579,
76
+ "eval_steps_per_second": 2.323,
77
+ "eval_wer": 0.21953654440177114,
78
+ "step": 3000
79
+ },
80
+ {
81
+ "epoch": 1.2853470437017995,
82
+ "grad_norm": 1.8745460510253906,
83
+ "learning_rate": 6.526712233226392e-06,
84
+ "loss": 0.1756,
85
+ "step": 3500
86
+ },
87
+ {
88
+ "epoch": 1.4689680499449138,
89
+ "grad_norm": 2.770623207092285,
90
+ "learning_rate": 5.829264890500767e-06,
91
+ "loss": 0.1749,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 1.4689680499449138,
96
+ "eval_loss": 0.16301214694976807,
97
+ "eval_runtime": 476.0811,
98
+ "eval_samples_per_second": 19.591,
99
+ "eval_steps_per_second": 2.449,
100
+ "eval_wer": 0.20193086924350798,
101
+ "step": 4000
102
+ },
103
+ {
104
+ "epoch": 1.6525890561880279,
105
+ "grad_norm": 1.6120364665985107,
106
+ "learning_rate": 5.131817547775143e-06,
107
+ "loss": 0.1632,
108
+ "step": 4500
109
+ },
110
+ {
111
+ "epoch": 1.8362100624311422,
112
+ "grad_norm": 2.6673367023468018,
113
+ "learning_rate": 4.438554889105873e-06,
114
+ "loss": 0.1507,
115
+ "step": 5000
116
+ },
117
+ {
118
+ "epoch": 1.8362100624311422,
119
+ "eval_loss": 0.1462232917547226,
120
+ "eval_runtime": 485.091,
121
+ "eval_samples_per_second": 19.227,
122
+ "eval_steps_per_second": 2.404,
123
+ "eval_wer": 0.18161211138907485,
124
+ "step": 5000
125
+ },
126
+ {
127
+ "epoch": 2.0198310686742564,
128
+ "grad_norm": 1.5170563459396362,
129
+ "learning_rate": 3.741107546380249e-06,
130
+ "loss": 0.1458,
131
+ "step": 5500
132
+ },
133
+ {
134
+ "epoch": 2.2034520749173705,
135
+ "grad_norm": 1.745611310005188,
136
+ "learning_rate": 3.0450550983400755e-06,
137
+ "loss": 0.1287,
138
+ "step": 6000
139
+ },
140
+ {
141
+ "epoch": 2.2034520749173705,
142
+ "eval_loss": 0.13860595226287842,
143
+ "eval_runtime": 499.1348,
144
+ "eval_samples_per_second": 18.686,
145
+ "eval_steps_per_second": 2.336,
146
+ "eval_wer": 0.17375474202224492,
147
+ "step": 6000
148
+ },
149
+ {
150
+ "epoch": 2.3870730811604846,
151
+ "grad_norm": 2.008338212966919,
152
+ "learning_rate": 2.3490026502999026e-06,
153
+ "loss": 0.1335,
154
+ "step": 6500
155
+ },
156
+ {
157
+ "epoch": 2.570694087403599,
158
+ "grad_norm": 1.838847041130066,
159
+ "learning_rate": 1.6543450969451807e-06,
160
+ "loss": 0.1255,
161
+ "step": 7000
162
+ },
163
+ {
164
+ "epoch": 2.570694087403599,
165
+ "eval_loss": 0.13410328328609467,
166
+ "eval_runtime": 496.0496,
167
+ "eval_samples_per_second": 18.803,
168
+ "eval_steps_per_second": 2.351,
169
+ "eval_wer": 0.17149971224880495,
170
+ "step": 7000
171
+ },
172
+ {
173
+ "epoch": 2.754315093646713,
174
+ "grad_norm": 3.0811703205108643,
175
+ "learning_rate": 9.582926489050077e-07,
176
+ "loss": 0.1327,
177
+ "step": 7500
178
+ },
179
+ {
180
+ "epoch": 2.9379360998898276,
181
+ "grad_norm": 2.067793369293213,
182
+ "learning_rate": 2.6084530617938345e-07,
183
+ "loss": 0.1351,
184
+ "step": 8000
185
+ },
186
+ {
187
+ "epoch": 2.9379360998898276,
188
+ "eval_loss": 0.13526670634746552,
189
+ "eval_runtime": 494.2618,
190
+ "eval_samples_per_second": 18.871,
191
+ "eval_steps_per_second": 2.359,
192
+ "eval_wer": 0.16994937927956497,
193
+ "step": 8000
194
+ }
195
+ ],
196
+ "logging_steps": 500,
197
+ "max_steps": 8169,
198
+ "num_input_tokens_seen": 0,
199
+ "num_train_epochs": 3,
200
+ "save_steps": 1000,
201
+ "stateful_callbacks": {
202
+ "TrainerControl": {
203
+ "args": {
204
+ "should_epoch_stop": false,
205
+ "should_evaluate": false,
206
+ "should_log": false,
207
+ "should_save": true,
208
+ "should_training_stop": false
209
+ },
210
+ "attributes": {}
211
+ }
212
+ },
213
+ "total_flos": 3.3447222506063737e+19,
214
+ "train_batch_size": 24,
215
+ "trial_name": null,
216
+ "trial_params": null
217
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c34a341db8e94b17a583fd2b512ac1025a7e8b114b56a19bb4330009a519aacd
3
+ size 5304
vocab.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "/": 1,
3
+ "[PAD]": 72,
4
+ "[UNK]": 71,
5
+ "|": 0,
6
+ "।": 2,
7
+ "॥": 3,
8
+ "ঁ": 4,
9
+ "ং": 5,
10
+ "ঃ": 6,
11
+ "অ": 7,
12
+ "আ": 8,
13
+ "ই": 9,
14
+ "ঈ": 10,
15
+ "উ": 11,
16
+ "ঊ": 12,
17
+ "ঋ": 13,
18
+ "এ": 14,
19
+ "ঐ": 15,
20
+ "ও": 16,
21
+ "ঔ": 17,
22
+ "ক": 18,
23
+ "খ": 19,
24
+ "গ": 20,
25
+ "ঘ": 21,
26
+ "ঙ": 22,
27
+ "চ": 23,
28
+ "ছ": 24,
29
+ "জ": 25,
30
+ "ঝ": 26,
31
+ "ঞ": 27,
32
+ "ট": 28,
33
+ "ঠ": 29,
34
+ "ড": 30,
35
+ "ঢ": 31,
36
+ "ণ": 32,
37
+ "ত": 33,
38
+ "থ": 34,
39
+ "দ": 35,
40
+ "ধ": 36,
41
+ "ন": 37,
42
+ "প": 38,
43
+ "ফ": 39,
44
+ "ব": 40,
45
+ "ভ": 41,
46
+ "ম": 42,
47
+ "য": 43,
48
+ "র": 44,
49
+ "ল": 45,
50
+ "শ": 46,
51
+ "ষ": 47,
52
+ "স": 48,
53
+ "হ": 49,
54
+ "়": 50,
55
+ "া": 51,
56
+ "ি": 52,
57
+ "ী": 53,
58
+ "ু": 54,
59
+ "ূ": 55,
60
+ "ৃ": 56,
61
+ "ে": 57,
62
+ "ৈ": 58,
63
+ "ো": 59,
64
+ "ৌ": 60,
65
+ "্": 61,
66
+ "ৎ": 62,
67
+ "ড়": 63,
68
+ "ঢ়": 64,
69
+ "য়": 65,
70
+ "ৰ": 66,
71
+ "–": 67,
72
+ "—": 68,
73
+ "’": 69,
74
+ "‚": 70
75
+ }