sazzadul committed on
Commit
2fdf01a
·
verified ·
1 Parent(s): 56f18fc

Upload folder using huggingface_hub

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 66,
3
+ "<s>": 65
4
+ }
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/w2v-bert-2.0",
3
+ "activation_dropout": 0.0,
4
+ "adapter_act": "relu",
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": true,
8
+ "apply_spec_augment": false,
9
+ "architectures": [
10
+ "Wav2Vec2BertForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 768,
15
+ "codevector_dim": 768,
16
+ "conformer_conv_dropout": 0.1,
17
+ "contrastive_logits_temperature": 0.1,
18
+ "conv_depthwise_kernel_size": 31,
19
+ "ctc_loss_reduction": "mean",
20
+ "ctc_zero_infinity": false,
21
+ "diversity_loss_weight": 0.1,
22
+ "eos_token_id": 2,
23
+ "feat_proj_dropout": 0.0,
24
+ "feat_quantizer_dropout": 0.0,
25
+ "feature_projection_input_dim": 160,
26
+ "final_dropout": 0.1,
27
+ "hidden_act": "swish",
28
+ "hidden_dropout": 0.0,
29
+ "hidden_size": 1024,
30
+ "initializer_range": 0.02,
31
+ "intermediate_size": 4096,
32
+ "layer_norm_eps": 1e-05,
33
+ "layerdrop": 0.0,
34
+ "left_max_position_embeddings": 64,
35
+ "mask_feature_length": 10,
36
+ "mask_feature_min_masks": 0,
37
+ "mask_feature_prob": 0.0,
38
+ "mask_time_length": 10,
39
+ "mask_time_min_masks": 2,
40
+ "mask_time_prob": 0.0,
41
+ "max_source_positions": 5000,
42
+ "model_type": "wav2vec2-bert",
43
+ "num_adapter_layers": 1,
44
+ "num_attention_heads": 16,
45
+ "num_codevector_groups": 2,
46
+ "num_codevectors_per_group": 320,
47
+ "num_hidden_layers": 24,
48
+ "num_negatives": 100,
49
+ "output_hidden_size": 1024,
50
+ "pad_token_id": 71,
51
+ "position_embeddings_type": "relative_key",
52
+ "proj_codevector_dim": 768,
53
+ "right_max_position_embeddings": 8,
54
+ "rotary_embedding_base": 10000,
55
+ "tdnn_dilation": [
56
+ 1,
57
+ 2,
58
+ 3,
59
+ 1,
60
+ 1
61
+ ],
62
+ "tdnn_dim": [
63
+ 512,
64
+ 512,
65
+ 512,
66
+ 512,
67
+ 1500
68
+ ],
69
+ "tdnn_kernel": [
70
+ 5,
71
+ 3,
72
+ 3,
73
+ 1,
74
+ 1
75
+ ],
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.48.3",
78
+ "use_intermediate_ffn_before_adapter": false,
79
+ "use_weighted_layer_sum": false,
80
+ "vocab_size": 74,
81
+ "xvector_output_dim": 512
82
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81aad00981aa2c3172820b69f4eb43290456850c3ec974b6d5bcb8ef0d7715ca
3
+ size 2423117960
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f1cce0157d3c7e75766959dff661de387c5c6c1f3b83d17035e039d6f25fcb1
3
+ size 4846696682
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_extractor_type": "SeamlessM4TFeatureExtractor",
3
+ "feature_size": 80,
4
+ "num_mel_bins": 80,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2BertProcessor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000,
10
+ "stride": 2
11
+ }
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5226a7b6402d77283255ad91bd9fe2de7cdcfee7d94e0accedbb31a8e9a0a4c
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5af39eb138fbe679393389dfca31a0be1e74ccceefd5383e57f616c1a0d126
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "63": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "64": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "65": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "66": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "bos_token": "<s>",
37
+ "clean_up_tokenization_spaces": false,
38
+ "do_lower_case": false,
39
+ "eos_token": "</s>",
40
+ "extra_special_tokens": {},
41
+ "model_max_length": 1000000000000000019884624838656,
42
+ "pad_token": "[PAD]",
43
+ "processor_class": "Wav2Vec2BertProcessor",
44
+ "replace_word_delimiter_char": " ",
45
+ "target_lang": null,
46
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
47
+ "unk_token": "[UNK]",
48
+ "word_delimiter_token": "|"
49
+ }
trainer_state.json ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7127555988315482,
3
+ "best_model_checkpoint": "whisper-reg-ben\\checkpoint-6000",
4
+ "epoch": 3.634161114476075,
5
+ "eval_steps": 500,
6
+ "global_step": 6000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.30284675953967294,
13
+ "eval_cer": 0.49007230153778397,
14
+ "eval_loss": 2.5224013328552246,
15
+ "eval_runtime": 19.2778,
16
+ "eval_samples_per_second": 6.951,
17
+ "eval_steps_per_second": 1.764,
18
+ "eval_wer": 1.0074001947419668,
19
+ "step": 500
20
+ },
21
+ {
22
+ "epoch": 0.6056935190793459,
23
+ "grad_norm": 6.7337727546691895,
24
+ "learning_rate": 9.692067457838851e-06,
25
+ "loss": 2.9581,
26
+ "step": 1000
27
+ },
28
+ {
29
+ "epoch": 0.6056935190793459,
30
+ "eval_cer": 0.3797115278746284,
31
+ "eval_loss": 1.7524603605270386,
32
+ "eval_runtime": 19.6842,
33
+ "eval_samples_per_second": 6.807,
34
+ "eval_steps_per_second": 1.727,
35
+ "eval_wer": 0.84634858812074,
36
+ "step": 1000
37
+ },
38
+ {
39
+ "epoch": 0.9085402786190188,
40
+ "eval_cer": 0.3615076889198811,
41
+ "eval_loss": 1.5823218822479248,
42
+ "eval_runtime": 18.1939,
43
+ "eval_samples_per_second": 7.365,
44
+ "eval_steps_per_second": 1.869,
45
+ "eval_wer": 0.7945472249269717,
46
+ "step": 1500
47
+ },
48
+ {
49
+ "epoch": 1.2113870381586918,
50
+ "grad_norm": 4.9313130378723145,
51
+ "learning_rate": 9.067457838850719e-06,
52
+ "loss": 1.6132,
53
+ "step": 2000
54
+ },
55
+ {
56
+ "epoch": 1.2113870381586918,
57
+ "eval_cer": 0.3612507799023746,
58
+ "eval_loss": 1.4997178316116333,
59
+ "eval_runtime": 21.2924,
60
+ "eval_samples_per_second": 6.293,
61
+ "eval_steps_per_second": 1.597,
62
+ "eval_wer": 0.7700097370983447,
63
+ "step": 2000
64
+ },
65
+ {
66
+ "epoch": 1.5142337976983646,
67
+ "eval_cer": 0.36807721951040484,
68
+ "eval_loss": 1.4603444337844849,
69
+ "eval_runtime": 17.7387,
70
+ "eval_samples_per_second": 7.554,
71
+ "eval_steps_per_second": 1.917,
72
+ "eval_wer": 0.7518987341772152,
73
+ "step": 2500
74
+ },
75
+ {
76
+ "epoch": 1.8170805572380375,
77
+ "grad_norm": 5.206490516662598,
78
+ "learning_rate": 8.443472829481576e-06,
79
+ "loss": 1.5051,
80
+ "step": 3000
81
+ },
82
+ {
83
+ "epoch": 1.8170805572380375,
84
+ "eval_cer": 0.33611039747495136,
85
+ "eval_loss": 1.4466902017593384,
86
+ "eval_runtime": 19.3549,
87
+ "eval_samples_per_second": 6.923,
88
+ "eval_steps_per_second": 1.757,
89
+ "eval_wer": 0.7450827653359299,
90
+ "step": 3000
91
+ },
92
+ {
93
+ "epoch": 2.1199273167777104,
94
+ "eval_cer": 0.33500935882849486,
95
+ "eval_loss": 1.4404141902923584,
96
+ "eval_runtime": 18.1487,
97
+ "eval_samples_per_second": 7.383,
98
+ "eval_steps_per_second": 1.873,
99
+ "eval_wer": 0.7489776046738073,
100
+ "step": 3500
101
+ },
102
+ {
103
+ "epoch": 2.4227740763173835,
104
+ "grad_norm": 4.5190887451171875,
105
+ "learning_rate": 7.820737039350407e-06,
106
+ "loss": 1.3798,
107
+ "step": 4000
108
+ },
109
+ {
110
+ "epoch": 2.4227740763173835,
111
+ "eval_cer": 0.3345689433699123,
112
+ "eval_loss": 1.394412636756897,
113
+ "eval_runtime": 19.6972,
114
+ "eval_samples_per_second": 6.803,
115
+ "eval_steps_per_second": 1.726,
116
+ "eval_wer": 0.7226874391431354,
117
+ "step": 4000
118
+ },
119
+ {
120
+ "epoch": 2.725620835857056,
121
+ "eval_cer": 0.3381656696150035,
122
+ "eval_loss": 1.421527624130249,
123
+ "eval_runtime": 18.6951,
124
+ "eval_samples_per_second": 7.168,
125
+ "eval_steps_per_second": 1.819,
126
+ "eval_wer": 0.7626095423563778,
127
+ "step": 4500
128
+ },
129
+ {
130
+ "epoch": 3.0284675953967293,
131
+ "grad_norm": 12.196548461914062,
132
+ "learning_rate": 7.196127420362275e-06,
133
+ "loss": 1.3506,
134
+ "step": 5000
135
+ },
136
+ {
137
+ "epoch": 3.0284675953967293,
138
+ "eval_cer": 0.32873343854369286,
139
+ "eval_loss": 1.3746217489242554,
140
+ "eval_runtime": 17.0045,
141
+ "eval_samples_per_second": 7.88,
142
+ "eval_steps_per_second": 1.999,
143
+ "eval_wer": 0.7302823758519961,
144
+ "step": 5000
145
+ },
146
+ {
147
+ "epoch": 3.331314354936402,
148
+ "eval_cer": 0.3210628693067127,
149
+ "eval_loss": 1.3873779773712158,
150
+ "eval_runtime": 19.6904,
151
+ "eval_samples_per_second": 6.805,
152
+ "eval_steps_per_second": 1.727,
153
+ "eval_wer": 0.7230769230769231,
154
+ "step": 5500
155
+ },
156
+ {
157
+ "epoch": 3.634161114476075,
158
+ "grad_norm": 5.505178451538086,
159
+ "learning_rate": 6.57214241099313e-06,
160
+ "loss": 1.2704,
161
+ "step": 6000
162
+ },
163
+ {
164
+ "epoch": 3.634161114476075,
165
+ "eval_cer": 0.31680551987374755,
166
+ "eval_loss": 1.403659462928772,
167
+ "eval_runtime": 17.1479,
168
+ "eval_samples_per_second": 7.814,
169
+ "eval_steps_per_second": 1.983,
170
+ "eval_wer": 0.7127555988315482,
171
+ "step": 6000
172
+ }
173
+ ],
174
+ "logging_steps": 1000,
175
+ "max_steps": 16510,
176
+ "num_input_tokens_seen": 0,
177
+ "num_train_epochs": 10,
178
+ "save_steps": 500,
179
+ "stateful_callbacks": {
180
+ "TrainerControl": {
181
+ "args": {
182
+ "should_epoch_stop": false,
183
+ "should_evaluate": false,
184
+ "should_log": false,
185
+ "should_save": true,
186
+ "should_training_stop": false
187
+ },
188
+ "attributes": {}
189
+ }
190
+ },
191
+ "total_flos": 2.7849349072039633e+19,
192
+ "train_batch_size": 4,
193
+ "trial_name": null,
194
+ "trial_params": null
195
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b967ba4ed0328c5cab4ad9d3cb082f89582de82a4ba189f7c3dc275ec6fe76
3
+ size 5432
vocab.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 64,
3
+ "[UNK]": 63,
4
+ "|": 0,
5
+ "ঁ": 1,
6
+ "ং": 2,
7
+ "ঃ": 3,
8
+ "অ": 4,
9
+ "আ": 5,
10
+ "ই": 6,
11
+ "ঈ": 7,
12
+ "উ": 8,
13
+ "ঊ": 9,
14
+ "ঋ": 10,
15
+ "এ": 11,
16
+ "ঐ": 12,
17
+ "ও": 13,
18
+ "ঔ": 14,
19
+ "ক": 15,
20
+ "খ": 16,
21
+ "গ": 17,
22
+ "ঘ": 18,
23
+ "ঙ": 19,
24
+ "চ": 20,
25
+ "ছ": 21,
26
+ "জ": 22,
27
+ "ঝ": 23,
28
+ "ঞ": 24,
29
+ "ট": 25,
30
+ "ঠ": 26,
31
+ "ড": 27,
32
+ "ঢ": 28,
33
+ "ণ": 29,
34
+ "ত": 30,
35
+ "থ": 31,
36
+ "দ": 32,
37
+ "ধ": 33,
38
+ "ন": 34,
39
+ "প": 35,
40
+ "ফ": 36,
41
+ "ব": 37,
42
+ "ভ": 38,
43
+ "ম": 39,
44
+ "য": 40,
45
+ "র": 41,
46
+ "ল": 42,
47
+ "শ": 43,
48
+ "ষ": 44,
49
+ "স": 45,
50
+ "হ": 46,
51
+ "া": 47,
52
+ "ি": 48,
53
+ "ী": 49,
54
+ "ু": 50,
55
+ "ূ": 51,
56
+ "ৃ": 52,
57
+ "ে": 53,
58
+ "ৈ": 54,
59
+ "ো": 55,
60
+ "ৌ": 56,
61
+ "্": 57,
62
+ "ৎ": 58,
63
+ "ড়": 59,
64
+ "ঢ়": 60,
65
+ "য়": 61,
66
+ "‍": 62
67
+ }