israel commited on
Commit
73d9edc
·
verified ·
1 Parent(s): eea050e

Add files using upload-large-folder tool

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 415,
3
+ "<s>": 414
4
+ }
checkpoint-8800/config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 1024,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 408,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.50.0",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 416,
107
+ "xvector_output_dim": 512
108
+ }
checkpoint-8800/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a9c2c7f09741d74546657de3a0b0f1c9972c3740167d9c8e41b3dc46d3b1349
3
+ size 3852217376
checkpoint-8800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc69c8622e1b92075ca54d2a9acb3cb49ad22f93430efd5d2e8617b4addcdc85
3
+ size 7671210755
checkpoint-8800/preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
checkpoint-8800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a9935ea814c57156eba9e8a4efa123c725e79b1f68098e8f4a0d096c15447b2
3
+ size 14244
checkpoint-8800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:febd050910e5d8cdbea0a7b6d80f5f795a947633c2ccdd0ecf21c22c9e5a114c
3
+ size 1064
checkpoint-8800/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-8800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b750106402e066e83f2e8500d9bc48c868d446e9f12f58d61339685262e60823
3
+ size 5432
checkpoint-9600/config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 1024,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 408,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.50.0",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 416,
107
+ "xvector_output_dim": 512
108
+ }
checkpoint-9600/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681cdd569611f7b9e35a53e24e49d8f9662ed2b526f23f51799ce2722bd1573b
3
+ size 3852217376
checkpoint-9600/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97871d82d2346636a4b3a9c895edc229424ef565b019edb163828ed92f8bb1a8
3
+ size 7671210755
checkpoint-9600/preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
checkpoint-9600/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d049554c52bc780701b529f1a472c70df84b5d20505a50262463bb6b815ac48e
3
+ size 14244
checkpoint-9600/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39b0b40ad2d7d53008b0348b56ab9736840bd672d63629e76abb7e1e21f7efca
3
+ size 1064
checkpoint-9600/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-9600/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b750106402e066e83f2e8500d9bc48c868d446e9f12f58d61339685262e60823
3
+ size 5432
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 1024,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": true,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 408,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.50.0",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 416,
107
+ "xvector_output_dim": 512
108
+ }
ctc_tokenizer/vocab.json ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "!": 1,
3
+ "#": 2,
4
+ "$": 3,
5
+ "%": 4,
6
+ "&": 5,
7
+ "'": 6,
8
+ "*": 7,
9
+ "+": 8,
10
+ ",": 9,
11
+ "-": 10,
12
+ ".": 11,
13
+ "0": 12,
14
+ "1": 13,
15
+ "2": 14,
16
+ "3": 15,
17
+ "4": 16,
18
+ "5": 17,
19
+ "6": 18,
20
+ "7": 19,
21
+ "8": 20,
22
+ "9": 21,
23
+ "=": 22,
24
+ "?": 23,
25
+ "@": 24,
26
+ "a": 25,
27
+ "b": 26,
28
+ "c": 27,
29
+ "d": 28,
30
+ "e": 29,
31
+ "f": 30,
32
+ "g": 31,
33
+ "h": 32,
34
+ "i": 33,
35
+ "j": 34,
36
+ "k": 35,
37
+ "l": 36,
38
+ "m": 37,
39
+ "n": 38,
40
+ "o": 39,
41
+ "p": 40,
42
+ "q": 41,
43
+ "r": 42,
44
+ "s": 43,
45
+ "t": 44,
46
+ "u": 45,
47
+ "v": 46,
48
+ "w": 47,
49
+ "x": 48,
50
+ "y": 49,
51
+ "z": 50,
52
+ "\u1200": 51,
53
+ "\u1201": 52,
54
+ "\u1202": 53,
55
+ "\u1203": 54,
56
+ "\u1204": 55,
57
+ "\u1205": 56,
58
+ "\u1206": 57,
59
+ "\u1207": 58,
60
+ "\u1208": 59,
61
+ "\u1209": 60,
62
+ "\u120a": 61,
63
+ "\u120b": 62,
64
+ "\u120c": 63,
65
+ "\u120d": 64,
66
+ "\u120e": 65,
67
+ "\u120f": 66,
68
+ "\u1210": 67,
69
+ "\u1211": 68,
70
+ "\u1212": 69,
71
+ "\u1213": 70,
72
+ "\u1214": 71,
73
+ "\u1215": 72,
74
+ "\u1216": 73,
75
+ "\u1217": 74,
76
+ "\u1218": 75,
77
+ "\u1219": 76,
78
+ "\u121a": 77,
79
+ "\u121b": 78,
80
+ "\u121c": 79,
81
+ "\u121d": 80,
82
+ "\u121e": 81,
83
+ "\u121f": 82,
84
+ "\u1220": 83,
85
+ "\u1221": 84,
86
+ "\u1222": 85,
87
+ "\u1223": 86,
88
+ "\u1224": 87,
89
+ "\u1225": 88,
90
+ "\u1226": 89,
91
+ "\u1227": 90,
92
+ "\u1228": 91,
93
+ "\u1229": 92,
94
+ "\u122a": 93,
95
+ "\u122b": 94,
96
+ "\u122c": 95,
97
+ "\u122d": 96,
98
+ "\u122e": 97,
99
+ "\u122f": 98,
100
+ "\u1230": 99,
101
+ "\u1231": 100,
102
+ "\u1232": 101,
103
+ "\u1233": 102,
104
+ "\u1234": 103,
105
+ "\u1235": 104,
106
+ "\u1236": 105,
107
+ "\u1237": 106,
108
+ "\u1238": 107,
109
+ "\u1239": 108,
110
+ "\u123a": 109,
111
+ "\u123b": 110,
112
+ "\u123c": 111,
113
+ "\u123d": 112,
114
+ "\u123e": 113,
115
+ "\u123f": 114,
116
+ "\u1240": 115,
117
+ "\u1241": 116,
118
+ "\u1242": 117,
119
+ "\u1243": 118,
120
+ "\u1244": 119,
121
+ "\u1245": 120,
122
+ "\u1246": 121,
123
+ "\u1247": 122,
124
+ "\u1248": 123,
125
+ "\u124a": 124,
126
+ "\u124b": 125,
127
+ "\u124c": 126,
128
+ "\u124d": 127,
129
+ "\u1250": 128,
130
+ "\u1251": 129,
131
+ "\u1252": 130,
132
+ "\u1253": 131,
133
+ "\u1254": 132,
134
+ "\u1255": 133,
135
+ "\u1256": 134,
136
+ "\u1258": 135,
137
+ "\u125a": 136,
138
+ "\u125b": 137,
139
+ "\u125c": 138,
140
+ "\u125d": 139,
141
+ "\u1260": 140,
142
+ "\u1261": 141,
143
+ "\u1262": 142,
144
+ "\u1263": 143,
145
+ "\u1264": 144,
146
+ "\u1265": 145,
147
+ "\u1266": 146,
148
+ "\u1267": 147,
149
+ "\u1268": 148,
150
+ "\u1269": 149,
151
+ "\u126a": 150,
152
+ "\u126b": 151,
153
+ "\u126c": 152,
154
+ "\u126d": 153,
155
+ "\u126e": 154,
156
+ "\u126f": 155,
157
+ "\u1270": 156,
158
+ "\u1271": 157,
159
+ "\u1272": 158,
160
+ "\u1273": 159,
161
+ "\u1274": 160,
162
+ "\u1275": 161,
163
+ "\u1276": 162,
164
+ "\u1277": 163,
165
+ "\u1278": 164,
166
+ "\u1279": 165,
167
+ "\u127a": 166,
168
+ "\u127b": 167,
169
+ "\u127c": 168,
170
+ "\u127d": 169,
171
+ "\u127e": 170,
172
+ "\u127f": 171,
173
+ "\u1280": 172,
174
+ "\u1281": 173,
175
+ "\u1282": 174,
176
+ "\u1283": 175,
177
+ "\u1284": 176,
178
+ "\u1285": 177,
179
+ "\u1286": 178,
180
+ "\u1287": 179,
181
+ "\u1288": 180,
182
+ "\u128a": 181,
183
+ "\u128b": 182,
184
+ "\u128c": 183,
185
+ "\u128d": 184,
186
+ "\u1290": 185,
187
+ "\u1291": 186,
188
+ "\u1292": 187,
189
+ "\u1293": 188,
190
+ "\u1294": 189,
191
+ "\u1295": 190,
192
+ "\u1296": 191,
193
+ "\u1297": 192,
194
+ "\u1298": 193,
195
+ "\u1299": 194,
196
+ "\u129a": 195,
197
+ "\u129b": 196,
198
+ "\u129c": 197,
199
+ "\u129d": 198,
200
+ "\u129e": 199,
201
+ "\u129f": 200,
202
+ "\u12a0": 201,
203
+ "\u12a1": 202,
204
+ "\u12a2": 203,
205
+ "\u12a3": 204,
206
+ "\u12a4": 205,
207
+ "\u12a5": 206,
208
+ "\u12a6": 207,
209
+ "\u12a7": 208,
210
+ "\u12a8": 209,
211
+ "\u12a9": 210,
212
+ "\u12aa": 211,
213
+ "\u12ab": 212,
214
+ "\u12ac": 213,
215
+ "\u12ad": 214,
216
+ "\u12ae": 215,
217
+ "\u12af": 216,
218
+ "\u12b0": 217,
219
+ "\u12b2": 218,
220
+ "\u12b3": 219,
221
+ "\u12b4": 220,
222
+ "\u12b5": 221,
223
+ "\u12b8": 222,
224
+ "\u12b9": 223,
225
+ "\u12ba": 224,
226
+ "\u12bb": 225,
227
+ "\u12bc": 226,
228
+ "\u12bd": 227,
229
+ "\u12be": 228,
230
+ "\u12c0": 229,
231
+ "\u12c2": 230,
232
+ "\u12c3": 231,
233
+ "\u12c4": 232,
234
+ "\u12c5": 233,
235
+ "\u12c8": 234,
236
+ "\u12c9": 235,
237
+ "\u12ca": 236,
238
+ "\u12cb": 237,
239
+ "\u12cc": 238,
240
+ "\u12cd": 239,
241
+ "\u12ce": 240,
242
+ "\u12cf": 241,
243
+ "\u12d0": 242,
244
+ "\u12d1": 243,
245
+ "\u12d2": 244,
246
+ "\u12d3": 245,
247
+ "\u12d4": 246,
248
+ "\u12d5": 247,
249
+ "\u12d6": 248,
250
+ "\u12d8": 249,
251
+ "\u12d9": 250,
252
+ "\u12da": 251,
253
+ "\u12db": 252,
254
+ "\u12dc": 253,
255
+ "\u12dd": 254,
256
+ "\u12de": 255,
257
+ "\u12df": 256,
258
+ "\u12e0": 257,
259
+ "\u12e1": 258,
260
+ "\u12e2": 259,
261
+ "\u12e3": 260,
262
+ "\u12e4": 261,
263
+ "\u12e5": 262,
264
+ "\u12e6": 263,
265
+ "\u12e7": 264,
266
+ "\u12e8": 265,
267
+ "\u12e9": 266,
268
+ "\u12ea": 267,
269
+ "\u12eb": 268,
270
+ "\u12ec": 269,
271
+ "\u12ed": 270,
272
+ "\u12ee": 271,
273
+ "\u12ef": 272,
274
+ "\u12f0": 273,
275
+ "\u12f1": 274,
276
+ "\u12f2": 275,
277
+ "\u12f3": 276,
278
+ "\u12f4": 277,
279
+ "\u12f5": 278,
280
+ "\u12f6": 279,
281
+ "\u12f7": 280,
282
+ "\u12f8": 281,
283
+ "\u12f9": 282,
284
+ "\u12fa": 283,
285
+ "\u12fb": 284,
286
+ "\u12fc": 285,
287
+ "\u12fd": 286,
288
+ "\u12fe": 287,
289
+ "\u12ff": 288,
290
+ "\u1300": 289,
291
+ "\u1301": 290,
292
+ "\u1302": 291,
293
+ "\u1303": 292,
294
+ "\u1304": 293,
295
+ "\u1305": 294,
296
+ "\u1306": 295,
297
+ "\u1307": 296,
298
+ "\u1308": 297,
299
+ "\u1309": 298,
300
+ "\u130a": 299,
301
+ "\u130b": 300,
302
+ "\u130c": 301,
303
+ "\u130d": 302,
304
+ "\u130e": 303,
305
+ "\u130f": 304,
306
+ "\u1310": 305,
307
+ "\u1312": 306,
308
+ "\u1313": 307,
309
+ "\u1314": 308,
310
+ "\u1315": 309,
311
+ "\u1318": 310,
312
+ "\u1319": 311,
313
+ "\u131a": 312,
314
+ "\u131b": 313,
315
+ "\u131c": 314,
316
+ "\u131d": 315,
317
+ "\u131e": 316,
318
+ "\u131f": 317,
319
+ "\u1320": 318,
320
+ "\u1321": 319,
321
+ "\u1322": 320,
322
+ "\u1323": 321,
323
+ "\u1324": 322,
324
+ "\u1325": 323,
325
+ "\u1326": 324,
326
+ "\u1327": 325,
327
+ "\u1328": 326,
328
+ "\u1329": 327,
329
+ "\u132a": 328,
330
+ "\u132b": 329,
331
+ "\u132c": 330,
332
+ "\u132d": 331,
333
+ "\u132e": 332,
334
+ "\u132f": 333,
335
+ "\u1330": 334,
336
+ "\u1331": 335,
337
+ "\u1332": 336,
338
+ "\u1333": 337,
339
+ "\u1334": 338,
340
+ "\u1335": 339,
341
+ "\u1336": 340,
342
+ "\u1337": 341,
343
+ "\u1338": 342,
344
+ "\u1339": 343,
345
+ "\u133a": 344,
346
+ "\u133b": 345,
347
+ "\u133c": 346,
348
+ "\u133d": 347,
349
+ "\u133e": 348,
350
+ "\u133f": 349,
351
+ "\u1340": 350,
352
+ "\u1341": 351,
353
+ "\u1342": 352,
354
+ "\u1343": 353,
355
+ "\u1344": 354,
356
+ "\u1345": 355,
357
+ "\u1346": 356,
358
+ "\u1347": 357,
359
+ "\u1348": 358,
360
+ "\u1349": 359,
361
+ "\u134a": 360,
362
+ "\u134b": 361,
363
+ "\u134c": 362,
364
+ "\u134d": 363,
365
+ "\u134e": 364,
366
+ "\u134f": 365,
367
+ "\u1350": 366,
368
+ "\u1351": 367,
369
+ "\u1352": 368,
370
+ "\u1353": 369,
371
+ "\u1354": 370,
372
+ "\u1355": 371,
373
+ "\u1356": 372,
374
+ "\u1357": 373,
375
+ "\u1358": 374,
376
+ "\u1359": 375,
377
+ "\u135a": 376,
378
+ "\u1360": 377,
379
+ "\u1361": 378,
380
+ "\u1362": 379,
381
+ "\u1363": 380,
382
+ "\u1364": 381,
383
+ "\u1365": 382,
384
+ "\u1366": 383,
385
+ "\u1367": 384,
386
+ "\u1368": 385,
387
+ "\u1369": 386,
388
+ "\u136a": 387,
389
+ "\u136b": 388,
390
+ "\u136c": 389,
391
+ "\u136d": 390,
392
+ "\u136e": 391,
393
+ "\u136f": 392,
394
+ "\u1370": 393,
395
+ "\u1371": 394,
396
+ "\u1372": 395,
397
+ "\u1373": 396,
398
+ "\u1374": 397,
399
+ "\u1375": 398,
400
+ "\u1376": 399,
401
+ "\u1377": 400,
402
+ "\u1378": 401,
403
+ "\u1379": 402,
404
+ "\u137a": 403,
405
+ "\u137b": 404,
406
+ "\u137c": 405,
407
+ "\u20ac": 406,
408
+ "|": 0,
409
+ "[UNK]": 407,
410
+ "[PAD]": 408,
411
+ "[AMH]": 409,
412
+ "[WAL]": 410,
413
+ "[ORM]": 411,
414
+ "[TIR]": 412,
415
+ "[SID]": 413
416
+ }
metrics.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_loss": 0.8018730282783508, "eval_wer": 0.3658269644579654, "eval_cer": 0.09859656887964062, "eval_score": 76.77882333311969, "eval_runtime": 30.4694, "eval_samples_per_second": 65.64, "eval_steps_per_second": 8.205, "epoch": 6.21369596891695}
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681cdd569611f7b9e35a53e24e49d8f9662ed2b526f23f51799ce2722bd1573b
3
+ size 3852217376
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
processor/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 415,
3
+ "<s>": 414
4
+ }
processor/preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
processor/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
processor/tokenizer_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "407": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "408": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "409": {
20
+ "content": "[AMH]",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "410": {
28
+ "content": "[WAL]",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "411": {
36
+ "content": "[ORM]",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": true,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "412": {
44
+ "content": "[TIR]",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": true,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "413": {
52
+ "content": "[SID]",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": true,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "414": {
60
+ "content": "<s>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "415": {
68
+ "content": "</s>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ }
75
+ },
76
+ "bos_token": "<s>",
77
+ "clean_up_tokenization_spaces": false,
78
+ "do_lower_case": false,
79
+ "eos_token": "</s>",
80
+ "extra_special_tokens": {},
81
+ "model_max_length": 1000000000000000019884624838656,
82
+ "pad_token": "[PAD]",
83
+ "processor_class": "Wav2Vec2Processor",
84
+ "replace_word_delimiter_char": " ",
85
+ "target_lang": null,
86
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
87
+ "unk_token": "[UNK]",
88
+ "word_delimiter_token": "|"
89
+ }
processor/vocab.json ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "!": 1,
3
+ "#": 2,
4
+ "$": 3,
5
+ "%": 4,
6
+ "&": 5,
7
+ "'": 6,
8
+ "*": 7,
9
+ "+": 8,
10
+ ",": 9,
11
+ "-": 10,
12
+ ".": 11,
13
+ "0": 12,
14
+ "1": 13,
15
+ "2": 14,
16
+ "3": 15,
17
+ "4": 16,
18
+ "5": 17,
19
+ "6": 18,
20
+ "7": 19,
21
+ "8": 20,
22
+ "9": 21,
23
+ "=": 22,
24
+ "?": 23,
25
+ "@": 24,
26
+ "[AMH]": 409,
27
+ "[ORM]": 411,
28
+ "[PAD]": 408,
29
+ "[SID]": 413,
30
+ "[TIR]": 412,
31
+ "[UNK]": 407,
32
+ "[WAL]": 410,
33
+ "a": 25,
34
+ "b": 26,
35
+ "c": 27,
36
+ "d": 28,
37
+ "e": 29,
38
+ "f": 30,
39
+ "g": 31,
40
+ "h": 32,
41
+ "i": 33,
42
+ "j": 34,
43
+ "k": 35,
44
+ "l": 36,
45
+ "m": 37,
46
+ "n": 38,
47
+ "o": 39,
48
+ "p": 40,
49
+ "q": 41,
50
+ "r": 42,
51
+ "s": 43,
52
+ "t": 44,
53
+ "u": 45,
54
+ "v": 46,
55
+ "w": 47,
56
+ "x": 48,
57
+ "y": 49,
58
+ "z": 50,
59
+ "|": 0,
60
+ "ሀ": 51,
61
+ "ሁ": 52,
62
+ "ሂ": 53,
63
+ "ሃ": 54,
64
+ "ሄ": 55,
65
+ "ህ": 56,
66
+ "ሆ": 57,
67
+ "ሇ": 58,
68
+ "ለ": 59,
69
+ "ሉ": 60,
70
+ "ሊ": 61,
71
+ "ላ": 62,
72
+ "ሌ": 63,
73
+ "ል": 64,
74
+ "ሎ": 65,
75
+ "ሏ": 66,
76
+ "ሐ": 67,
77
+ "ሑ": 68,
78
+ "ሒ": 69,
79
+ "ሓ": 70,
80
+ "ሔ": 71,
81
+ "ሕ": 72,
82
+ "ሖ": 73,
83
+ "ሗ": 74,
84
+ "መ": 75,
85
+ "ሙ": 76,
86
+ "ሚ": 77,
87
+ "ማ": 78,
88
+ "ሜ": 79,
89
+ "ም": 80,
90
+ "ሞ": 81,
91
+ "ሟ": 82,
92
+ "ሠ": 83,
93
+ "ሡ": 84,
94
+ "ሢ": 85,
95
+ "ሣ": 86,
96
+ "ሤ": 87,
97
+ "ሥ": 88,
98
+ "ሦ": 89,
99
+ "ሧ": 90,
100
+ "ረ": 91,
101
+ "ሩ": 92,
102
+ "ሪ": 93,
103
+ "ራ": 94,
104
+ "ሬ": 95,
105
+ "ር": 96,
106
+ "ሮ": 97,
107
+ "ሯ": 98,
108
+ "ሰ": 99,
109
+ "ሱ": 100,
110
+ "ሲ": 101,
111
+ "ሳ": 102,
112
+ "ሴ": 103,
113
+ "ስ": 104,
114
+ "ሶ": 105,
115
+ "ሷ": 106,
116
+ "ሸ": 107,
117
+ "ሹ": 108,
118
+ "ሺ": 109,
119
+ "ሻ": 110,
120
+ "ሼ": 111,
121
+ "ሽ": 112,
122
+ "ሾ": 113,
123
+ "ሿ": 114,
124
+ "ቀ": 115,
125
+ "ቁ": 116,
126
+ "ቂ": 117,
127
+ "ቃ": 118,
128
+ "ቄ": 119,
129
+ "ቅ": 120,
130
+ "ቆ": 121,
131
+ "ቇ": 122,
132
+ "ቈ": 123,
133
+ "ቊ": 124,
134
+ "ቋ": 125,
135
+ "ቌ": 126,
136
+ "ቍ": 127,
137
+ "ቐ": 128,
138
+ "ቑ": 129,
139
+ "ቒ": 130,
140
+ "ቓ": 131,
141
+ "ቔ": 132,
142
+ "ቕ": 133,
143
+ "ቖ": 134,
144
+ "ቘ": 135,
145
+ "ቚ": 136,
146
+ "ቛ": 137,
147
+ "ቜ": 138,
148
+ "ቝ": 139,
149
+ "በ": 140,
150
+ "ቡ": 141,
151
+ "ቢ": 142,
152
+ "ባ": 143,
153
+ "ቤ": 144,
154
+ "ብ": 145,
155
+ "ቦ": 146,
156
+ "ቧ": 147,
157
+ "ቨ": 148,
158
+ "ቩ": 149,
159
+ "ቪ": 150,
160
+ "ቫ": 151,
161
+ "ቬ": 152,
162
+ "ቭ": 153,
163
+ "ቮ": 154,
164
+ "ቯ": 155,
165
+ "ተ": 156,
166
+ "ቱ": 157,
167
+ "ቲ": 158,
168
+ "ታ": 159,
169
+ "ቴ": 160,
170
+ "ት": 161,
171
+ "ቶ": 162,
172
+ "ቷ": 163,
173
+ "ቸ": 164,
174
+ "ቹ": 165,
175
+ "ቺ": 166,
176
+ "ቻ": 167,
177
+ "ቼ": 168,
178
+ "ች": 169,
179
+ "ቾ": 170,
180
+ "ቿ": 171,
181
+ "ኀ": 172,
182
+ "ኁ": 173,
183
+ "ኂ": 174,
184
+ "ኃ": 175,
185
+ "ኄ": 176,
186
+ "ኅ": 177,
187
+ "ኆ": 178,
188
+ "ኇ": 179,
189
+ "ኈ": 180,
190
+ "ኊ": 181,
191
+ "ኋ": 182,
192
+ "ኌ": 183,
193
+ "ኍ": 184,
194
+ "ነ": 185,
195
+ "ኑ": 186,
196
+ "ኒ": 187,
197
+ "ና": 188,
198
+ "ኔ": 189,
199
+ "ን": 190,
200
+ "ኖ": 191,
201
+ "ኗ": 192,
202
+ "ኘ": 193,
203
+ "ኙ": 194,
204
+ "ኚ": 195,
205
+ "ኛ": 196,
206
+ "ኜ": 197,
207
+ "ኝ": 198,
208
+ "ኞ": 199,
209
+ "ኟ": 200,
210
+ "አ": 201,
211
+ "ኡ": 202,
212
+ "ኢ": 203,
213
+ "ኣ": 204,
214
+ "ኤ": 205,
215
+ "እ": 206,
216
+ "ኦ": 207,
217
+ "ኧ": 208,
218
+ "ከ": 209,
219
+ "ኩ": 210,
220
+ "ኪ": 211,
221
+ "ካ": 212,
222
+ "ኬ": 213,
223
+ "ክ": 214,
224
+ "ኮ": 215,
225
+ "ኯ": 216,
226
+ "ኰ": 217,
227
+ "ኲ": 218,
228
+ "ኳ": 219,
229
+ "ኴ": 220,
230
+ "ኵ": 221,
231
+ "ኸ": 222,
232
+ "ኹ": 223,
233
+ "ኺ": 224,
234
+ "ኻ": 225,
235
+ "ኼ": 226,
236
+ "ኽ": 227,
237
+ "ኾ": 228,
238
+ "ዀ": 229,
239
+ "ዂ": 230,
240
+ "ዃ": 231,
241
+ "ዄ": 232,
242
+ "ዅ": 233,
243
+ "ወ": 234,
244
+ "ዉ": 235,
245
+ "ዊ": 236,
246
+ "ዋ": 237,
247
+ "ዌ": 238,
248
+ "ው": 239,
249
+ "ዎ": 240,
250
+ "ዏ": 241,
251
+ "ዐ": 242,
252
+ "ዑ": 243,
253
+ "ዒ": 244,
254
+ "ዓ": 245,
255
+ "ዔ": 246,
256
+ "ዕ": 247,
257
+ "ዖ": 248,
258
+ "ዘ": 249,
259
+ "ዙ": 250,
260
+ "ዚ": 251,
261
+ "ዛ": 252,
262
+ "ዜ": 253,
263
+ "ዝ": 254,
264
+ "ዞ": 255,
265
+ "ዟ": 256,
266
+ "ዠ": 257,
267
+ "ዡ": 258,
268
+ "ዢ": 259,
269
+ "ዣ": 260,
270
+ "ዤ": 261,
271
+ "ዥ": 262,
272
+ "ዦ": 263,
273
+ "ዧ": 264,
274
+ "የ": 265,
275
+ "ዩ": 266,
276
+ "ዪ": 267,
277
+ "ያ": 268,
278
+ "ዬ": 269,
279
+ "ይ": 270,
280
+ "ዮ": 271,
281
+ "ዯ": 272,
282
+ "ደ": 273,
283
+ "ዱ": 274,
284
+ "ዲ": 275,
285
+ "ዳ": 276,
286
+ "ዴ": 277,
287
+ "ድ": 278,
288
+ "ዶ": 279,
289
+ "ዷ": 280,
290
+ "ዸ": 281,
291
+ "ዹ": 282,
292
+ "ዺ": 283,
293
+ "ዻ": 284,
294
+ "ዼ": 285,
295
+ "ዽ": 286,
296
+ "ዾ": 287,
297
+ "ዿ": 288,
298
+ "ጀ": 289,
299
+ "ጁ": 290,
300
+ "ጂ": 291,
301
+ "ጃ": 292,
302
+ "ጄ": 293,
303
+ "ጅ": 294,
304
+ "ጆ": 295,
305
+ "ጇ": 296,
306
+ "ገ": 297,
307
+ "ጉ": 298,
308
+ "ጊ": 299,
309
+ "ጋ": 300,
310
+ "ጌ": 301,
311
+ "ግ": 302,
312
+ "ጎ": 303,
313
+ "ጏ": 304,
314
+ "ጐ": 305,
315
+ "ጒ": 306,
316
+ "ጓ": 307,
317
+ "ጔ": 308,
318
+ "ጕ": 309,
319
+ "ጘ": 310,
320
+ "ጙ": 311,
321
+ "ጚ": 312,
322
+ "ጛ": 313,
323
+ "ጜ": 314,
324
+ "ጝ": 315,
325
+ "ጞ": 316,
326
+ "ጟ": 317,
327
+ "ጠ": 318,
328
+ "ጡ": 319,
329
+ "ጢ": 320,
330
+ "ጣ": 321,
331
+ "ጤ": 322,
332
+ "ጥ": 323,
333
+ "ጦ": 324,
334
+ "ጧ": 325,
335
+ "ጨ": 326,
336
+ "ጩ": 327,
337
+ "ጪ": 328,
338
+ "ጫ": 329,
339
+ "ጬ": 330,
340
+ "ጭ": 331,
341
+ "ጮ": 332,
342
+ "ጯ": 333,
343
+ "ጰ": 334,
344
+ "ጱ": 335,
345
+ "ጲ": 336,
346
+ "ጳ": 337,
347
+ "ጴ": 338,
348
+ "ጵ": 339,
349
+ "ጶ": 340,
350
+ "ጷ": 341,
351
+ "ጸ": 342,
352
+ "ጹ": 343,
353
+ "ጺ": 344,
354
+ "ጻ": 345,
355
+ "ጼ": 346,
356
+ "ጽ": 347,
357
+ "ጾ": 348,
358
+ "ጿ": 349,
359
+ "ፀ": 350,
360
+ "ፁ": 351,
361
+ "ፂ": 352,
362
+ "ፃ": 353,
363
+ "ፄ": 354,
364
+ "ፅ": 355,
365
+ "ፆ": 356,
366
+ "ፇ": 357,
367
+ "ፈ": 358,
368
+ "ፉ": 359,
369
+ "ፊ": 360,
370
+ "ፋ": 361,
371
+ "ፌ": 362,
372
+ "ፍ": 363,
373
+ "ፎ": 364,
374
+ "ፏ": 365,
375
+ "ፐ": 366,
376
+ "ፑ": 367,
377
+ "ፒ": 368,
378
+ "ፓ": 369,
379
+ "ፔ": 370,
380
+ "ፕ": 371,
381
+ "ፖ": 372,
382
+ "ፗ": 373,
383
+ "ፘ": 374,
384
+ "ፙ": 375,
385
+ "ፚ": 376,
386
+ "፠": 377,
387
+ "፡": 378,
388
+ "።": 379,
389
+ "፣": 380,
390
+ "፤": 381,
391
+ "፥": 382,
392
+ "፦": 383,
393
+ "፧": 384,
394
+ "፨": 385,
395
+ "፩": 386,
396
+ "፪": 387,
397
+ "፫": 388,
398
+ "፬": 389,
399
+ "፭": 390,
400
+ "፮": 391,
401
+ "፯": 392,
402
+ "፰": 393,
403
+ "፱": 394,
404
+ "፲": 395,
405
+ "፳": 396,
406
+ "፴": 397,
407
+ "፵": 398,
408
+ "፶": 399,
409
+ "፷": 400,
410
+ "፸": 401,
411
+ "፹": 402,
412
+ "፺": 403,
413
+ "፻": 404,
414
+ "፼": 405,
415
+ "€": 406
416
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "407": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "408": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "409": {
20
+ "content": "[AMH]",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "410": {
28
+ "content": "[WAL]",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "411": {
36
+ "content": "[ORM]",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": true,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "412": {
44
+ "content": "[TIR]",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": true,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "413": {
52
+ "content": "[SID]",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": true,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "414": {
60
+ "content": "<s>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "415": {
68
+ "content": "</s>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ }
75
+ },
76
+ "bos_token": "<s>",
77
+ "clean_up_tokenization_spaces": false,
78
+ "do_lower_case": false,
79
+ "eos_token": "</s>",
80
+ "extra_special_tokens": {},
81
+ "model_max_length": 1000000000000000019884624838656,
82
+ "pad_token": "[PAD]",
83
+ "processor_class": "Wav2Vec2Processor",
84
+ "replace_word_delimiter_char": " ",
85
+ "target_lang": null,
86
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
87
+ "unk_token": "[UNK]",
88
+ "word_delimiter_token": "|"
89
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b750106402e066e83f2e8500d9bc48c868d446e9f12f58d61339685262e60823
3
+ size 5432
vocab.json ADDED
@@ -0,0 +1,416 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "!": 1,
3
+ "#": 2,
4
+ "$": 3,
5
+ "%": 4,
6
+ "&": 5,
7
+ "'": 6,
8
+ "*": 7,
9
+ "+": 8,
10
+ ",": 9,
11
+ "-": 10,
12
+ ".": 11,
13
+ "0": 12,
14
+ "1": 13,
15
+ "2": 14,
16
+ "3": 15,
17
+ "4": 16,
18
+ "5": 17,
19
+ "6": 18,
20
+ "7": 19,
21
+ "8": 20,
22
+ "9": 21,
23
+ "=": 22,
24
+ "?": 23,
25
+ "@": 24,
26
+ "[AMH]": 409,
27
+ "[ORM]": 411,
28
+ "[PAD]": 408,
29
+ "[SID]": 413,
30
+ "[TIR]": 412,
31
+ "[UNK]": 407,
32
+ "[WAL]": 410,
33
+ "a": 25,
34
+ "b": 26,
35
+ "c": 27,
36
+ "d": 28,
37
+ "e": 29,
38
+ "f": 30,
39
+ "g": 31,
40
+ "h": 32,
41
+ "i": 33,
42
+ "j": 34,
43
+ "k": 35,
44
+ "l": 36,
45
+ "m": 37,
46
+ "n": 38,
47
+ "o": 39,
48
+ "p": 40,
49
+ "q": 41,
50
+ "r": 42,
51
+ "s": 43,
52
+ "t": 44,
53
+ "u": 45,
54
+ "v": 46,
55
+ "w": 47,
56
+ "x": 48,
57
+ "y": 49,
58
+ "z": 50,
59
+ "|": 0,
60
+ "ሀ": 51,
61
+ "ሁ": 52,
62
+ "ሂ": 53,
63
+ "ሃ": 54,
64
+ "ሄ": 55,
65
+ "ህ": 56,
66
+ "ሆ": 57,
67
+ "ሇ": 58,
68
+ "ለ": 59,
69
+ "ሉ": 60,
70
+ "ሊ": 61,
71
+ "ላ": 62,
72
+ "ሌ": 63,
73
+ "ል": 64,
74
+ "ሎ": 65,
75
+ "ሏ": 66,
76
+ "ሐ": 67,
77
+ "ሑ": 68,
78
+ "ሒ": 69,
79
+ "ሓ": 70,
80
+ "ሔ": 71,
81
+ "ሕ": 72,
82
+ "ሖ": 73,
83
+ "ሗ": 74,
84
+ "መ": 75,
85
+ "ሙ": 76,
86
+ "ሚ": 77,
87
+ "ማ": 78,
88
+ "ሜ": 79,
89
+ "ም": 80,
90
+ "ሞ": 81,
91
+ "ሟ": 82,
92
+ "ሠ": 83,
93
+ "ሡ": 84,
94
+ "ሢ": 85,
95
+ "ሣ": 86,
96
+ "ሤ": 87,
97
+ "ሥ": 88,
98
+ "ሦ": 89,
99
+ "ሧ": 90,
100
+ "ረ": 91,
101
+ "ሩ": 92,
102
+ "ሪ": 93,
103
+ "ራ": 94,
104
+ "ሬ": 95,
105
+ "ር": 96,
106
+ "ሮ": 97,
107
+ "ሯ": 98,
108
+ "ሰ": 99,
109
+ "ሱ": 100,
110
+ "ሲ": 101,
111
+ "ሳ": 102,
112
+ "ሴ": 103,
113
+ "ስ": 104,
114
+ "ሶ": 105,
115
+ "ሷ": 106,
116
+ "ሸ": 107,
117
+ "ሹ": 108,
118
+ "ሺ": 109,
119
+ "ሻ": 110,
120
+ "ሼ": 111,
121
+ "ሽ": 112,
122
+ "ሾ": 113,
123
+ "ሿ": 114,
124
+ "ቀ": 115,
125
+ "ቁ": 116,
126
+ "ቂ": 117,
127
+ "ቃ": 118,
128
+ "ቄ": 119,
129
+ "ቅ": 120,
130
+ "ቆ": 121,
131
+ "ቇ": 122,
132
+ "ቈ": 123,
133
+ "ቊ": 124,
134
+ "ቋ": 125,
135
+ "ቌ": 126,
136
+ "ቍ": 127,
137
+ "ቐ": 128,
138
+ "ቑ": 129,
139
+ "ቒ": 130,
140
+ "ቓ": 131,
141
+ "ቔ": 132,
142
+ "ቕ": 133,
143
+ "ቖ": 134,
144
+ "ቘ": 135,
145
+ "ቚ": 136,
146
+ "ቛ": 137,
147
+ "ቜ": 138,
148
+ "ቝ": 139,
149
+ "በ": 140,
150
+ "ቡ": 141,
151
+ "ቢ": 142,
152
+ "ባ": 143,
153
+ "ቤ": 144,
154
+ "ብ": 145,
155
+ "ቦ": 146,
156
+ "ቧ": 147,
157
+ "ቨ": 148,
158
+ "ቩ": 149,
159
+ "ቪ": 150,
160
+ "ቫ": 151,
161
+ "ቬ": 152,
162
+ "ቭ": 153,
163
+ "ቮ": 154,
164
+ "ቯ": 155,
165
+ "ተ": 156,
166
+ "ቱ": 157,
167
+ "ቲ": 158,
168
+ "ታ": 159,
169
+ "ቴ": 160,
170
+ "ት": 161,
171
+ "ቶ": 162,
172
+ "ቷ": 163,
173
+ "ቸ": 164,
174
+ "ቹ": 165,
175
+ "ቺ": 166,
176
+ "ቻ": 167,
177
+ "ቼ": 168,
178
+ "ች": 169,
179
+ "ቾ": 170,
180
+ "ቿ": 171,
181
+ "ኀ": 172,
182
+ "ኁ": 173,
183
+ "ኂ": 174,
184
+ "ኃ": 175,
185
+ "ኄ": 176,
186
+ "ኅ": 177,
187
+ "ኆ": 178,
188
+ "ኇ": 179,
189
+ "ኈ": 180,
190
+ "ኊ": 181,
191
+ "ኋ": 182,
192
+ "ኌ": 183,
193
+ "ኍ": 184,
194
+ "ነ": 185,
195
+ "ኑ": 186,
196
+ "ኒ": 187,
197
+ "ና": 188,
198
+ "ኔ": 189,
199
+ "ን": 190,
200
+ "ኖ": 191,
201
+ "ኗ": 192,
202
+ "ኘ": 193,
203
+ "ኙ": 194,
204
+ "ኚ": 195,
205
+ "ኛ": 196,
206
+ "ኜ": 197,
207
+ "ኝ": 198,
208
+ "ኞ": 199,
209
+ "ኟ": 200,
210
+ "አ": 201,
211
+ "ኡ": 202,
212
+ "ኢ": 203,
213
+ "ኣ": 204,
214
+ "ኤ": 205,
215
+ "እ": 206,
216
+ "ኦ": 207,
217
+ "ኧ": 208,
218
+ "ከ": 209,
219
+ "ኩ": 210,
220
+ "ኪ": 211,
221
+ "ካ": 212,
222
+ "ኬ": 213,
223
+ "ክ": 214,
224
+ "ኮ": 215,
225
+ "ኯ": 216,
226
+ "ኰ": 217,
227
+ "ኲ": 218,
228
+ "ኳ": 219,
229
+ "ኴ": 220,
230
+ "ኵ": 221,
231
+ "ኸ": 222,
232
+ "ኹ": 223,
233
+ "ኺ": 224,
234
+ "ኻ": 225,
235
+ "ኼ": 226,
236
+ "ኽ": 227,
237
+ "ኾ": 228,
238
+ "ዀ": 229,
239
+ "ዂ": 230,
240
+ "ዃ": 231,
241
+ "ዄ": 232,
242
+ "ዅ": 233,
243
+ "ወ": 234,
244
+ "ዉ": 235,
245
+ "ዊ": 236,
246
+ "ዋ": 237,
247
+ "ዌ": 238,
248
+ "ው": 239,
249
+ "ዎ": 240,
250
+ "ዏ": 241,
251
+ "ዐ": 242,
252
+ "ዑ": 243,
253
+ "ዒ": 244,
254
+ "ዓ": 245,
255
+ "ዔ": 246,
256
+ "ዕ": 247,
257
+ "ዖ": 248,
258
+ "ዘ": 249,
259
+ "ዙ": 250,
260
+ "ዚ": 251,
261
+ "ዛ": 252,
262
+ "ዜ": 253,
263
+ "ዝ": 254,
264
+ "ዞ": 255,
265
+ "ዟ": 256,
266
+ "ዠ": 257,
267
+ "ዡ": 258,
268
+ "ዢ": 259,
269
+ "ዣ": 260,
270
+ "ዤ": 261,
271
+ "ዥ": 262,
272
+ "ዦ": 263,
273
+ "ዧ": 264,
274
+ "የ": 265,
275
+ "ዩ": 266,
276
+ "ዪ": 267,
277
+ "ያ": 268,
278
+ "ዬ": 269,
279
+ "ይ": 270,
280
+ "ዮ": 271,
281
+ "ዯ": 272,
282
+ "ደ": 273,
283
+ "ዱ": 274,
284
+ "ዲ": 275,
285
+ "ዳ": 276,
286
+ "ዴ": 277,
287
+ "ድ": 278,
288
+ "ዶ": 279,
289
+ "ዷ": 280,
290
+ "ዸ": 281,
291
+ "ዹ": 282,
292
+ "ዺ": 283,
293
+ "ዻ": 284,
294
+ "ዼ": 285,
295
+ "ዽ": 286,
296
+ "ዾ": 287,
297
+ "ዿ": 288,
298
+ "ጀ": 289,
299
+ "ጁ": 290,
300
+ "ጂ": 291,
301
+ "ጃ": 292,
302
+ "ጄ": 293,
303
+ "ጅ": 294,
304
+ "ጆ": 295,
305
+ "ጇ": 296,
306
+ "ገ": 297,
307
+ "ጉ": 298,
308
+ "ጊ": 299,
309
+ "ጋ": 300,
310
+ "ጌ": 301,
311
+ "ግ": 302,
312
+ "ጎ": 303,
313
+ "ጏ": 304,
314
+ "ጐ": 305,
315
+ "ጒ": 306,
316
+ "ጓ": 307,
317
+ "ጔ": 308,
318
+ "ጕ": 309,
319
+ "ጘ": 310,
320
+ "ጙ": 311,
321
+ "ጚ": 312,
322
+ "ጛ": 313,
323
+ "ጜ": 314,
324
+ "ጝ": 315,
325
+ "ጞ": 316,
326
+ "ጟ": 317,
327
+ "ጠ": 318,
328
+ "ጡ": 319,
329
+ "ጢ": 320,
330
+ "ጣ": 321,
331
+ "ጤ": 322,
332
+ "ጥ": 323,
333
+ "ጦ": 324,
334
+ "ጧ": 325,
335
+ "ጨ": 326,
336
+ "ጩ": 327,
337
+ "ጪ": 328,
338
+ "ጫ": 329,
339
+ "ጬ": 330,
340
+ "ጭ": 331,
341
+ "ጮ": 332,
342
+ "ጯ": 333,
343
+ "ጰ": 334,
344
+ "ጱ": 335,
345
+ "ጲ": 336,
346
+ "ጳ": 337,
347
+ "ጴ": 338,
348
+ "ጵ": 339,
349
+ "ጶ": 340,
350
+ "ጷ": 341,
351
+ "ጸ": 342,
352
+ "ጹ": 343,
353
+ "ጺ": 344,
354
+ "ጻ": 345,
355
+ "ጼ": 346,
356
+ "ጽ": 347,
357
+ "ጾ": 348,
358
+ "ጿ": 349,
359
+ "ፀ": 350,
360
+ "ፁ": 351,
361
+ "ፂ": 352,
362
+ "ፃ": 353,
363
+ "ፄ": 354,
364
+ "ፅ": 355,
365
+ "ፆ": 356,
366
+ "ፇ": 357,
367
+ "ፈ": 358,
368
+ "ፉ": 359,
369
+ "ፊ": 360,
370
+ "ፋ": 361,
371
+ "ፌ": 362,
372
+ "ፍ": 363,
373
+ "ፎ": 364,
374
+ "ፏ": 365,
375
+ "ፐ": 366,
376
+ "ፑ": 367,
377
+ "ፒ": 368,
378
+ "ፓ": 369,
379
+ "ፔ": 370,
380
+ "ፕ": 371,
381
+ "ፖ": 372,
382
+ "ፗ": 373,
383
+ "ፘ": 374,
384
+ "ፙ": 375,
385
+ "ፚ": 376,
386
+ "፠": 377,
387
+ "፡": 378,
388
+ "።": 379,
389
+ "፣": 380,
390
+ "፤": 381,
391
+ "፥": 382,
392
+ "፦": 383,
393
+ "፧": 384,
394
+ "፨": 385,
395
+ "፩": 386,
396
+ "፪": 387,
397
+ "፫": 388,
398
+ "፬": 389,
399
+ "፭": 390,
400
+ "፮": 391,
401
+ "፯": 392,
402
+ "፰": 393,
403
+ "፱": 394,
404
+ "፲": 395,
405
+ "፳": 396,
406
+ "፴": 397,
407
+ "፵": 398,
408
+ "፶": 399,
409
+ "፷": 400,
410
+ "፸": 401,
411
+ "፹": 402,
412
+ "፺": 403,
413
+ "፻": 404,
414
+ "፼": 405,
415
+ "€": 406
416
+ }