elmadany commited on
Commit
4a57f1a
·
verified ·
1 Parent(s): 3578350

Initial model upload

Browse files
README.md ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: facebook/wav2vec2-xls-r-1b
4
+ tags:
5
+ - automatic-speech-recognition
6
+ - SimbaBench_tasks_ft
7
+ - generated_from_trainer
8
+ metrics:
9
+ - wer
10
+ model-index:
11
+ - name: wav2vec2-xls-r-1b
12
+ results: []
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # wav2vec2-xls-r-1b
19
+
20
+ This model is a fine-tuned version of [facebook/wav2vec2-xls-r-1b](https://huggingface.co/facebook/wav2vec2-xls-r-1b) on the SimbaBench ASR fine-tuning dataset (`asr_ft_data_batch_2`, loaded from `/mnt/home/elmadany/elmadany_workspace/African_speechT5/jasmine-raid/elmadany_work/HF_format/SimbaBench_tasks_ft`).
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 1.3260
23
+ - Wer: 0.8212
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 0.0001
43
+ - train_batch_size: 8
44
+ - eval_batch_size: 8
45
+ - seed: 42
46
+ - distributed_type: multi-GPU
47
+ - num_devices: 4
48
+ - gradient_accumulation_steps: 8
49
+ - total_train_batch_size: 256
50
+ - total_eval_batch_size: 32
51
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
52
+ - lr_scheduler_type: linear
53
+ - lr_scheduler_warmup_steps: 1000
54
+ - num_epochs: 30.0
55
+
56
+ ### Training results
57
+
58
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
59
+ |:-------------:|:-----:|:-----:|:---------------:|:------:|
60
+ | 4.5214 | 1.0 | 610 | 2.5037 | 0.9721 |
61
+ | 0.5588 | 2.0 | 1221 | 2.3758 | 0.9199 |
62
+ | 0.4547 | 3.0 | 1831 | 2.2466 | 0.9419 |
63
+ | 0.4025 | 4.0 | 2442 | 2.2313 | 0.8876 |
64
+ | 0.3789 | 5.0 | 3053 | 2.2444 | 0.8845 |
65
+ | 0.3542 | 6.0 | 3663 | 1.9893 | 0.8914 |
66
+ | 0.3383 | 7.0 | 4274 | 1.3485 | 0.8357 |
67
+ | 0.3239 | 8.0 | 4885 | 1.9888 | 0.8776 |
68
+ | 0.3074 | 9.0 | 5495 | 1.9462 | 0.8689 |
69
+ | 0.2883 | 10.0 | 6106 | 1.5767 | 0.8871 |
70
+ | 0.2749 | 11.0 | 6716 | 1.3260 | 0.8212 |
71
+ | 0.2563 | 12.0 | 7327 | 1.8270 | 0.8440 |
72
+ | 0.2474 | 13.0 | 7938 | 1.8219 | 0.8677 |
73
+ | 0.2347 | 14.0 | 8548 | 1.5346 | 0.8635 |
74
+ | 0.2211 | 15.0 | 9159 | 1.7185 | 0.8636 |
75
+ | 0.2117 | 16.0 | 9770 | 1.8663 | 0.8697 |
76
+ | 0.1987 | 17.0 | 10380 | 1.4298 | 0.8687 |
77
+ | 0.1814 | 18.0 | 10991 | 1.5630 | 0.8680 |
78
+ | 0.1694 | 19.0 | 11601 | 1.3627 | 0.8573 |
79
+ | 0.1597 | 20.0 | 12212 | 1.7108 | 0.8642 |
80
+ | 0.1517 | 21.0 | 12823 | 1.8344 | 0.8794 |
81
+ | 0.1405 | 22.0 | 13433 | 1.4838 | 0.8508 |
82
+ | 0.1262 | 23.0 | 14044 | 1.5322 | 0.8415 |
83
+ | 0.1171 | 24.0 | 14655 | 1.7095 | 0.8682 |
84
+ | 0.1079 | 25.0 | 15265 | 1.7445 | 0.8719 |
85
+ | 0.0996 | 26.0 | 15876 | 1.7322 | 0.8502 |
86
+ | 0.0922 | 27.0 | 16486 | 1.8349 | 0.8625 |
87
+ | 0.0855 | 28.0 | 17097 | 1.8259 | 0.8646 |
88
+ | 0.081 | 29.0 | 17708 | 1.8187 | 0.8651 |
89
+ | 0.0771 | 29.97 | 18300 | 1.8427 | 0.8624 |
90
+
91
+
92
+ ### Framework versions
93
+
94
+ - Transformers 4.33.2
95
+ - Pytorch 2.0.1+cu117
96
+ - Datasets 3.5.0
97
+ - Tokenizers 0.13.3
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 427,
3
+ "<s>": 426
4
+ }
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.97,
3
+ "eval_loss": 1.3259695768356323,
4
+ "eval_runtime": 51.9696,
5
+ "eval_samples": 7426,
6
+ "eval_samples_per_second": 142.891,
7
+ "eval_steps_per_second": 4.483,
8
+ "eval_wer": 0.8212058967880319,
9
+ "train_loss": 0.372250930494298,
10
+ "train_runtime": 52675.4843,
11
+ "train_samples": 156308,
12
+ "train_samples_per_second": 89.021,
13
+ "train_steps_per_second": 0.347
14
+ }
checkpoint-6716/config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-1b",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 1024,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 425,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.33.2",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 428,
107
+ "xvector_output_dim": 512
108
+ }
checkpoint-6716/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9bd31d869d450ac8af0153e3c27448134fbbe0a16ad61600ee8ec42a6d547b4
3
+ size 7671344417
checkpoint-6716/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dadad20e78450a398a164fc4f9ec8f18459ea683ca466c894ebcc3fdbc2320
3
+ size 3852463469
checkpoint-6716/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:224d781dca743bd2bdbbc7a9071a748ce2a18827059ee7bfecb5904638f627dd
3
+ size 17655
checkpoint-6716/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a27e526f830cf52c9ee47e7ac19ee6d50cd3fc46dcde6e89ed5ed31889d584aa
3
+ size 17655
checkpoint-6716/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bad431fa772ad7d4d66f468c49ee16803e20e6ac4fb5a4a1a82850b56c907f5
3
+ size 17655
checkpoint-6716/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398da0ccfca86f1dda414c8a319b99a13bb7ab1ee512bcc2efe22c716e8cf166
3
+ size 17655
checkpoint-6716/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38edd48d33fd9531cdb736d90bab9992219630aa65b4a7ccb2651798a351718
3
+ size 627
checkpoint-6716/trainer_state.json ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8212058967880319,
3
+ "best_model_checkpoint": "./outputs/facebook/wav2vec2-xls-r-1b/checkpoint-6716",
4
+ "epoch": 10.998567041965199,
5
+ "eval_steps": 500,
6
+ "global_step": 6716,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 6.07e-05,
14
+ "loss": 4.5214,
15
+ "step": 610
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_loss": 2.5036747455596924,
20
+ "eval_runtime": 53.3547,
21
+ "eval_samples_per_second": 139.182,
22
+ "eval_steps_per_second": 4.367,
23
+ "eval_wer": 0.9721338592696271,
24
+ "step": 610
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "learning_rate": 9.873988439306359e-05,
29
+ "loss": 0.5588,
30
+ "step": 1221
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_loss": 2.375758171081543,
35
+ "eval_runtime": 52.8335,
36
+ "eval_samples_per_second": 140.555,
37
+ "eval_steps_per_second": 4.41,
38
+ "eval_wer": 0.9198734844491302,
39
+ "step": 1221
40
+ },
41
+ {
42
+ "epoch": 3.0,
43
+ "learning_rate": 9.521387283236995e-05,
44
+ "loss": 0.4547,
45
+ "step": 1831
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "eval_loss": 2.2466094493865967,
50
+ "eval_runtime": 53.1764,
51
+ "eval_samples_per_second": 139.648,
52
+ "eval_steps_per_second": 4.382,
53
+ "eval_wer": 0.9418864631996073,
54
+ "step": 1831
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "learning_rate": 9.16820809248555e-05,
59
+ "loss": 0.4025,
60
+ "step": 2442
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_loss": 2.2312777042388916,
65
+ "eval_runtime": 54.1165,
66
+ "eval_samples_per_second": 137.223,
67
+ "eval_steps_per_second": 4.306,
68
+ "eval_wer": 0.8876083834729973,
69
+ "step": 2442
70
+ },
71
+ {
72
+ "epoch": 5.0,
73
+ "learning_rate": 8.815606936416185e-05,
74
+ "loss": 0.3789,
75
+ "step": 3053
76
+ },
77
+ {
78
+ "epoch": 5.0,
79
+ "eval_loss": 2.244392156600952,
80
+ "eval_runtime": 52.4474,
81
+ "eval_samples_per_second": 141.589,
82
+ "eval_steps_per_second": 4.443,
83
+ "eval_wer": 0.8844636722229291,
84
+ "step": 3053
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "learning_rate": 8.463005780346821e-05,
89
+ "loss": 0.3542,
90
+ "step": 3663
91
+ },
92
+ {
93
+ "epoch": 6.0,
94
+ "eval_loss": 1.9893066883087158,
95
+ "eval_runtime": 54.83,
96
+ "eval_samples_per_second": 135.437,
97
+ "eval_steps_per_second": 4.249,
98
+ "eval_wer": 0.8913711304600731,
99
+ "step": 3663
100
+ },
101
+ {
102
+ "epoch": 7.0,
103
+ "learning_rate": 8.109826589595376e-05,
104
+ "loss": 0.3383,
105
+ "step": 4274
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "eval_loss": 1.3484952449798584,
110
+ "eval_runtime": 53.4092,
111
+ "eval_samples_per_second": 139.04,
112
+ "eval_steps_per_second": 4.363,
113
+ "eval_wer": 0.8356752040426808,
114
+ "step": 4274
115
+ },
116
+ {
117
+ "epoch": 8.0,
118
+ "learning_rate": 7.756647398843931e-05,
119
+ "loss": 0.3239,
120
+ "step": 4885
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "eval_loss": 1.98879075050354,
125
+ "eval_runtime": 53.1444,
126
+ "eval_samples_per_second": 139.732,
127
+ "eval_steps_per_second": 4.384,
128
+ "eval_wer": 0.8775743915074619,
129
+ "step": 4885
130
+ },
131
+ {
132
+ "epoch": 9.0,
133
+ "learning_rate": 7.404624277456647e-05,
134
+ "loss": 0.3074,
135
+ "step": 5495
136
+ },
137
+ {
138
+ "epoch": 9.0,
139
+ "eval_loss": 1.9461687803268433,
140
+ "eval_runtime": 53.2785,
141
+ "eval_samples_per_second": 139.381,
142
+ "eval_steps_per_second": 4.373,
143
+ "eval_wer": 0.8689400687110319,
144
+ "step": 5495
145
+ },
146
+ {
147
+ "epoch": 10.0,
148
+ "learning_rate": 7.051445086705202e-05,
149
+ "loss": 0.2883,
150
+ "step": 6106
151
+ },
152
+ {
153
+ "epoch": 10.0,
154
+ "eval_loss": 1.5767468214035034,
155
+ "eval_runtime": 53.0357,
156
+ "eval_samples_per_second": 140.019,
157
+ "eval_steps_per_second": 4.393,
158
+ "eval_wer": 0.8871357679094032,
159
+ "step": 6106
160
+ },
161
+ {
162
+ "epoch": 11.0,
163
+ "learning_rate": 6.698843930635838e-05,
164
+ "loss": 0.2749,
165
+ "step": 6716
166
+ },
167
+ {
168
+ "epoch": 11.0,
169
+ "eval_loss": 1.3259695768356323,
170
+ "eval_runtime": 53.7403,
171
+ "eval_samples_per_second": 138.183,
172
+ "eval_steps_per_second": 4.336,
173
+ "eval_wer": 0.8212058967880319,
174
+ "step": 6716
175
+ }
176
+ ],
177
+ "logging_steps": 500,
178
+ "max_steps": 18300,
179
+ "num_train_epochs": 30,
180
+ "save_steps": 500,
181
+ "total_flos": 7.487260021114609e+20,
182
+ "trial_name": null,
183
+ "trial_params": null
184
+ }
checkpoint-6716/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875cf88512c7fb59f5e0a63eea1c3c0b801f1d554bf17923c524c6414043ca0a
3
+ size 4091
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/wav2vec2-xls-r-1b",
3
+ "activation_dropout": 0.0,
4
+ "adapter_attn_dim": null,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.0,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 1024,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": false,
47
+ "diversity_loss_weight": 0.1,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.0,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 425,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.33.2",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 428,
107
+ "xvector_output_dim": 512
108
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.97,
3
+ "eval_loss": 1.3259695768356323,
4
+ "eval_runtime": 51.9696,
5
+ "eval_samples": 7426,
6
+ "eval_samples_per_second": 142.891,
7
+ "eval_steps_per_second": 4.483,
8
+ "eval_wer": 0.8212058967880319
9
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55dadad20e78450a398a164fc4f9ec8f18459ea683ca466c894ebcc3fdbc2320
3
+ size 3852463469
special_tokens_map.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "eos_token": "</s>",
20
+ "pad_token": "[PAD]",
21
+ "unk_token": "[UNK]"
22
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "do_lower_case": false,
5
+ "eos_token": "</s>",
6
+ "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "[PAD]",
8
+ "replace_word_delimiter_char": " ",
9
+ "target_lang": null,
10
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "[UNK]",
12
+ "word_delimiter_token": "|"
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 29.97,
3
+ "train_loss": 0.372250930494298,
4
+ "train_runtime": 52675.4843,
5
+ "train_samples": 156308,
6
+ "train_samples_per_second": 89.021,
7
+ "train_steps_per_second": 0.347
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8212058967880319,
3
+ "best_model_checkpoint": "./outputs/facebook/wav2vec2-xls-r-1b/checkpoint-6716",
4
+ "epoch": 29.969293756397136,
5
+ "eval_steps": 500,
6
+ "global_step": 18300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 6.07e-05,
14
+ "loss": 4.5214,
15
+ "step": 610
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_loss": 2.5036747455596924,
20
+ "eval_runtime": 53.3547,
21
+ "eval_samples_per_second": 139.182,
22
+ "eval_steps_per_second": 4.367,
23
+ "eval_wer": 0.9721338592696271,
24
+ "step": 610
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "learning_rate": 9.873988439306359e-05,
29
+ "loss": 0.5588,
30
+ "step": 1221
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_loss": 2.375758171081543,
35
+ "eval_runtime": 52.8335,
36
+ "eval_samples_per_second": 140.555,
37
+ "eval_steps_per_second": 4.41,
38
+ "eval_wer": 0.9198734844491302,
39
+ "step": 1221
40
+ },
41
+ {
42
+ "epoch": 3.0,
43
+ "learning_rate": 9.521387283236995e-05,
44
+ "loss": 0.4547,
45
+ "step": 1831
46
+ },
47
+ {
48
+ "epoch": 3.0,
49
+ "eval_loss": 2.2466094493865967,
50
+ "eval_runtime": 53.1764,
51
+ "eval_samples_per_second": 139.648,
52
+ "eval_steps_per_second": 4.382,
53
+ "eval_wer": 0.9418864631996073,
54
+ "step": 1831
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "learning_rate": 9.16820809248555e-05,
59
+ "loss": 0.4025,
60
+ "step": 2442
61
+ },
62
+ {
63
+ "epoch": 4.0,
64
+ "eval_loss": 2.2312777042388916,
65
+ "eval_runtime": 54.1165,
66
+ "eval_samples_per_second": 137.223,
67
+ "eval_steps_per_second": 4.306,
68
+ "eval_wer": 0.8876083834729973,
69
+ "step": 2442
70
+ },
71
+ {
72
+ "epoch": 5.0,
73
+ "learning_rate": 8.815606936416185e-05,
74
+ "loss": 0.3789,
75
+ "step": 3053
76
+ },
77
+ {
78
+ "epoch": 5.0,
79
+ "eval_loss": 2.244392156600952,
80
+ "eval_runtime": 52.4474,
81
+ "eval_samples_per_second": 141.589,
82
+ "eval_steps_per_second": 4.443,
83
+ "eval_wer": 0.8844636722229291,
84
+ "step": 3053
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "learning_rate": 8.463005780346821e-05,
89
+ "loss": 0.3542,
90
+ "step": 3663
91
+ },
92
+ {
93
+ "epoch": 6.0,
94
+ "eval_loss": 1.9893066883087158,
95
+ "eval_runtime": 54.83,
96
+ "eval_samples_per_second": 135.437,
97
+ "eval_steps_per_second": 4.249,
98
+ "eval_wer": 0.8913711304600731,
99
+ "step": 3663
100
+ },
101
+ {
102
+ "epoch": 7.0,
103
+ "learning_rate": 8.109826589595376e-05,
104
+ "loss": 0.3383,
105
+ "step": 4274
106
+ },
107
+ {
108
+ "epoch": 7.0,
109
+ "eval_loss": 1.3484952449798584,
110
+ "eval_runtime": 53.4092,
111
+ "eval_samples_per_second": 139.04,
112
+ "eval_steps_per_second": 4.363,
113
+ "eval_wer": 0.8356752040426808,
114
+ "step": 4274
115
+ },
116
+ {
117
+ "epoch": 8.0,
118
+ "learning_rate": 7.756647398843931e-05,
119
+ "loss": 0.3239,
120
+ "step": 4885
121
+ },
122
+ {
123
+ "epoch": 8.0,
124
+ "eval_loss": 1.98879075050354,
125
+ "eval_runtime": 53.1444,
126
+ "eval_samples_per_second": 139.732,
127
+ "eval_steps_per_second": 4.384,
128
+ "eval_wer": 0.8775743915074619,
129
+ "step": 4885
130
+ },
131
+ {
132
+ "epoch": 9.0,
133
+ "learning_rate": 7.404624277456647e-05,
134
+ "loss": 0.3074,
135
+ "step": 5495
136
+ },
137
+ {
138
+ "epoch": 9.0,
139
+ "eval_loss": 1.9461687803268433,
140
+ "eval_runtime": 53.2785,
141
+ "eval_samples_per_second": 139.381,
142
+ "eval_steps_per_second": 4.373,
143
+ "eval_wer": 0.8689400687110319,
144
+ "step": 5495
145
+ },
146
+ {
147
+ "epoch": 10.0,
148
+ "learning_rate": 7.051445086705202e-05,
149
+ "loss": 0.2883,
150
+ "step": 6106
151
+ },
152
+ {
153
+ "epoch": 10.0,
154
+ "eval_loss": 1.5767468214035034,
155
+ "eval_runtime": 53.0357,
156
+ "eval_samples_per_second": 140.019,
157
+ "eval_steps_per_second": 4.393,
158
+ "eval_wer": 0.8871357679094032,
159
+ "step": 6106
160
+ },
161
+ {
162
+ "epoch": 11.0,
163
+ "learning_rate": 6.698843930635838e-05,
164
+ "loss": 0.2749,
165
+ "step": 6716
166
+ },
167
+ {
168
+ "epoch": 11.0,
169
+ "eval_loss": 1.3259695768356323,
170
+ "eval_runtime": 53.7403,
171
+ "eval_samples_per_second": 138.183,
172
+ "eval_steps_per_second": 4.336,
173
+ "eval_wer": 0.8212058967880319,
174
+ "step": 6716
175
+ },
176
+ {
177
+ "epoch": 12.0,
178
+ "learning_rate": 6.345664739884394e-05,
179
+ "loss": 0.2563,
180
+ "step": 7327
181
+ },
182
+ {
183
+ "epoch": 12.0,
184
+ "eval_loss": 1.8269567489624023,
185
+ "eval_runtime": 53.4539,
186
+ "eval_samples_per_second": 138.923,
187
+ "eval_steps_per_second": 4.359,
188
+ "eval_wer": 0.844000508970607,
189
+ "step": 7327
190
+ },
191
+ {
192
+ "epoch": 13.0,
193
+ "learning_rate": 5.992485549132948e-05,
194
+ "loss": 0.2474,
195
+ "step": 7938
196
+ },
197
+ {
198
+ "epoch": 13.0,
199
+ "eval_loss": 1.8218885660171509,
200
+ "eval_runtime": 53.3104,
201
+ "eval_samples_per_second": 139.297,
202
+ "eval_steps_per_second": 4.371,
203
+ "eval_wer": 0.8676676421936633,
204
+ "step": 7938
205
+ },
206
+ {
207
+ "epoch": 14.0,
208
+ "learning_rate": 5.6398843930635835e-05,
209
+ "loss": 0.2347,
210
+ "step": 8548
211
+ },
212
+ {
213
+ "epoch": 14.0,
214
+ "eval_loss": 1.5345903635025024,
215
+ "eval_runtime": 53.5574,
216
+ "eval_samples_per_second": 138.655,
217
+ "eval_steps_per_second": 4.35,
218
+ "eval_wer": 0.8634504571646702,
219
+ "step": 8548
220
+ },
221
+ {
222
+ "epoch": 15.0,
223
+ "learning_rate": 5.28728323699422e-05,
224
+ "loss": 0.2211,
225
+ "step": 9159
226
+ },
227
+ {
228
+ "epoch": 15.0,
229
+ "eval_loss": 1.7185391187667847,
230
+ "eval_runtime": 53.0996,
231
+ "eval_samples_per_second": 139.85,
232
+ "eval_steps_per_second": 4.388,
233
+ "eval_wer": 0.8636140548597604,
234
+ "step": 9159
235
+ },
236
+ {
237
+ "epoch": 16.0,
238
+ "learning_rate": 4.934104046242775e-05,
239
+ "loss": 0.2117,
240
+ "step": 9770
241
+ },
242
+ {
243
+ "epoch": 16.0,
244
+ "eval_loss": 1.8662818670272827,
245
+ "eval_runtime": 54.2061,
246
+ "eval_samples_per_second": 136.996,
247
+ "eval_steps_per_second": 4.298,
248
+ "eval_wer": 0.8696671695780998,
249
+ "step": 9770
250
+ },
251
+ {
252
+ "epoch": 17.0,
253
+ "learning_rate": 4.582080924855491e-05,
254
+ "loss": 0.1987,
255
+ "step": 10380
256
+ },
257
+ {
258
+ "epoch": 17.0,
259
+ "eval_loss": 1.4298427104949951,
260
+ "eval_runtime": 53.5752,
261
+ "eval_samples_per_second": 138.609,
262
+ "eval_steps_per_second": 4.349,
263
+ "eval_wer": 0.8686674058858815,
264
+ "step": 10380
265
+ },
266
+ {
267
+ "epoch": 18.0,
268
+ "learning_rate": 4.228901734104046e-05,
269
+ "loss": 0.1814,
270
+ "step": 10991
271
+ },
272
+ {
273
+ "epoch": 18.0,
274
+ "eval_loss": 1.5630472898483276,
275
+ "eval_runtime": 54.0558,
276
+ "eval_samples_per_second": 137.377,
277
+ "eval_steps_per_second": 4.31,
278
+ "eval_wer": 0.8680130151055205,
279
+ "step": 10991
280
+ },
281
+ {
282
+ "epoch": 19.0,
283
+ "learning_rate": 3.8763005780346824e-05,
284
+ "loss": 0.1694,
285
+ "step": 11601
286
+ },
287
+ {
288
+ "epoch": 19.0,
289
+ "eval_loss": 1.3626692295074463,
290
+ "eval_runtime": 53.8757,
291
+ "eval_samples_per_second": 137.836,
292
+ "eval_steps_per_second": 4.325,
293
+ "eval_wer": 0.8573246323596241,
294
+ "step": 11601
295
+ },
296
+ {
297
+ "epoch": 20.0,
298
+ "learning_rate": 3.523121387283237e-05,
299
+ "loss": 0.1597,
300
+ "step": 12212
301
+ },
302
+ {
303
+ "epoch": 20.0,
304
+ "eval_loss": 1.7108192443847656,
305
+ "eval_runtime": 54.0651,
306
+ "eval_samples_per_second": 137.353,
307
+ "eval_steps_per_second": 4.31,
308
+ "eval_wer": 0.8641775580317379,
309
+ "step": 12212
310
+ },
311
+ {
312
+ "epoch": 21.0,
313
+ "learning_rate": 3.169942196531792e-05,
314
+ "loss": 0.1517,
315
+ "step": 12823
316
+ },
317
+ {
318
+ "epoch": 21.0,
319
+ "eval_loss": 1.8344017267227173,
320
+ "eval_runtime": 53.2307,
321
+ "eval_samples_per_second": 139.506,
322
+ "eval_steps_per_second": 4.377,
323
+ "eval_wer": 0.8794466762401614,
324
+ "step": 12823
325
+ },
326
+ {
327
+ "epoch": 22.0,
328
+ "learning_rate": 2.817341040462428e-05,
329
+ "loss": 0.1405,
330
+ "step": 13433
331
+ },
332
+ {
333
+ "epoch": 22.0,
334
+ "eval_loss": 1.4837909936904907,
335
+ "eval_runtime": 52.8743,
336
+ "eval_samples_per_second": 140.446,
337
+ "eval_steps_per_second": 4.407,
338
+ "eval_wer": 0.8508352571210441,
339
+ "step": 13433
340
+ },
341
+ {
342
+ "epoch": 23.0,
343
+ "learning_rate": 2.4641618497109827e-05,
344
+ "loss": 0.1262,
345
+ "step": 14044
346
+ },
347
+ {
348
+ "epoch": 23.0,
349
+ "eval_loss": 1.5321871042251587,
350
+ "eval_runtime": 57.0975,
351
+ "eval_samples_per_second": 130.058,
352
+ "eval_steps_per_second": 4.081,
353
+ "eval_wer": 0.8415283660225765,
354
+ "step": 14044
355
+ },
356
+ {
357
+ "epoch": 24.0,
358
+ "learning_rate": 2.1109826589595376e-05,
359
+ "loss": 0.1171,
360
+ "step": 14655
361
+ },
362
+ {
363
+ "epoch": 24.0,
364
+ "eval_loss": 1.7094522714614868,
365
+ "eval_runtime": 54.2703,
366
+ "eval_samples_per_second": 136.834,
367
+ "eval_steps_per_second": 4.293,
368
+ "eval_wer": 0.8682493228873176,
369
+ "step": 14655
370
+ },
371
+ {
372
+ "epoch": 25.0,
373
+ "learning_rate": 1.7589595375722546e-05,
374
+ "loss": 0.1079,
375
+ "step": 15265
376
+ },
377
+ {
378
+ "epoch": 25.0,
379
+ "eval_loss": 1.744529128074646,
380
+ "eval_runtime": 53.2445,
381
+ "eval_samples_per_second": 139.47,
382
+ "eval_steps_per_second": 4.376,
383
+ "eval_wer": 0.8719211822660099,
384
+ "step": 15265
385
+ },
386
+ {
387
+ "epoch": 26.0,
388
+ "learning_rate": 1.4057803468208095e-05,
389
+ "loss": 0.0996,
390
+ "step": 15876
391
+ },
392
+ {
393
+ "epoch": 26.0,
394
+ "eval_loss": 1.73218834400177,
395
+ "eval_runtime": 55.182,
396
+ "eval_samples_per_second": 134.573,
397
+ "eval_steps_per_second": 4.222,
398
+ "eval_wer": 0.8501808663406831,
399
+ "step": 15876
400
+ },
401
+ {
402
+ "epoch": 27.0,
403
+ "learning_rate": 1.0537572254335262e-05,
404
+ "loss": 0.0922,
405
+ "step": 16486
406
+ },
407
+ {
408
+ "epoch": 27.0,
409
+ "eval_loss": 1.8349395990371704,
410
+ "eval_runtime": 55.3784,
411
+ "eval_samples_per_second": 134.096,
412
+ "eval_steps_per_second": 4.207,
413
+ "eval_wer": 0.862450693472452,
414
+ "step": 16486
415
+ },
416
+ {
417
+ "epoch": 28.0,
418
+ "learning_rate": 7.00578034682081e-06,
419
+ "loss": 0.0855,
420
+ "step": 17097
421
+ },
422
+ {
423
+ "epoch": 28.0,
424
+ "eval_loss": 1.8259180784225464,
425
+ "eval_runtime": 52.9961,
426
+ "eval_samples_per_second": 140.124,
427
+ "eval_steps_per_second": 4.397,
428
+ "eval_wer": 0.8645592859869485,
429
+ "step": 17097
430
+ },
431
+ {
432
+ "epoch": 29.0,
433
+ "learning_rate": 3.473988439306359e-06,
434
+ "loss": 0.081,
435
+ "step": 17708
436
+ },
437
+ {
438
+ "epoch": 29.0,
439
+ "eval_loss": 1.818723440170288,
440
+ "eval_runtime": 53.212,
441
+ "eval_samples_per_second": 139.555,
442
+ "eval_steps_per_second": 4.379,
443
+ "eval_wer": 0.8650864341155727,
444
+ "step": 17708
445
+ },
446
+ {
447
+ "epoch": 29.97,
448
+ "learning_rate": 5.2023121387283234e-08,
449
+ "loss": 0.0771,
450
+ "step": 18300
451
+ },
452
+ {
453
+ "epoch": 29.97,
454
+ "eval_loss": 1.842650055885315,
455
+ "eval_runtime": 54.1155,
456
+ "eval_samples_per_second": 137.225,
457
+ "eval_steps_per_second": 4.306,
458
+ "eval_wer": 0.8623598058640685,
459
+ "step": 18300
460
+ },
461
+ {
462
+ "epoch": 29.97,
463
+ "step": 18300,
464
+ "total_flos": 2.0400693356246094e+21,
465
+ "train_loss": 0.372250930494298,
466
+ "train_runtime": 52675.4843,
467
+ "train_samples_per_second": 89.021,
468
+ "train_steps_per_second": 0.347
469
+ }
470
+ ],
471
+ "logging_steps": 500,
472
+ "max_steps": 18300,
473
+ "num_train_epochs": 30,
474
+ "save_steps": 500,
475
+ "total_flos": 2.0400693356246094e+21,
476
+ "trial_name": null,
477
+ "trial_params": null
478
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875cf88512c7fb59f5e0a63eea1c3c0b801f1d554bf17923c524c6414043ca0a
3
+ size 4091
vocab.json ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<": 1,
3
+ "=": 2,
4
+ ">": 3,
5
+ "[": 4,
6
+ "[PAD]": 425,
7
+ "[UNK]": 424,
8
+ "\\": 5,
9
+ "]": 6,
10
+ "_": 7,
11
+ "`": 8,
12
+ "a": 9,
13
+ "b": 10,
14
+ "c": 11,
15
+ "d": 12,
16
+ "e": 13,
17
+ "f": 14,
18
+ "g": 15,
19
+ "h": 16,
20
+ "i": 17,
21
+ "j": 18,
22
+ "k": 19,
23
+ "l": 20,
24
+ "m": 21,
25
+ "n": 22,
26
+ "o": 23,
27
+ "p": 24,
28
+ "q": 25,
29
+ "r": 26,
30
+ "s": 27,
31
+ "t": 28,
32
+ "u": 29,
33
+ "v": 30,
34
+ "w": 31,
35
+ "x": 32,
36
+ "y": 33,
37
+ "z": 34,
38
+ "|": 0,
39
+ "~": 35,
40
+ "«": 36,
41
+ "²": 37,
42
+ "µ": 38,
43
+ "·": 39,
44
+ "»": 40,
45
+ "à": 41,
46
+ "á": 42,
47
+ "â": 43,
48
+ "ã": 44,
49
+ "ç": 45,
50
+ "è": 46,
51
+ "é": 47,
52
+ "ê": 48,
53
+ "ë": 49,
54
+ "ì": 50,
55
+ "í": 51,
56
+ "ï": 52,
57
+ "ñ": 53,
58
+ "ò": 54,
59
+ "ó": 55,
60
+ "ô": 56,
61
+ "õ": 57,
62
+ "ö": 58,
63
+ "ù": 59,
64
+ "ú": 60,
65
+ "û": 61,
66
+ "ć": 62,
67
+ "č": 63,
68
+ "ĕ": 64,
69
+ "ğ": 65,
70
+ "ĩ": 66,
71
+ "ī": 67,
72
+ "ĭ": 68,
73
+ "ĺ": 69,
74
+ "ń": 70,
75
+ "ŋ": 71,
76
+ "ŏ": 72,
77
+ "ś": 73,
78
+ "š": 74,
79
+ "ŧ": 75,
80
+ "ũ": 76,
81
+ "ŭ": 77,
82
+ "ŵ": 78,
83
+ "ƙ": 79,
84
+ "ƥ": 80,
85
+ "ƭ": 81,
86
+ "ƴ": 82,
87
+ "ǧ": 83,
88
+ "ǹ": 84,
89
+ "ɓ": 85,
90
+ "ɔ": 86,
91
+ "ɖ": 87,
92
+ "ɗ": 88,
93
+ "ɛ": 89,
94
+ "ɣ": 90,
95
+ "ɲ": 91,
96
+ "ʹ": 92,
97
+ "ʻ": 93,
98
+ "̀": 94,
99
+ "́": 95,
100
+ "̆": 96,
101
+ "̈": 97,
102
+ "̣": 98,
103
+ "έ": 99,
104
+ "γ": 100,
105
+ "ε": 101,
106
+ "ԑ": 102,
107
+ "ሀ": 103,
108
+ "ሁ": 104,
109
+ "ሂ": 105,
110
+ "ሃ": 106,
111
+ "ሄ": 107,
112
+ "ህ": 108,
113
+ "ሆ": 109,
114
+ "ለ": 110,
115
+ "ሉ": 111,
116
+ "ሊ": 112,
117
+ "ላ": 113,
118
+ "ሌ": 114,
119
+ "ል": 115,
120
+ "ሎ": 116,
121
+ "ሏ": 117,
122
+ "ሐ": 118,
123
+ "ሑ": 119,
124
+ "ሒ": 120,
125
+ "ሓ": 121,
126
+ "ሔ": 122,
127
+ "ሕ": 123,
128
+ "ሖ": 124,
129
+ "መ": 125,
130
+ "ሙ": 126,
131
+ "ሚ": 127,
132
+ "ማ": 128,
133
+ "ሜ": 129,
134
+ "ም": 130,
135
+ "ሞ": 131,
136
+ "ሟ": 132,
137
+ "ሠ": 133,
138
+ "ሡ": 134,
139
+ "ሣ": 135,
140
+ "ሥ": 136,
141
+ "ሦ": 137,
142
+ "ረ": 138,
143
+ "ሩ": 139,
144
+ "ሪ": 140,
145
+ "ራ": 141,
146
+ "ሬ": 142,
147
+ "ር": 143,
148
+ "ሮ": 144,
149
+ "ሯ": 145,
150
+ "ሰ": 146,
151
+ "ሱ": 147,
152
+ "ሲ": 148,
153
+ "ሳ": 149,
154
+ "ሴ": 150,
155
+ "ስ": 151,
156
+ "ሶ": 152,
157
+ "ሷ": 153,
158
+ "ሸ": 154,
159
+ "ሹ": 155,
160
+ "ሺ": 156,
161
+ "ሻ": 157,
162
+ "ሼ": 158,
163
+ "ሽ": 159,
164
+ "ሾ": 160,
165
+ "ሿ": 161,
166
+ "ቀ": 162,
167
+ "ቁ": 163,
168
+ "ቂ": 164,
169
+ "ቃ": 165,
170
+ "ቄ": 166,
171
+ "ቅ": 167,
172
+ "ቆ": 168,
173
+ "ቋ": 169,
174
+ "ቐ": 170,
175
+ "ቒ": 171,
176
+ "ቓ": 172,
177
+ "ቕ": 173,
178
+ "ቚ": 174,
179
+ "በ": 175,
180
+ "ቡ": 176,
181
+ "ቢ": 177,
182
+ "ባ": 178,
183
+ "ቤ": 179,
184
+ "ብ": 180,
185
+ "ቦ": 181,
186
+ "ቧ": 182,
187
+ "ቨ": 183,
188
+ "ቩ": 184,
189
+ "ቪ": 185,
190
+ "ቫ": 186,
191
+ "ቬ": 187,
192
+ "ቭ": 188,
193
+ "ቮ": 189,
194
+ "ቯ": 190,
195
+ "ተ": 191,
196
+ "ቱ": 192,
197
+ "ቲ": 193,
198
+ "ታ": 194,
199
+ "ቴ": 195,
200
+ "ት": 196,
201
+ "ቶ": 197,
202
+ "ቷ": 198,
203
+ "ቸ": 199,
204
+ "ቹ": 200,
205
+ "ቺ": 201,
206
+ "ቻ": 202,
207
+ "ቼ": 203,
208
+ "ች": 204,
209
+ "ቾ": 205,
210
+ "ቿ": 206,
211
+ "ኃ": 207,
212
+ "ኅ": 208,
213
+ "ኋ": 209,
214
+ "ነ": 210,
215
+ "ኑ": 211,
216
+ "ኒ": 212,
217
+ "ና": 213,
218
+ "ኔ": 214,
219
+ "ን": 215,
220
+ "ኖ": 216,
221
+ "ኗ": 217,
222
+ "ኘ": 218,
223
+ "ኙ": 219,
224
+ "ኚ": 220,
225
+ "ኛ": 221,
226
+ "ኜ": 222,
227
+ "ኝ": 223,
228
+ "ኞ": 224,
229
+ "ኟ": 225,
230
+ "አ": 226,
231
+ "ኡ": 227,
232
+ "ኢ": 228,
233
+ "ኣ": 229,
234
+ "ኤ": 230,
235
+ "እ": 231,
236
+ "ኦ": 232,
237
+ "ከ": 233,
238
+ "ኩ": 234,
239
+ "ኪ": 235,
240
+ "ካ": 236,
241
+ "ኬ": 237,
242
+ "ክ": 238,
243
+ "ኮ": 239,
244
+ "ኰ": 240,
245
+ "ኲ": 241,
246
+ "ኳ": 242,
247
+ "ኸ": 243,
248
+ "ኻ": 244,
249
+ "ኽ": 245,
250
+ "ኾ": 246,
251
+ "ወ": 247,
252
+ "ዉ": 248,
253
+ "ዊ": 249,
254
+ "ዋ": 250,
255
+ "ዌ": 251,
256
+ "ው": 252,
257
+ "ዎ": 253,
258
+ "ዐ": 254,
259
+ "ዑ": 255,
260
+ "ዒ": 256,
261
+ "ዓ": 257,
262
+ "ዔ": 258,
263
+ "ዕ": 259,
264
+ "ዖ": 260,
265
+ "ዘ": 261,
266
+ "ዙ": 262,
267
+ "ዚ": 263,
268
+ "ዛ": 264,
269
+ "ዜ": 265,
270
+ "ዝ": 266,
271
+ "ዞ": 267,
272
+ "ዟ": 268,
273
+ "ዠ": 269,
274
+ "ዡ": 270,
275
+ "ዢ": 271,
276
+ "ዣ": 272,
277
+ "ዤ": 273,
278
+ "ዥ": 274,
279
+ "ዦ": 275,
280
+ "ዧ": 276,
281
+ "የ": 277,
282
+ "ዩ": 278,
283
+ "ዪ": 279,
284
+ "ያ": 280,
285
+ "ዬ": 281,
286
+ "ይ": 282,
287
+ "ዮ": 283,
288
+ "ደ": 284,
289
+ "ዱ": 285,
290
+ "ዲ": 286,
291
+ "ዳ": 287,
292
+ "ዴ": 288,
293
+ "ድ": 289,
294
+ "ዶ": 290,
295
+ "ዷ": 291,
296
+ "ጀ": 292,
297
+ "ጁ": 293,
298
+ "ጂ": 294,
299
+ "ጃ": 295,
300
+ "ጄ": 296,
301
+ "ጅ": 297,
302
+ "ጆ": 298,
303
+ "ጇ": 299,
304
+ "ገ": 300,
305
+ "ጉ": 301,
306
+ "ጊ": 302,
307
+ "ጋ": 303,
308
+ "ጌ": 304,
309
+ "ግ": 305,
310
+ "ጎ": 306,
311
+ "ጐ": 307,
312
+ "ጓ": 308,
313
+ "ጔ": 309,
314
+ "ጠ": 310,
315
+ "ጡ": 311,
316
+ "ጢ": 312,
317
+ "ጣ": 313,
318
+ "ጤ": 314,
319
+ "ጥ": 315,
320
+ "ጦ": 316,
321
+ "ጧ": 317,
322
+ "ጨ": 318,
323
+ "ጩ": 319,
324
+ "ጪ": 320,
325
+ "ጫ": 321,
326
+ "ጬ": 322,
327
+ "ጭ": 323,
328
+ "ጮ": 324,
329
+ "ጯ": 325,
330
+ "ጰ": 326,
331
+ "ጱ": 327,
332
+ "ጲ": 328,
333
+ "ጳ": 329,
334
+ "ጴ": 330,
335
+ "ጵ": 331,
336
+ "ጶ": 332,
337
+ "ጸ": 333,
338
+ "ጹ": 334,
339
+ "ጺ": 335,
340
+ "ጻ": 336,
341
+ "ጼ": 337,
342
+ "ጽ": 338,
343
+ "ጾ": 339,
344
+ "ጿ": 340,
345
+ "ፀ": 341,
346
+ "ፁ": 342,
347
+ "ፃ": 343,
348
+ "ፅ": 344,
349
+ "ፈ": 345,
350
+ "ፉ": 346,
351
+ "ፊ": 347,
352
+ "ፋ": 348,
353
+ "ፌ": 349,
354
+ "ፍ": 350,
355
+ "ፎ": 351,
356
+ "ፏ": 352,
357
+ "ፐ": 353,
358
+ "ፑ": 354,
359
+ "ፒ": 355,
360
+ "ፓ": 356,
361
+ "ፔ": 357,
362
+ "ፕ": 358,
363
+ "ፖ": 359,
364
+ "፡": 360,
365
+ "።": 361,
366
+ "፣": 362,
367
+ "፤": 363,
368
+ "ḅ": 364,
369
+ "ḍ": 365,
370
+ "ḓ": 366,
371
+ "ḥ": 367,
372
+ "ḷ": 368,
373
+ "ḽ": 369,
374
+ "ṅ": 370,
375
+ "ṋ": 371,
376
+ "ṕ": 372,
377
+ "ṛ": 373,
378
+ "ṣ": 374,
379
+ "ṭ": 375,
380
+ "ṱ": 376,
381
+ "ẃ": 377,
382
+ "ẓ": 378,
383
+ "ạ": 379,
384
+ "ẹ": 380,
385
+ "ị": 381,
386
+ "ọ": 382,
387
+ "ụ": 383,
388
+ "ὲ": 384,
389
+ "–": 385,
390
+ "—": 386,
391
+ "’": 387,
392
+ "‟": 388,
393
+ "•": 389,
394
+ "…": 390,
395
+ "‽": 391,
396
+ "ⴰ": 392,
397
+ "ⴱ": 393,
398
+ "ⴳ": 394,
399
+ "ⴷ": 395,
400
+ "ⴹ": 396,
401
+ "ⴻ": 397,
402
+ "ⴼ": 398,
403
+ "ⴽ": 399,
404
+ "ⵀ": 400,
405
+ "ⵃ": 401,
406
+ "ⵄ": 402,
407
+ "ⵅ": 403,
408
+ "ⵇ": 404,
409
+ "ⵉ": 405,
410
+ "ⵊ": 406,
411
+ "ⵍ": 407,
412
+ "ⵎ": 408,
413
+ "ⵏ": 409,
414
+ "ⵓ": 410,
415
+ "ⵔ": 411,
416
+ "ⵕ": 412,
417
+ "ⵖ": 413,
418
+ "ⵙ": 414,
419
+ "ⵚ": 415,
420
+ "ⵛ": 416,
421
+ "ⵜ": 417,
422
+ "ⵟ": 418,
423
+ "ⵡ": 419,
424
+ "ⵢ": 420,
425
+ "ⵣ": 421,
426
+ "ⵥ": 422,
427
+ "ⵯ": 423
428
+ }