diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e0530df6571a8993da0effea44f4f5b6207ef56c --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "": 67, + "": 66 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..b066569bc74ac37bfe36710a9b5262b04fd2bbe2 --- /dev/null +++ b/config.json @@ -0,0 +1,81 @@ +{ + "activation_dropout": 0.0, + "adapter_act": "relu", + "adapter_kernel_size": 3, + "adapter_stride": 2, + "add_adapter": true, + "apply_spec_augment": false, + "architectures": [ + "Wav2Vec2BertForCTC" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 768, + "codevector_dim": 768, + "conformer_conv_dropout": 0.1, + "contrastive_logits_temperature": 0.1, + "conv_depthwise_kernel_size": 31, + "ctc_loss_reduction": "mean", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "eos_token_id": 2, + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "feature_projection_input_dim": 160, + "final_dropout": 0.1, + "hidden_act": "swish", + "hidden_dropout": 0.1, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 4096, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "left_max_position_embeddings": 64, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_source_positions": 5000, + "model_type": "wav2vec2-bert", + "num_adapter_layers": 1, + "num_attention_heads": 16, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_hidden_layers": 24, + "num_negatives": 100, + "output_hidden_size": 1024, + "pad_token_id": 65, + "position_embeddings_type": "relative_key", + "proj_codevector_dim": 768, + "right_max_position_embeddings": 8, + "rotary_embedding_base": 10000, + "tdnn_dilation": [ + 1, + 2, + 3, + 1, + 1 + ], + "tdnn_dim": [ + 512, + 512, + 512, + 512, + 1500 + ], + "tdnn_kernel": [ + 5, + 3, + 3, + 1, + 1 + ], + "torch_dtype": "float32", + "transformers_version": "4.54.0.dev0", + "use_intermediate_ffn_before_adapter": false, + "use_weighted_layer_sum": false, + "vocab_size": 68, + "xvector_output_dim": 512 +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..830f48dd7f49f38e01899b7945d7ca2423b8f872 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a546e2452229e8a8520f15de92500004f80193c7fe8a7d535968cd39d20c3e2e +size 2423097560 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5db61951cdf5edab6337fd84ee619500c27aaa3d --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,11 @@ +{ + "feature_extractor_type": "SeamlessM4TFeatureExtractor", + "feature_size": 80, + "num_mel_bins": 80, + "padding_side": "right", + "padding_value": 1, + "processor_class": "Wav2Vec2BertProcessor", + "return_attention_mask": true, + "sampling_rate": 16000, + "stride": 2 +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1c2a0360e58a815a5084fdc610415d9dd8d81381 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "[UNK]" +} diff --git a/test/data-00000-of-00001.arrow b/test/data-00000-of-00001.arrow new file mode 100644 index 0000000000000000000000000000000000000000..3a90c8f5300c7af227b2b740bdc3a92f80604c6a --- /dev/null +++ b/test/data-00000-of-00001.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21e50f8beb3ba74c924938ef4b875560557759704abe4d4b3325e84d9683121a +size 465901864 diff --git a/test/dataset_info.json b/test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..a6c4cf3c08888cac5ff7727f7c560e66a4069ba5 --- /dev/null +++ b/test/dataset_info.json @@ -0,0 +1,15 @@ +{ + "citation": "", + "description": "", + "features": { + "AUDIO_PATH": { + "_type": "Audio" + }, + "Transcription": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/test/state.json b/test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..b93b58154de47eb26b2bc703c28af3d89ea4cebe --- /dev/null +++ b/test/state.json @@ -0,0 +1,13 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "5e34afa7c5ea9c0e", + "_format_columns": null, + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5f37edaa1c7dd7d210c30a48a2503ca443822f8a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,49 @@ +{ + "added_tokens_decoder": { + "64": { + "content": "[UNK]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "65": { + "content": "[PAD]", + "lstrip": true, + "normalized": false, + "rstrip": true, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "", + "extra_special_tokens": {}, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "[PAD]", + "processor_class": "Wav2Vec2BertProcessor", + "replace_word_delimiter_char": " ", + "target_lang": null, + "tokenizer_class": "Wav2Vec2CTCTokenizer", + "unk_token": "[UNK]", + "word_delimiter_token": "|" +} diff --git a/train/data-00000-of-00059.arrow b/train/data-00000-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..09725e3cc157502f1188e8743c6b84ef5b961e1b --- /dev/null +++ b/train/data-00000-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5301c32fea0ad0d512ee9e3fc8b32d28f7d9ad2f39db32aa7bd19f3e8ff27a61 +size 499476440 diff --git a/train/data-00001-of-00059.arrow b/train/data-00001-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..464abb50d058e5a564ece3150e7727bd25659e76 --- /dev/null +++ b/train/data-00001-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09aa9f7814bf8e8eb8e7a9b40642b07f91863c1501ac53cd7f6135d20ad57b81 +size 496464632 diff --git a/train/data-00002-of-00059.arrow b/train/data-00002-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..44cbd82f5658eaca8ea1aec0285541082dd8f5aa --- /dev/null +++ b/train/data-00002-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414a83c6050af866ca5ef6b248985c9ebb234d0faa1617551104157797389005 +size 495705040 diff --git a/train/data-00003-of-00059.arrow b/train/data-00003-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..5405e8594e8b973503e901323c3873a6a417a08c --- /dev/null +++ b/train/data-00003-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3413e8d190799d4329100710d5af03dd33bab4b4e7d73d1973cb585ba6839b59 +size 498811536 diff --git a/train/data-00004-of-00059.arrow b/train/data-00004-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..13a098b17b6784c156f769a2eb7e4e8ad4eb40b1 --- /dev/null +++ b/train/data-00004-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d592cad0f92fdc43f4aa258d37bc33be902d5b410ed79579aa8cca222cfda5fc +size 494294328 diff --git a/train/data-00005-of-00059.arrow b/train/data-00005-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2a68d6bf6596a7dfdbf7f9cc12c7c3e8c21ef8f8 --- /dev/null +++ b/train/data-00005-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da3990eac54f0ad866f725742b952185ce39affdb71929c06ef0a9253684f3c +size 502141568 diff --git a/train/data-00006-of-00059.arrow b/train/data-00006-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..65d211dff1f1724b8ed9b268055d864238f6f10e --- /dev/null +++ b/train/data-00006-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2530fa74429d2f447a3b4ba8c4f46a9fadaaaeee9c15b5e1cfb52f544171c6 +size 497870872 diff --git a/train/data-00007-of-00059.arrow b/train/data-00007-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..553eb621d071df82320885a44e3fbc90c7f0c995 --- /dev/null +++ b/train/data-00007-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3849ec812663b9ebb60d9128daeabd1bb3b5b90e3a958ab49dce73cbf22a39e8 +size 499110264 diff --git a/train/data-00008-of-00059.arrow b/train/data-00008-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f045ad37ad42b5ced645e6315621a9771199db2d --- /dev/null +++ b/train/data-00008-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a109ea2f4912f31c5c1eb471098e571aab72c3b1f5175d24cc141bbaf09bced +size 502462968 diff --git a/train/data-00009-of-00059.arrow b/train/data-00009-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9d5dfa68b97e9f410223ba3563c14eeab9231f9a --- /dev/null +++ b/train/data-00009-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28495c8eb8a6a1f9ef5874a6099b4ae54c8e264208aefd56b454a655259a37ef +size 504714080 diff --git a/train/data-00010-of-00059.arrow b/train/data-00010-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d985630f51de487ea4021e09f66d63906ba7259f --- /dev/null +++ b/train/data-00010-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d47ce92091e6dd5de9f74b7dd6294cb70a96bc2061b25ab28372a909d6c506e +size 494345520 diff --git a/train/data-00011-of-00059.arrow b/train/data-00011-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d5c5361af275241353a7bd01872f6bf85ed30caf --- /dev/null +++ b/train/data-00011-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3dd488eb143a1998800d6706123f66a31a00fe4cf479c1dd79b485baf245894 +size 495622816 diff --git a/train/data-00012-of-00059.arrow b/train/data-00012-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..bcb3e923af87c4a013126c3d959cd7e1a9ba31b9 --- /dev/null +++ b/train/data-00012-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e058f7d8553a0894917cc4bce8627ce9c7fec576b54fdd03864459f7cf1a38f9 +size 500819816 diff --git a/train/data-00013-of-00059.arrow b/train/data-00013-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..a4f2a94f180ef92339c49caa58f82d0f98e0a21c --- /dev/null +++ b/train/data-00013-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac4ced09e1ff863a29aefb1122cd38793f431c31d4eac6b107559eb3eb115199 +size 501582080 diff --git a/train/data-00014-of-00059.arrow b/train/data-00014-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..7306887b542bc674b994dbcaf8207be9376f8d36 --- /dev/null +++ b/train/data-00014-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ae3963c41fd183bfc21eacea4c728fae398dda5139e4ba0e51b742c3f4be9f2 +size 501837888 diff --git a/train/data-00015-of-00059.arrow b/train/data-00015-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..a9a29ef36226e7cfe8275ec0115f0e05da3beacf --- /dev/null +++ b/train/data-00015-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f0fac19dee05f65c566a7e07a90e4a0557a3337baa0bd5189d8f1da0defb45 +size 492637736 diff --git a/train/data-00016-of-00059.arrow b/train/data-00016-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..a2d97838498c9dc2b543c267ddfaf3dabf508239 --- /dev/null +++ b/train/data-00016-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10b18484ca230d6521b6f6ca720f38ed65c4ef7799c11f843f6de9afc30c85ce +size 498657496 diff --git a/train/data-00017-of-00059.arrow b/train/data-00017-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..614d9942383e5cca7e8e18b2b138fe582338e1c8 --- /dev/null +++ b/train/data-00017-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d5a5d7ea4f42268a815049315fe5f0113168dee3678002f47c654b2fd5724c +size 495965032 diff --git a/train/data-00018-of-00059.arrow b/train/data-00018-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..40ef391e8ad9d3b428ac093a2b6d020ef34ab3d2 --- /dev/null +++ b/train/data-00018-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0a34f9061bc9f87d6edd3cf71edb461abcd2da4fc605d98b4db32d86e0e37f +size 499673616 diff --git a/train/data-00019-of-00059.arrow b/train/data-00019-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e87f11548dd2bc128c3a2667d55c6b97ec701055 --- /dev/null +++ b/train/data-00019-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c596801a618f9760730ff1d076eef99a61c94207f9a71f635a1b58d25ed68b1 +size 498442192 diff --git a/train/data-00020-of-00059.arrow b/train/data-00020-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..4a403f550cf87f0f24f0ad30735500db507e101e --- /dev/null +++ b/train/data-00020-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a61d581621dc5469f6b29c938115cc87112c78712bd96c481cef3f165625339 +size 500413040 diff --git a/train/data-00021-of-00059.arrow b/train/data-00021-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ee9fc78e42affb7ae381b90837184f27b0379182 --- /dev/null +++ b/train/data-00021-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d527a0cf45e27800d2780208edb4cefd21c8fdaf2e9abc1881944382e7e8081e +size 500106752 diff --git a/train/data-00022-of-00059.arrow b/train/data-00022-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..c898a58d1744ac29a4be4e2dd767c6d5532e0e2b --- /dev/null +++ b/train/data-00022-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19db9dc7e7259e06c79c18a56918c4cdfe8b57ad601009859294ff50cded54d0 +size 495207072 diff --git a/train/data-00023-of-00059.arrow b/train/data-00023-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..d4a68c81e57b6fdb07350fcf96a54ef7b87b5a6a --- /dev/null +++ b/train/data-00023-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1048abbacee022f4007be0c19151a3087230642672e4a52b800b094e99612af +size 505953520 diff --git a/train/data-00024-of-00059.arrow b/train/data-00024-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..502396ed6ee178cdc06776af5833f3c686cd2fde --- /dev/null +++ b/train/data-00024-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b3b5c1726c1d8037bab544494be832285788cbc19ca26c9175f5d9112e1c4a +size 501308352 diff --git a/train/data-00025-of-00059.arrow b/train/data-00025-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f98ddd8c24268ade1f1b2942ee43827e0b41da31 --- /dev/null +++ b/train/data-00025-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08d3c90918e6e1fe327a932ca20cdc884d31ecf9f5ec724383888dce4abcb132 +size 497351976 diff --git a/train/data-00026-of-00059.arrow b/train/data-00026-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dfd2f0a6f4d1fc942723afa71954d2129e82ac13 --- /dev/null +++ b/train/data-00026-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616b504fc7114fb1e6f811500e467c83b7dd808c144acad7a43280cf1c89f5df +size 499477200 diff --git a/train/data-00027-of-00059.arrow b/train/data-00027-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f309fc2a33d8f45250fd82445a1d8cba2cd3dfcf --- /dev/null +++ b/train/data-00027-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:236fe7dd10e84bde5e3424c6832dae90e5bd1b2ec223bdb611f8e0d5e74a78ab +size 499893624 diff --git a/train/data-00028-of-00059.arrow b/train/data-00028-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..f81869fd04a304d88ddc784b9b48281589c0a621 --- /dev/null +++ b/train/data-00028-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:384fa86a5831d8908f25ef06bcdb63fd2ed532d961e06ae086b88ae52f58182e +size 496302176 diff --git a/train/data-00029-of-00059.arrow b/train/data-00029-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b0612ff9dd3fc0d13da65e5da0dddf238dbd69e0 --- /dev/null +++ b/train/data-00029-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd041857551fc708ceadf7ee21540d4bf5ad7a99f50fcf959dea5bdead27388 +size 495026976 diff --git a/train/data-00030-of-00059.arrow b/train/data-00030-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..ccc63e4443216e57966660a85bced3dbf47c793f --- /dev/null +++ b/train/data-00030-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0aa1ece5cea1f6d57e20aeb11c08ff2ec315787c6ac04e3c90fea3c4a4864f4 +size 498280440 diff --git a/train/data-00031-of-00059.arrow b/train/data-00031-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dcd6eff3dd05d9f2ff8dad896ad2f98abb7f93e9 --- /dev/null +++ b/train/data-00031-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e335fe6b8fd08841d476f553756e1a9c8063d433a1f8afa79e082a0c29f9f668 +size 498250456 diff --git a/train/data-00032-of-00059.arrow b/train/data-00032-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..117a784d9dd504d21fd4f501ed21709378830c65 --- /dev/null +++ b/train/data-00032-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eeb761313ae51b4eb14761786f10a74014ff6d532a0c4071b4aa70ddebc0326 +size 498291920 diff --git a/train/data-00033-of-00059.arrow b/train/data-00033-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..262ab90c9bd5bbf858d0e92da6cc05d33e8a8187 --- /dev/null +++ b/train/data-00033-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbd5bbabdff360b77504f74528e4041f15a65bc38d545cf83cf9580dbe56e481 +size 501878328 diff --git a/train/data-00034-of-00059.arrow b/train/data-00034-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0c037f1e8fd9b039ddf3efaca9a40568bc1b9c7c --- /dev/null +++ b/train/data-00034-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a162d73ba36dfd31cb57fb2b9f718a60bf47c060e5559ea11e2521092d7ffad +size 497207600 diff --git a/train/data-00035-of-00059.arrow b/train/data-00035-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b4eb0efbbed0f0206d0f55a772e294ee658ef517 --- /dev/null +++ b/train/data-00035-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844906124ac43108015a011d09054c889064616898c52c09b05823e3f0d36c95 +size 497709968 diff --git a/train/data-00036-of-00059.arrow b/train/data-00036-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..6911e6ba07495ada42e988062ab2323955dfb788 --- /dev/null +++ b/train/data-00036-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74533d18ff784cecb24760266dc39c8abd5070e284d425122127daf5b1e7f9f7 +size 503447320 diff --git a/train/data-00037-of-00059.arrow b/train/data-00037-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..7eb92499213c1b8e891f468f26efbbd3f78a7415 --- /dev/null +++ b/train/data-00037-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efbf86dfa5aff8ce93bc8a9296b56059c19bc76f182494a90d65c49568404ef3 +size 492689072 diff --git a/train/data-00038-of-00059.arrow b/train/data-00038-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..3f535c5bd35246044fd4854c1d908bf735b6cee0 --- /dev/null +++ b/train/data-00038-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b447795b2fa5e961c1e23039533fa671470d50a0d5aeedaa3523fc487b7d56 +size 497066648 diff --git a/train/data-00039-of-00059.arrow b/train/data-00039-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..14ddbc1efa5a28ac6f9017a85c683be9e443b8e4 --- /dev/null +++ b/train/data-00039-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e20c9d75433fffe6f5c6233f34af406c368a8efd91b44de612c6c953f999f88 +size 499872648 diff --git a/train/data-00040-of-00059.arrow b/train/data-00040-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8623a654b8ca2c4934123fd7a9af8594bc2455ea --- /dev/null +++ b/train/data-00040-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a37230b2635e5c29ab253e0b77be11fd092ac6a3833435f33fb1b9fafa359cf +size 494551952 diff --git a/train/data-00041-of-00059.arrow b/train/data-00041-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..dde6ca7c800ec77f8569ccbd72ba1366d19252db --- /dev/null +++ b/train/data-00041-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb78a58274a8c91dcec24c98a487e95dd49b75891b1747e06b5ee258da476dd +size 498948984 diff --git a/train/data-00042-of-00059.arrow b/train/data-00042-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..9884480d4bac919de38671079dcd5202a95d6bc2 --- /dev/null +++ b/train/data-00042-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75261c909a8c4eb55ef20f6be37d46e668ae7ce776d0c44097dd5a4b0e93137b +size 495546064 diff --git a/train/data-00043-of-00059.arrow b/train/data-00043-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..2d21454ae7c4bde6e27b4d89eeb77ca35a80102e --- /dev/null +++ b/train/data-00043-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b8c5aaad3782920f70283a573e402832fa9bb51e27c426cc74c4263f5243d15 +size 492811208 diff --git a/train/data-00044-of-00059.arrow b/train/data-00044-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..60964c4593a4aeabb720edfb31c6cdef588eb812 --- /dev/null +++ b/train/data-00044-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97d3977f30a8b89ecd576a25b01b04d71839a20e0e6df8400df7fbb9ec3b3bdd +size 489310648 diff --git a/train/data-00045-of-00059.arrow b/train/data-00045-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..e2a041d3bd0f9f6695e0594a8f694791509ee92c --- /dev/null +++ b/train/data-00045-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd42f4163ad1825b166fd066345a96ab855a08e3a3d704c0d97b5e3f09b1ea1 +size 496185560 diff --git a/train/data-00046-of-00059.arrow b/train/data-00046-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..8688dfc3bed143db17ad26bc6973e99da36211f6 --- /dev/null +++ b/train/data-00046-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac80d08b38ff96a65177fadb4ae958fe6bb9a153da67b64c8c728cca8a1072ed +size 501684872 diff --git a/train/data-00047-of-00059.arrow b/train/data-00047-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..842761c269ce86081b6048b4cf4e7450582d9cb4 --- /dev/null +++ b/train/data-00047-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c05cd02717be9e2bb47d74ab575c5913073396e3f0fdeafe1be2299586a9301d +size 497649368 diff --git a/train/data-00048-of-00059.arrow b/train/data-00048-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..249531e78967568f3636ed3036237b50190a20e1 --- /dev/null +++ b/train/data-00048-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1086bbfd03349858cef5f6be15a594b5eb73c1e5025700721495ee04d99e7df8 +size 496714920 diff --git a/train/data-00049-of-00059.arrow b/train/data-00049-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..739c836f5c6c690968035d76b25d2cd2fdf675da --- /dev/null +++ b/train/data-00049-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2106f88304e1d7f66c6d1319aac3aa333ddfc0e5341eb46f4987eab6855a9afd +size 500526144 diff --git a/train/data-00050-of-00059.arrow b/train/data-00050-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..afd236fa762a25ff90c7a9d41b3adef67056c1ca --- /dev/null +++ b/train/data-00050-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20192943833446eac295f609f126a107c1f965a64d12835a3b0baae9dd677c5 +size 494629648 diff --git a/train/data-00051-of-00059.arrow b/train/data-00051-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0a94a934dc8c2eca32aba0eeb510e7c0d0da85a4 --- /dev/null +++ b/train/data-00051-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e94a949d0e97d914b170486de91e6d2304087a58a0a37e1b93da509e3445db5 +size 503205008 diff --git a/train/data-00052-of-00059.arrow b/train/data-00052-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..b7ac33d0c1325eea06472d92d49989c6b1371903 --- /dev/null +++ b/train/data-00052-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cf93a8f67328b3840a3a96c1fff986a359aabcbf963a97434dc6030069993c1 +size 490864112 diff --git a/train/data-00053-of-00059.arrow b/train/data-00053-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..479c0b3ec986f7d1e45761be94f3489e740461c6 --- /dev/null +++ b/train/data-00053-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f2fac845fdffa2e29841a018087b72d206ff74ebd6fb917b22cfb48cb9d2f3b +size 501374920 diff --git a/train/data-00054-of-00059.arrow b/train/data-00054-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..a12ea24b6f06e3fefbe0b1c52450307cb4373ecd --- /dev/null +++ b/train/data-00054-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b913b0b445260c33bb74ff0ca68ce592e28a9a922a70cb2c8b8f89772edd234 +size 490113472 diff --git a/train/data-00055-of-00059.arrow b/train/data-00055-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..acd77b02e32717f62a3ef7dc9c707f309fccc031 --- /dev/null +++ b/train/data-00055-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:475076598bc40a143a59a1a6c87fe0b1580e3f1edaff3998e187c3e63e97432f +size 496596224 diff --git a/train/data-00056-of-00059.arrow b/train/data-00056-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..55b9cc029932aca281c7906a4f5c52debb5244d4 --- /dev/null +++ b/train/data-00056-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26441574f98104fc64f991b7914cc150211719a20e4a7b4ba4ae216566b20125 +size 498291928 diff --git a/train/data-00057-of-00059.arrow b/train/data-00057-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..0257a42a5d5dbac1bec47ae35778addaf597975f --- /dev/null +++ b/train/data-00057-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9161636d8058cf6619c9d772c91f7e3539a39c644968717f5833830aaf978001 +size 486332040 diff --git a/train/data-00058-of-00059.arrow b/train/data-00058-of-00059.arrow new file mode 100644 index 0000000000000000000000000000000000000000..09b39eb262f891050a5705c2167d9d1f59923b2a --- /dev/null +++ b/train/data-00058-of-00059.arrow @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64433d21c912bdd81460a43773210d0827a47d968430c7c8a97f7dbed8d334c7 +size 492097176 diff --git a/train/dataset_info.json b/train/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..a6c4cf3c08888cac5ff7727f7c560e66a4069ba5 --- /dev/null +++ b/train/dataset_info.json @@ -0,0 +1,15 @@ +{ + "citation": "", + "description": "", + "features": { + "AUDIO_PATH": { + "_type": "Audio" + }, + "Transcription": { + "dtype": "string", + "_type": "Value" + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/train/state.json b/train/state.json new file mode 100644 index 0000000000000000000000000000000000000000..f91da6d3fe75ebfd30de4bf35b9d4d944dc1d9c5 --- /dev/null +++ b/train/state.json @@ -0,0 +1,190 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00059.arrow" + }, + { + "filename": "data-00001-of-00059.arrow" + }, + { + "filename": "data-00002-of-00059.arrow" + }, + { + "filename": "data-00003-of-00059.arrow" + }, + { + "filename": "data-00004-of-00059.arrow" + }, + { + "filename": "data-00005-of-00059.arrow" + }, + { + "filename": "data-00006-of-00059.arrow" + }, + { + "filename": "data-00007-of-00059.arrow" + }, + { + "filename": "data-00008-of-00059.arrow" + }, + { + "filename": "data-00009-of-00059.arrow" + }, + { + "filename": "data-00010-of-00059.arrow" + }, + { + "filename": "data-00011-of-00059.arrow" + }, + { + "filename": "data-00012-of-00059.arrow" + }, + { + "filename": "data-00013-of-00059.arrow" + }, + { + "filename": "data-00014-of-00059.arrow" + }, + { + "filename": "data-00015-of-00059.arrow" + }, + { + "filename": "data-00016-of-00059.arrow" + }, + { + "filename": "data-00017-of-00059.arrow" + }, + { + "filename": "data-00018-of-00059.arrow" + }, + { + "filename": "data-00019-of-00059.arrow" + }, + { + "filename": "data-00020-of-00059.arrow" + }, + { + "filename": "data-00021-of-00059.arrow" + }, + { + "filename": "data-00022-of-00059.arrow" + }, + { + "filename": "data-00023-of-00059.arrow" + }, + { + "filename": "data-00024-of-00059.arrow" + }, + { + "filename": "data-00025-of-00059.arrow" + }, + { + "filename": "data-00026-of-00059.arrow" + }, + { + "filename": "data-00027-of-00059.arrow" + }, + { + "filename": "data-00028-of-00059.arrow" + }, + { + "filename": "data-00029-of-00059.arrow" + }, + { + "filename": "data-00030-of-00059.arrow" + }, + { + "filename": "data-00031-of-00059.arrow" + }, + { + "filename": "data-00032-of-00059.arrow" + }, + { + "filename": "data-00033-of-00059.arrow" + }, + { + "filename": "data-00034-of-00059.arrow" + }, + { + "filename": "data-00035-of-00059.arrow" + }, + { + "filename": "data-00036-of-00059.arrow" + }, + { + "filename": "data-00037-of-00059.arrow" + }, + { + "filename": "data-00038-of-00059.arrow" + }, + { + "filename": "data-00039-of-00059.arrow" + }, + { + "filename": "data-00040-of-00059.arrow" + }, + { + "filename": "data-00041-of-00059.arrow" + }, + { + "filename": "data-00042-of-00059.arrow" + }, + { + "filename": "data-00043-of-00059.arrow" + }, + { + "filename": "data-00044-of-00059.arrow" + }, + { + "filename": "data-00045-of-00059.arrow" + }, + { + "filename": "data-00046-of-00059.arrow" + }, + { + "filename": "data-00047-of-00059.arrow" + }, + { + "filename": "data-00048-of-00059.arrow" + }, + { + "filename": "data-00049-of-00059.arrow" + }, + { + "filename": "data-00050-of-00059.arrow" + }, + { + "filename": "data-00051-of-00059.arrow" + }, + { + "filename": "data-00052-of-00059.arrow" + }, + { + "filename": "data-00053-of-00059.arrow" + }, + { + "filename": "data-00054-of-00059.arrow" + }, + { + "filename": "data-00055-of-00059.arrow" + }, + { + "filename": "data-00056-of-00059.arrow" + }, + { + "filename": "data-00057-of-00059.arrow" + }, + { + "filename": "data-00058-of-00059.arrow" + } + ], + "_fingerprint": "68f4876cf58bb9b8", + "_format_columns": [ + "AUDIO_PATH", + "Transcription" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee559cd14a4c0fd6289fb2e438acfdc37a7665a0 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9488ba2fe11533af426f027a38116045effc26bfd2d552120b07ec5090636653 +size 5713 diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000000000000000000000000000000000000..d9e24e4d7f3c69e65f25ca19a4860c9af939f445 --- /dev/null +++ b/vocab.json @@ -0,0 +1,68 @@ +{ + "0": 1, + "1": 2, + "9": 3, + "[PAD]": 65, + "[UNK]": 64, + "a": 4, + "b": 5, + "c": 6, + "d": 7, + "e": 8, + "f": 9, + "g": 10, + "h": 11, + "i": 12, + "j": 13, + "k": 14, + "l": 15, + "m": 16, + "n": 17, + "o": 18, + "p": 19, + "q": 20, + "r": 21, + "s": 22, + "t": 23, + "u": 24, + "v": 25, + "w": 26, + "x": 27, + "y": 28, + "z": 29, + "|": 0, + "~": 30, + "à": 31, + "á": 32, + "ã": 33, + "è": 34, + "é": 35, + "ì": 36, + "í": 37, + "ò": 38, + "ó": 39, + "õ": 40, + "ù": 41, + "ú": 42, + "ā": 43, + "ē": 44, + "ĩ": 45, + "ŋ": 46, + "ũ": 47, + "ū": 48, + "ƒ": 49, + "ɔ": 50, + "ɖ": 51, + "ɛ": 52, + "ɣ": 53, + "ʋ": 54, + "̀": 55, + "́": 56, + "̃": 57, + "̄": 58, + "ͻ": 59, + "ε": 60, + "ѐ": 61, + "ẽ": 62, + "ὸ": 63 +}