marofmar committed on
Commit ·
38881ff
1
Parent(s): 374d523
First version of the your-model-name model and tokenizer.
Browse files
- config.json +76 -0
- preprocessor_config.json +9 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.json +1 -0
config.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/wav2vec2-large-xlsr-53",
|
| 3 |
+
"activation_dropout": 0.0,
|
| 4 |
+
"apply_spec_augment": true,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Wav2Vec2ForCTC"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.1,
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"conv_bias": true,
|
| 11 |
+
"conv_dim": [
|
| 12 |
+
512,
|
| 13 |
+
512,
|
| 14 |
+
512,
|
| 15 |
+
512,
|
| 16 |
+
512,
|
| 17 |
+
512,
|
| 18 |
+
512
|
| 19 |
+
],
|
| 20 |
+
"conv_kernel": [
|
| 21 |
+
10,
|
| 22 |
+
3,
|
| 23 |
+
3,
|
| 24 |
+
3,
|
| 25 |
+
3,
|
| 26 |
+
2,
|
| 27 |
+
2
|
| 28 |
+
],
|
| 29 |
+
"conv_stride": [
|
| 30 |
+
5,
|
| 31 |
+
2,
|
| 32 |
+
2,
|
| 33 |
+
2,
|
| 34 |
+
2,
|
| 35 |
+
2,
|
| 36 |
+
2
|
| 37 |
+
],
|
| 38 |
+
"ctc_loss_reduction": "mean",
|
| 39 |
+
"ctc_zero_infinity": false,
|
| 40 |
+
"do_stable_layer_norm": true,
|
| 41 |
+
"eos_token_id": 2,
|
| 42 |
+
"feat_extract_activation": "gelu",
|
| 43 |
+
"feat_extract_dropout": 0.0,
|
| 44 |
+
"feat_extract_norm": "layer",
|
| 45 |
+
"feat_proj_dropout": 0.0,
|
| 46 |
+
"final_dropout": 0.0,
|
| 47 |
+
"gradient_checkpointing": true,
|
| 48 |
+
"hidden_act": "gelu",
|
| 49 |
+
"hidden_dropout": 0.1,
|
| 50 |
+
"hidden_size": 1024,
|
| 51 |
+
"initializer_range": 0.02,
|
| 52 |
+
"intermediate_size": 4096,
|
| 53 |
+
"layer_norm_eps": 1e-05,
|
| 54 |
+
"layerdrop": 0.1,
|
| 55 |
+
"mask_channel_length": 10,
|
| 56 |
+
"mask_channel_min_space": 1,
|
| 57 |
+
"mask_channel_other": 0.0,
|
| 58 |
+
"mask_channel_prob": 0.0,
|
| 59 |
+
"mask_channel_selection": "static",
|
| 60 |
+
"mask_feature_length": 10,
|
| 61 |
+
"mask_feature_prob": 0.0,
|
| 62 |
+
"mask_time_length": 10,
|
| 63 |
+
"mask_time_min_space": 1,
|
| 64 |
+
"mask_time_other": 0.0,
|
| 65 |
+
"mask_time_prob": 0.05,
|
| 66 |
+
"mask_time_selection": "static",
|
| 67 |
+
"model_type": "wav2vec2",
|
| 68 |
+
"num_attention_heads": 16,
|
| 69 |
+
"num_conv_pos_embedding_groups": 16,
|
| 70 |
+
"num_conv_pos_embeddings": 128,
|
| 71 |
+
"num_feat_extract_layers": 7,
|
| 72 |
+
"num_hidden_layers": 24,
|
| 73 |
+
"pad_token_id": 259,
|
| 74 |
+
"transformers_version": "4.6.0.dev0",
|
| 75 |
+
"vocab_size": 260
|
| 76 |
+
}
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_normalize": true,
|
| 3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
| 4 |
+
"feature_size": 1,
|
| 5 |
+
"padding_side": "right",
|
| 6 |
+
"padding_value": 0.0,
|
| 7 |
+
"return_attention_mask": true,
|
| 8 |
+
"sampling_rate": 16000
|
| 9 |
+
}
|
pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63ce341388d47769a2497a4f39f7008598ffbb18bc28e1230e82e34a56f8f384
|
| 3 |
+
size 1262999831
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
|
vocab.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"์ผ": 0, "์ผ": 1, "๋ถ": 2, "์": 3, "๋": 4, "๋ฌ": 5, "๋": 6, "๋ณธ": 7, "์ ": 8, "์": 9, "์ด": 10, "๋": 11, "๋": 12, "๊ฒจ": 13, "๊ทผ": 14, "์ค": 15, "์ฑ": 16, "๋": 17, "๋ฅ": 18, "๋ค": 19, "์ฐ": 20, "์": 21, "์ ": 22, "๋ด": 23, "๊ณผ": 24, "์": 25, "๋น": 26, "ํ": 27, "์": 28, "์": 29, "๊ฒฝ": 30, "์
": 31, "ํ": 32, "๊ฐ": 33, "1": 34, "๊ฐ": 35, "๋ญ": 36, "์": 37, "์น": 38, "๋ฐ": 39, "์ง": 40, "์ฐฝ": 41, "๊ฒ": 42, "ํ": 43, "๋ค": 44, "ํ": 45, "๋ฅด": 46, "๋ฌด": 47, "๊ธ": 48, "๊ธ": 49, "ํ": 50, "๊ฒ": 51, "ํ": 52, "์ฅ": 53, "๋ ": 54, "๋ฒ": 55, "์": 56, "๋": 57, "๊ฒ ": 58, "๊ทธ": 59, "์": 60, "๊ต": 61, "ํ
": 62, "์": 63, "๋ง": 64, "์จ": 65, "์": 66, "๋ผ": 67, "๋ก ": 68, "๋ฆฌ": 69, "๋ฉฐ": 70, "์ฉ": 71, "์": 72, "๋ฌ": 73, "๊ตฌ": 74, "๋": 75, "์ฌ": 76, "์ถ": 77, "๊ฐ": 78, "๋ธ": 79, "๋ฆ": 80, "๊ฐ": 81, "์ซ": 82, ".": 83, "๋ฏ": 84, "์": 85, "๊ฐ": 86, "๋ง": 87, "๋ฃ": 88, "๋น": 89, "์": 90, "๋ป": 91, "์ด": 92, "๊ฒฉ": 93, "์": 94, "์ฒ": 95, "๋ก": 96, "๋ง": 97, "์": 98, "๋": 99, "ํธ": 100, "๋ฌผ": 101, "ํ": 102, "์ง": 103, "ํฅ": 104, "๋ง": 105, "์": 106, "๊ฐ": 107, "์ฌ": 108, "๋ ค": 109, "๋ฅผ": 110, "๊ฐ": 111, "๋ฐ": 112, "๋ฉด": 113, "์ญ": 114, "์ด": 115, "๊ฐ": 116, "๋ค": 117, "ํน": 118, "๋ฐ": 119, "ํฌ": 120, "๊ธธ": 121, "ํ": 122, "๋ ": 123, "๋ฅ": 124, "์ง": 125, "์ง": 126, "๋": 127, "๋จ": 128, "ํฌ": 129, "์ต": 130, "ํ": 131, "๋": 132, "ํด": 133, "๋ฐฉ": 134, "์": 135, "์ข": 136, "๋": 137, "์ต": 138, "๊ตญ": 139, "ํ ": 140, "๋ฆฐ": 141, "๋ก": 142, "์ปด": 143, "์ต": 144, "๋ ": 145, "๋ง": 146, "์": 147, "์ผ": 148, "์ค": 149, "ํ": 150, "์ฃ ": 151, "๋ฐ": 152, "๋": 153, "๋ค": 154, "์": 155, "์": 156, "์ ธ": 157, "์": 158, "๊น": 159, "๋ญ": 160, "์ฐ": 161, "์ ": 162, "์ ": 163, "๋ฐ": 164, "ํ": 165, "๊ฑธ": 166, "์ข": 167, "๊ถ": 168, "๋": 170, "์ง": 171, "์ธ": 172, "๊ฟ": 173, "๋ฐ": 174, "์": 175, "์ค": 176, "ํ": 177, "๋ง": 178, "ํ": 179, "๋ชจ": 180, "ํ": 181, "๋": 182, "๋": 183, "์นด": 184, "์": 185, "์น": 186, "๋ฆผ": 187, "์ด": 188, "๋ฏธ": 189, "๋": 190, "์ฐจ": 191, "๋": 192, "๊ฒ": 193, "์ด": 194, "์ก": 195, "์ฐ": 196, "๋": 197, "์จ": 198, "ํญ": 199, "๊ณ ": 200, "์ธ": 201, "๋ณด": 202, "์": 203, "์ถฉ": 204, "์": 205, "๊ฑฐ": 206, "๋": 207, "๋ญ": 208, "๋ฝ": 209, "๋": 210, "๋ง": 211, "๊ธฐ": 212, "์ฌ": 213, "ํ": 214, "์": 215, "์ผ": 216, "๋": 217, "๋": 218, "๋": 219, "์ฃผ": 220, "์ก": 221, "ํ": 222, "๊ฑด": 223, "๋": 224, "์ค": 225, "๋ฅธ": 226, "์ ": 227, "๊ฒฐ": 228, "์ ": 229, "๋": 230, "๋ชป": 231, "๋ถ": 232, "์": 233, "๋": 234, "์จ": 235, "์": 236, "์ ": 237, "๊พธ": 238, "๋ผ": 239, "๋ผ": 240, "์ผ": 241, "๋ถ": 242, "์ถ": 243, "๋ฒ": 244, "์ฝ": 245, "๋": 246, "์
": 247, "๋น": 248, "๋ฐฐ": 249, "๋ฌธ": 250, "ํ": 251, "๋": 252, "์ ": 253, "ํ": 254, "์": 255, "๋
": 256, "์ผ": 257, "|": 169, "[UNK]": 258, "[PAD]": 259}
|