diff --git a/.gitattributes b/.gitattributes index f6dfb293f4e0b73a73f2977336f9c5f88ace02a0..817b0bb808fb71885b7021a0eb80f059b32f519d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -66,3 +66,5 @@ no/samples/Arnulf[[:space:]]Overland[[:space:]]-[[:space:]]08_05_07[[:space:]]-[ no/samples/Arnulf[[:space:]]Overland[[:space:]]-[[:space:]]12_05_07[[:space:]]-[[:space:]]Male.wav filter=lfs diff=lfs merge=lfs -text no/samples/Arnulf[[:space:]]Overland[[:space:]]-[[:space:]]13_05_04[[:space:]]-[[:space:]]Female.wav filter=lfs diff=lfs merge=lfs -text no/samples/Ibsens[[:space:]]Ripsbaerbursker.wav filter=lfs diff=lfs merge=lfs -text +en/onnx/chatterbox_ONNX/llama3.data filter=lfs diff=lfs merge=lfs -text +en/onnx/chatterbox-onnx/onnx/language_model.onnx_data filter=lfs diff=lfs merge=lfs -text diff --git a/en/onnx/chatterbox-onnx/.gitattributes b/en/onnx/chatterbox-onnx/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..f1853612ebd8bf5d05fab33490c4b00a626c3d8b --- /dev/null +++ b/en/onnx/chatterbox-onnx/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +llama3.onnx.data filter=lfs diff=lfs merge=lfs -text +llama3.data filter=lfs diff=lfs merge=lfs -text +onnx/language_model.onnx_data filter=lfs diff=lfs merge=lfs -text diff --git a/en/onnx/chatterbox-onnx/conditional_decoder.onnx b/en/onnx/chatterbox-onnx/conditional_decoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0d97b030f63cfcc34770021a8f6dc3d3918dc64f --- /dev/null +++ b/en/onnx/chatterbox-onnx/conditional_decoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcac4d92bed1fc493ed9615c2c951885f052156daae18680e2685bae6b18ddf8 +size 286352231 diff --git a/en/onnx/chatterbox-onnx/conds.pt b/en/onnx/chatterbox-onnx/conds.pt new file mode 100644 index 0000000000000000000000000000000000000000..e13b43d1ce809473454627428ff413ebfc7e8660 --- /dev/null +++ b/en/onnx/chatterbox-onnx/conds.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e +size 107374 diff --git a/en/onnx/chatterbox-onnx/flow_inference.onnx b/en/onnx/chatterbox-onnx/flow_inference.onnx new file mode 100644 index 0000000000000000000000000000000000000000..0090926e9e1fc921fa713af0dc1816772484c0da --- /dev/null +++ b/en/onnx/chatterbox-onnx/flow_inference.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de5f4a45e8e89788956e1374cdf67f561019d40480bab3942513e6b81259d0f4 +size 185816292 diff --git a/en/onnx/chatterbox-onnx/hift_generator.onnx b/en/onnx/chatterbox-onnx/hift_generator.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ca0acdefd57aae7b7ea2f1ef78a97c8c8e0c4874 --- /dev/null +++ b/en/onnx/chatterbox-onnx/hift_generator.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c186efb97d260555584f00baa97c6e009d0d0335a60b40f2fc15ffaa68527ee +size 70304704 diff --git a/en/onnx/chatterbox-onnx/llama_with_past.onnx b/en/onnx/chatterbox-onnx/llama_with_past.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9cdfa9aca6fa48fcd752435056b0c4fa0e0716ea --- /dev/null +++ b/en/onnx/chatterbox-onnx/llama_with_past.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c96957a58b7ebf1a5bd5322949637dd6e040f1377a0686b5bef1fcd1e176016b +size 2047725936 diff --git a/en/onnx/chatterbox-onnx/onnx/language_model.onnx b/en/onnx/chatterbox-onnx/onnx/language_model.onnx new file mode 100644 index 0000000000000000000000000000000000000000..ab712681052ced427408f3ef07d189714da549c1 --- /dev/null +++ b/en/onnx/chatterbox-onnx/onnx/language_model.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2f908a1062e3ff40e11d9d59c2580af4407ae9734186901afc43942fc67b19 +size 228474 diff --git a/en/onnx/chatterbox-onnx/onnx/language_model.onnx_data b/en/onnx/chatterbox-onnx/onnx/language_model.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..a6ab26f2981ac56186ea73a59029f29ba3ecdf36 --- /dev/null +++ b/en/onnx/chatterbox-onnx/onnx/language_model.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a91e4d9d3d8eb948913f18bca4b08d2b40fca553a33181c03d80d84c090c63 +size 2080632832 diff --git a/en/onnx/chatterbox-onnx/source.txt b/en/onnx/chatterbox-onnx/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..026f55b57c55ee42e5b4f7286e87fadb23468e89 --- /dev/null +++ b/en/onnx/chatterbox-onnx/source.txt @@ -0,0 +1 @@ +https://huggingface.co/onnx-community/chatterbox-onnx \ No newline at end of file diff --git a/en/onnx/chatterbox-onnx/speech_embedding.onnx b/en/onnx/chatterbox-onnx/speech_embedding.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b122c714bfa2b45de6f00e20a0c6f57dce8cb07e --- /dev/null +++ b/en/onnx/chatterbox-onnx/speech_embedding.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c6935a267bbdada1b95b028659f0c582a9797901b9290d6cf9ca940f185008 +size 50357075 diff --git a/en/onnx/chatterbox-onnx/speech_encoder.onnx b/en/onnx/chatterbox-onnx/speech_encoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4214a6cfd4a351a4e82e4cd2b3e7fdef820ce911 --- /dev/null +++ b/en/onnx/chatterbox-onnx/speech_encoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffd24809a8d265e7237a7d3e60104c38cd46eefa1621e7224583401d26d15388 +size 79678497 diff --git a/en/onnx/chatterbox-onnx/stft_wrapper.onnx b/en/onnx/chatterbox-onnx/stft_wrapper.onnx new file mode 100644 index 0000000000000000000000000000000000000000..516e8c2d62cd405fe0db7225c656884d5e4f89e0 --- /dev/null +++ b/en/onnx/chatterbox-onnx/stft_wrapper.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73cd386decc96dc65e2e43a6ad2b603d6792f3aae11bb9ccacbd3f294b5a0e34 +size 13186298 diff --git a/en/onnx/chatterbox-onnx/tokenizer.json b/en/onnx/chatterbox-onnx/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..abd07c710243ba89bf1b21780e7c37ddde92334e --- /dev/null +++ b/en/onnx/chatterbox-onnx/tokenizer.json @@ -0,0 +1,1435 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "\u0303": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/en/onnx/chatterbox-onnx/tokenizer_config.json b/en/onnx/chatterbox-onnx/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cb9ec25536e44d86778b10509d3e5bdca459a5cf --- /dev/null +++ b/en/onnx/chatterbox-onnx/tokenizer_config.json @@ -0,0 +1,2061 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/en/onnx/chatterbox-onnx/vc_tokenizer_weights.pth b/en/onnx/chatterbox-onnx/vc_tokenizer_weights.pth new file mode 100644 index 0000000000000000000000000000000000000000..60648100a00929016675b0e5f57487423b3c9036 --- /dev/null +++ b/en/onnx/chatterbox-onnx/vc_tokenizer_weights.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1ded72ff76cd97a688fad7506098fe247806b9922d0d8a0923f2d827532de6 +size 495000154 diff --git a/en/onnx/chatterbox_ONNX/.gitattributes b/en/onnx/chatterbox_ONNX/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..162cd85d7cceecdd2fef9e92df020760e1a80d03 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +llama3.onnx.data filter=lfs diff=lfs merge=lfs -text +llama3.data filter=lfs diff=lfs merge=lfs -text diff --git a/en/onnx/chatterbox_ONNX/conditional_decoder.onnx b/en/onnx/chatterbox_ONNX/conditional_decoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c0a97415de69cfa6b6e17dd345cd09965a1b1d09 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/conditional_decoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fba02c957ad02eacc409f1fd85b9f6815f3a15b99385a8e94e101645afa390f4 +size 294921432 diff --git a/en/onnx/chatterbox_ONNX/flow_inference.onnx b/en/onnx/chatterbox_ONNX/flow_inference.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c72bc0ff6ce1b51ca2f15252ff77dda88bde64d6 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/flow_inference.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0052bc19f6d844f0f793a8010433f1df829d350b720b04700b86a52edccecf +size 185917375 diff --git a/en/onnx/chatterbox_ONNX/llama3.data b/en/onnx/chatterbox_ONNX/llama3.data new file mode 100644 index 0000000000000000000000000000000000000000..5a44ce8f18c1ab56646e1cc2f793c26afd43afb2 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/llama3.data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a763b2501b0022b6405ddbd3fd1a0ee36c4b58731199e035d55efdb3424bad +size 2080645120 diff --git a/en/onnx/chatterbox_ONNX/llama3.onnx b/en/onnx/chatterbox_ONNX/llama3.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d43d600360b8c54ecfc35c386b47c196653ffd7b --- /dev/null +++ b/en/onnx/chatterbox_ONNX/llama3.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a9cc8435d74a378709fb44057d1d8a4bfba1d6ce334668d5fd8cfb8e0a14684 +size 222296 diff --git a/en/onnx/chatterbox_ONNX/source.txt b/en/onnx/chatterbox_ONNX/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..70b3e40d7bf6fc78a1904c4f6e9fb418fbed9f35 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/source.txt @@ -0,0 +1 @@ +https://huggingface.co/vladislavbro/chatterbox_ONNX \ No newline at end of file diff --git a/en/onnx/chatterbox_ONNX/speech_encoder.onnx b/en/onnx/chatterbox_ONNX/speech_encoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..22cff3e616d6d0c37fff606bd581683d4df45722 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/speech_encoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b2881465fcc4c4dcb92944d7d89da7262629240a7589090a01fcd016f23254f +size 79677508 diff --git a/en/onnx/chatterbox_ONNX/tokenizer.json b/en/onnx/chatterbox_ONNX/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8db974d9d0d9cba9b2568601d2c697062ba0b789 --- /dev/null +++ b/en/onnx/chatterbox_ONNX/tokenizer.json @@ -0,0 +1,1435 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "\u0303": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/en/onnx/chatterbox_ONNX/tokenizer_config.json b/en/onnx/chatterbox_ONNX/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a425c8438236a2af0fda74dbe75ac86cf45bbe3e --- /dev/null +++ b/en/onnx/chatterbox_ONNX/tokenizer_config.json @@ -0,0 +1,2061 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/multi/expanded_chatterbox_model/.gitattributes b/multi/expanded_chatterbox_model/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/multi/expanded_chatterbox_model/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/multi/expanded_chatterbox_model/conds.pt b/multi/expanded_chatterbox_model/conds.pt new file mode 100644 index 0000000000000000000000000000000000000000..e13b43d1ce809473454627428ff413ebfc7e8660 --- /dev/null +++ b/multi/expanded_chatterbox_model/conds.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e +size 107374 diff --git a/multi/expanded_chatterbox_model/s3gen.safetensors b/multi/expanded_chatterbox_model/s3gen.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b752a028b2a1c2843b76e0df9582d8d81d10669d --- /dev/null +++ b/multi/expanded_chatterbox_model/s3gen.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b78103c654207393955e4900aac14a12de8ef25f4b09424f1ef91941f161d4e +size 1056484620 diff --git a/multi/expanded_chatterbox_model/source.txt b/multi/expanded_chatterbox_model/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..e70bd72fd8cb057dd7d0443caab9c8ad9cc33db4 --- /dev/null +++ b/multi/expanded_chatterbox_model/source.txt @@ -0,0 +1 @@ +https://huggingface.co/IIEleven11/expanded_chatterbox_model \ No newline at end of file diff --git a/multi/expanded_chatterbox_model/t3_cfg.safetensors b/multi/expanded_chatterbox_model/t3_cfg.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d4e34b78de38f2324bdbdd9e36d79b6db90a60b --- /dev/null +++ b/multi/expanded_chatterbox_model/t3_cfg.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39f7c3b0b2cf74ced0beeb1dcf295f41a03c9507bdde8108d17dd4471f8a8f39 +size 2137731056 diff --git a/multi/expanded_chatterbox_model/tokenizer.json b/multi/expanded_chatterbox_model/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4ba97f4fed282931b70812f4bb75a55f2fb90f1e --- /dev/null +++ b/multi/expanded_chatterbox_model/tokenizer.json @@ -0,0 +1,2458 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 704, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 705, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 706, + "content": "", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 707, + "content": "[xh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "̃": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703, + "": 704, + "": 705, + "": 706, + "[xh]": 707, + "ć": 708, + "Ⴀ": 709, + "Ⴁ": 710, + "Ⴂ": 711, + "Ⴃ": 712, + "Ⴄ": 713, + "Ⴆ": 714, + "Ⴈ": 715, + "Ⴉ": 716, + "Ⴊ": 717, + "Ⴋ": 718, + "Ⴌ": 719, + "Ⴍ": 720, + "Ⴎ": 721, + "Ⴐ": 722, + "Ⴑ": 723, + "Ⴒ": 724, + "Ⴓ": 725, + "Ⴔ": 726, + "Ⴕ": 727, + "Ⴗ": 728, + "Ⴟ": 729, + "Ⴠ": 730, + "ა": 731, + "ბ": 732, + "გ": 733, + "დ": 734, + "ე": 735, + "ვ": 736, + "ზ": 737, + "ი": 738, + "კ": 739, + "ლ": 740, + "მ": 741, + "ნ": 742, + "ო": 743, + "პ": 744, + "რ": 745, + "ს": 746, + "ტ": 747, + "უ": 748, + "ფ": 749, + "ქ": 750, + "ყ": 751, + "ც": 752, + "წ": 753, + "ხ": 754, + "ჯ": 755, + "ჰ": 756, + "": 757, + "კუ": 758, + "ან": 759, + "ელ": 760, + "ენ": 761, + "ზი": 762, + "წა": 763, + "ბა": 764, + "ნგ": 765, + "ტჰ": 766, + "სი": 767, + "ლა": 768, + "ინ": 769, + "მა": 770, + "პჰ": 771, + "ის": 772, + "კჰ": 773, + "კა": 774, + "ლო": 775, + "ყო": 776, + "წე": 777, + "ლუ": 778, + "ყა": 779, + "ლე": 780, + "კო": 781, + "უკუ": 782, + "ონ": 783, + "ყე": 784, + "ოკუ": 785, + "ზა": 786, + "ელა": 787, + "უმ": 788, + "ლი": 789, + "სჰ": 790, + "ანგ": 791, + "ნგა": 792, + "სე": 793, + "ნა": 794, + "ეზი": 795, + "კწა": 796, + "ელე": 797, + "კწ": 798, + "ანა": 799, + "და": 800, + "ბუ": 801, + "ბე": 802, + "ტჰი": 803, + "უნ": 804, + "ყი": 805, + "კე": 806, + "დი": 807, + "სა": 808, + "ბან": 809, + "ენი": 810, + "ბო": 811, + "ჰა": 812, + "ემ": 813, + "პჰა": 814, + "ნტ": 815, + "ომ": 816, + "ტჰა": 817, + "ელო": 818, + "ისა": 819, + "ენგ": 820, + "ზე": 821, + "ამა": 822, + "კჰო": 823, + "ზო": 824, + "ტსჰ": 825, + "ჰლა": 826, + "იმ": 827, + "ანგა": 828, + "ესი": 829, + "კწე": 830, + "ტჰე": 831, + "ულუ": 832, + "ბი": 833, + "ინი": 834, + "ელი": 835, + "უკუბა": 836, + "აბა": 837, + "ენზი": 838, + "ანდ": 839, + "კი": 840, + "ისე": 841, + "დო": 842, + "ლწა": 843, + "ფუნ": 844, + "იზი": 845, + "ნე": 846, + "ბონ": 847, + "ისი": 848, + "ტჰუ": 849, + "ინტ": 850, + "ხა": 851, + "წუ": 852, + "ისო": 853, + "წო": 854, + "კუბა": 855, + "ლეყო": 856, + "ენტ": 857, + "ნგო": 858, + "ანე": 859, + "ნოკუ": 860, + "ვა": 861, + "ჰო": 862, + "ნჯ": 863, + "ნო": 864, + "კან": 865, + "ქა": 866, + "ბანტ": 867, + "კჰე": 868, + "ნგოკუ": 869, + "კუნ": 870, + "მი": 871, + "კჰა": 872, + "ტყ": 873, + "ელწა": 874, + "ნგე": 875, + "ცე": 876, + "ფუ": 877, + "კჰუ": 878, + "ტჰო": 879, + "პჰუ": 880, + "პჰე": 881, + "ცა": 882, + "სებ": 883, + "ქუ": 884, + "სო": 885, + "ჰლ": 886, + "ჰუ": 887, + "ლელა": 888, + "ნტო": 889, + "ნი": 890, + "მან": 891, + "ენე": 892, + "ქო": 893, + "ესჰ": 894, + "ბანტუ": 895, + "ანტ": 896, + "კჰულუ": 897, + "დლა": 898, + "ისწა": 899, + "ონა": 900, + "ოლუ": 901, + "ენზა": 902, + "ქი": 903, + "ჰლო": 904, + "ლწ": 905, + "კწი": 906, + "ფა": 907, + "ეკუ": 908, + "ლოკუ": 909, + "მო": 910, + "დე": 911, + "ნდი": 912, + "პჰო": 913, + "ელწე": 914, + "ლუნგ": 915, + "ვუ": 916, + "პჰი": 917, + "ტი": 918, + "ბჰა": 919, + "ფი": 920, + "ლწე": 921, + "ტყა": 922, + "ყოკუ": 923, + "ზწე": 924, + "სებენზი": 925, + "ნტუ": 926, + "სუ": 927, + "ენა": 928, + "ცი": 929, + "ნდ": 930, + "წანა": 931, + "ფუმ": 932, + "ელელა": 933, + "კუმ": 934, + "ანი": 935, + "ბჰ": 936, + "ენდ": 937, + "იმი": 938, + "ზოკუ": 939, + "ტსჰა": 940, + "ანტსი": 941, + "კილეყო": 942, + "ელელე": 943, + "წენი": 944, + "ნჯენგ": 945, + "ბონა": 946, + "გა": 947, + "მბა": 948, + "ინა": 949, + "ონკე": 950, + "დწა": 951, + "პა": 952, + "ესჰა": 953, + "წან": 954, + "ჯო": 955, + "ნგუ": 956, + "იინ": 957, + "ჰლუ": 958, + "ლოო": 959, + "მბი": 960, + "კანყე": 961, + "ხესჰა": 962, + "ხო": 963, + "ჰლე": 964, + "გქი": 965, + "ფუნა": 966, + "ფუნე": 967, + "ხჰო": 968, + "ანდა": 969, + "ნყე": 970, + "ლისა": 971, + "ცო": 972, + "გუ": 973, + "ქჰუ": 974, + "რჰ": 975, + "აკუ": 976, + "ინზი": 977, + "ნყ": 978, + "ენგა": 979, + "ემი": 980, + "წეყო": 981, + "ინტო": 982, + "გც": 983, + "ულუმ": 984, + "ნამა": 985, + "ცჰა": 986, + "ენკო": 987, + "ინგ": 988, + "მპ": 989, + "ტა": 990, + "ლელე": 991, + "კწაზი": 992, + "ტჰწა": 993, + "კუნყე": 994, + "ენზე": 995, + "ანდლა": 996, + "ნომ": 997, + "სელე": 998, + "პჰაკა": 999, + "აბანტუ": 1000, + "ონდო": 1001, + "ნეზი": 1002, + "კომ": 1003, + "ლაწუ": 1004, + "ელელ": 1005, + "ინდ": 1006, + "ზუ": 1007, + "ენდა": 1008, + "პჰუმ": 1009, + "ანგო": 1010, + "ასი": 1011, + "დოდა": 1012, + "ისჰ": 1013, + "ენტე": 1014, + "უბუ": 1015, + "ბინი": 1016, + "ფო": 1017, + "ოკანყე": 1018, + "ელელო": 1019, + "სეტყ": 1020, + "უყა": 1021, + "ზელა": 1022, + "ტო": 1023, + "სოკუ": 1024, + "ნჯე": 1025, + "ქინ": 1026, + "წანო": 1027, + "მალი": 1028, + "ფუნეკა": 1029, + "ცუ": 1030, + "ტსჰო": 1031, + "ცჰ": 1032, + "ცწა": 1033, + "ტჰეტჰა": 1034, + "რჰულუმ": 1035, + "ნგამა": 1036, + "გო": 1037, + "კუმა": 1038, + "ჰლალა": 1039, + "ფან": 1040, + "ლენი": 1041, + "აყი": 1042, + "კქუ": 1043, + "კილე": 1044, + "ფანა": 1045, + "უმა": 1046, + "კჰი": 1047, + "კუკუ": 1048, + "პჰანტსი": 1049, + "გამა": 1050, + "წაყე": 1051, + "ნგენ": 1052, + "ყამა": 1053, + "კწან": 1054, + "ონო": 1055, + "მე": 1056, + "ონდ": 1057, + "ინდა": 1058, + "ყაკა": 1059, + "ზულუ": 1060, + "იკა": 1061, + "ლანდ": 1062, + "აბან": 1063, + "აპჰო": 1064, + "დალა": 1065, + "ნგუმ": 1066, + "ოკო": 1067, + "სწა": 1068, + "იი": 1069, + "ონი": 1070, + "პი": 1071, + "სისი": 1072, + "ინკო": 1073, + "კჰონა": 1074, + "კწეზი": 1075, + "ბანგა": 1076, + "პჰამბი": 1077, + "კქუბო": 1078, + "ლელო": 1079, + "აკა": 1080, + "ეს": 1081, + "კწაყე": 1082, + "ალო": 1083, + "ბანგ": 1084, + "კოდწა": 1085, + "ბენზი": 1086, + "ანდი": 1087, + "იიმ": 1088, + "იყა": 1089, + "კელე": 1090, + "ქესჰ": 1091, + "ვუმ": 1092, + "რჰულუმენტე": 1093, + "სეტყენზი": 1094, + "ჰლაბა": 1095, + "ნესი": 1096, + "ნდა": 1097, + "ხი": 1098, + "უკუნ": 1099, + "ფუმანა": 1100, + "ქინისე": 1101, + "ობუ": 1102, + "ქჰ": 1103, + "კუკჰო": 1104, + "ელუ": 1105, + "ბანტწანა": 1106, + "ხჰ": 1107, + "ყან": 1108, + "კოლო": 1109, + "ტჰუბა": 1110, + "მბ": 1111, + "კალა": 1112, + "ქალა": 1113, + "ფიკა": 1114, + "ამან": 1115, + "ფუნდო": 1116, + "ნენ": 1117, + "ედ": 1118, + "ქე": 1119, + "ბულა": 1120, + "ზწა": 1121, + "კწამა": 1122, + "ისებენზი": 1123, + "ფანელე": 1124, + "ლის": 1125, + "პჰანდ": 1126, + "ეზა": 1127, + "უმნტუ": 1128, + "პჰუჰლ": 1129, + "ნაბა": 1130, + "ამ": 1131, + "იინტ": 1132, + "ქჰა": 1133, + "სელა": 1134, + "ყინ": 1135, + "წოკუ": 1136, + "ხჰა": 1137, + "ზინ": 1138, + "დუ": 1139, + "სემ": 1140, + "ჯი": 1141, + "კელა": 1142, + "ლინგ": 1143, + "ლულე": 1144, + "ზწ": 1145, + "ლისე": 1146, + "ლოკო": 1147, + "ემა": 1148, + "ზანტსი": 1149, + "კწესი": 1150, + "პო": 1151, + "ელინ": 1152, + "ტჰანდა": 1153, + "კწენზა": 1154, + "რო": 1155, + "პჰაკატჰი": 1156, + "პჰელა": 1157, + "წანგ": 1158, + "ბელე": 1159, + "ტჰეტჰო": 1160, + "ენდლელა": 1161, + "ფუნდი": 1162, + "ეყა": 1163, + "კწენ": 1164, + "ზამა": 1165, + "ტყე": 1166, + "კულუ": 1167, + "კაყო": 1168, + "ინგა": 1169, + "ლაყო": 1170, + "ყესი": 1171, + "ჯონგ": 1172, + "კისა": 1173, + "ტუ": 1174, + "წენ": 1175, + "კაზი": 1176, + "უტჰი": 1177, + "ბანდა": 1178, + "ნზო": 1179, + "სანა": 1180, + "უნგა": 1181, + "ნოკუბა": 1182, + "წანე": 1183, + "კაკჰულუ": 1184, + "კანტი": 1185, + "ცედ": 1186, + "ნემ": 1187, + "წატჰი": 1188, + "ომა": 1189, + "ნანგ": 1190, + "ნკე": 1191, + "უტსჰ": 1192, + "ცწანგ": 1193, + "ბალა": 1194, + "ლომ": 1195, + "სებენზა": 1196, + "აზი": 1197, + "ყაზი": 1198, + "კწისი": 1199, + "ცწადი": 1200, + "იზა": 1201, + "კუფუნეკა": 1202, + "კუტჰი": 1203, + "ბენ": 1204, + "წონკე": 1205, + "ელანა": 1206, + "ბჰუ": 1207, + "სომ": 1208, + "უნყ": 1209, + "ცწანგც": 1210, + "უკწა": 1211, + "ნგამ": 1212, + "კანგა": 1213, + "სენ": 1214, + "ანყ": 1215, + "ეზინ": 1216, + "რა": 1217, + "ნასე": 1218, + "ნჯალო": 1219, + "კჰაყა": 1220, + "ტჰემ": 1221, + "ჰამბა": 1222, + "ჰლობო": 1223, + "ზელე": 1224, + "მბო": 1225, + "ბეკა": 1226, + "ლუნგუ": 1227, + "ხე": 1228, + "ყაყი": 1229, + "სესი": 1230, + "კწიზი": 1231, + "ზეზი": 1232, + "ლინ": 1233, + "ცინ": 1234, + "ლანდელა": 1235, + "ზანა": 1236, + "სჰო": 1237, + "ველა": 1238, + "აპჰა": 1239, + "უყე": 1240, + "ყომ": 1241, + "კწანოკუ": 1242, + "ვო": 1243, + "ემვა": 1244, + "ებუ": 1245, + "კოკუ": 1246, + "წამ": 1247, + "კულე": 1248, + "ბენი": 1249, + "რჰა": 1250, + "კელო": 1251, + "ტჰატჰა": 1252, + "ენზიწა": 1253, + "ხჰოსა": 1254, + "ბჰო": 1255, + "ისწე": 1256, + "ცალა": 1257, + "ლუნგა": 1258, + "ლისი": 1259, + "კუნგა": 1260, + "პჰანდლე": 1261, + "ზემ": 1262, + "ლწენი": 1263, + "პჰამბილი": 1264, + "ბელა": 1265, + "ზელ": 1266, + "ილე": 1267, + "უვა": 1268, + "სეტყენზისწა": 1269, + "ლელ": 1270, + "უკუზი": 1271, + "სჰი": 1272, + "ნგაბა": 1273, + "სუკა": 1274, + "კელელა": 1275, + "ყუ": 1276, + "კალოკუ": 1277, + "ყამ": 1278, + "სჰისჰ": 1279, + "ელეყო": 1280, + "ცედა": 1281, + "წაბა": 1282, + "ფუმანე": 1283, + "ბუმ": 1284, + "კწიმ": 1285, + "სუკუ": 1286, + "კაკუ": 1287, + "ესა": 1288, + "ნგომ": 1289, + "ნამ": 1290, + "ინკოსი": 1291, + "ტჰილე": 1292, + "ენეყო": 1293, + "ლელწა": 1294, + "ლულა": 1295, + "პჰატჰა": 1296, + "ეტჰო": 1297, + "უკუზე": 1298, + "პჰუმა": 1299, + "კისო": 1300, + "ფრ": 1301, + "ყენ": 1302, + "ყისე": 1303, + "მანზი": 1304, + "ანგე": 1305, + "კჰატჰა": 1306, + "პილო": 1307, + "ჯე": 1308, + "კამ": 1309, + "ინდაწო": 1310, + "ვე": 1311, + "ზონკე": 1312, + "ფრიკა": 1313, + "კწინ": 1314, + "ბონკე": 1315, + "წალა": 1316, + "იზინტო": 1317, + "ზომ": 1318, + "კარჰულუმენტე": 1319, + "ტჰენგ": 1320, + "ფუნდა": 1321, + "ნტწანა": 1322, + "ტჰწე": 1323, + "ლანგა": 1324, + "ცწანგცისო": 1325, + "კანა": 1326, + "ზიზი": 1327, + "წაზი": 1328, + "Ⴌგ": 1329, + "სჰე": 1330, + "ჰლაწუ": 1331, + "ტყალა": 1332, + "აბე": 1333, + "გი": 1334, + "ულო": 1335, + "პჰეზულუ": 1336, + "რი": 1337, + "უბა": 1338, + "ჰლწა": 1339, + "ფამა": 1340, + "ნალო": 1341, + "კწაკუ": 1342, + "დოლო": 1343, + "ლწაზი": 1344, + "ზენ": 1345, + "წამა": 1346, + "ნაკუ": 1347, + "ნობუ": 1348, + "პჰუჰლისო": 1349, + "ზწი": 1350, + "ზიკო": 1351, + "ოკუბა": 1352, + "ენზიწე": 1353, + "ბანდლა": 1354, + "ბისი": 1355, + "ნეე": 1356, + "ქონ": 1357, + "ცანდ": 1358, + "ყაკუ": 1359, + "პჰეზუ": 1360, + "ტე": 1361, + "ნგეზი": 1362, + "გუნ": 1363, + "ტჰუმ": 1364, + "უნი": 1365, + "ქონდო": 1366, + "ყოკუბა": 1367, + "იინკო": 1368, + "ცჰაპჰა": 1369, + "სჰუ": 1370, + "ფუნდ": 1371, + "გუქუ": 1372, + "აყე": 1373, + "ეკო": 1374, + "კწაბა": 1375, + "ტჰინი": 1376, + "ქექესჰ": 1377, + "ბაზი": 1378, + "ზამ": 1379, + "ანგან": 1380, + "ყეზი": 1381, + "უზა": 1382, + "ტჰინტ": 1383, + "კჰუსელე": 1384, + "ფე": 1385, + "ყაკჰე": 1386, + "ხჰობო": 1387, + "ცოცე": 1388, + "ლაკჰე": 1389, + "ბანი": 1390, + "კწემ": 1391, + "ილეყო": 1392, + "ხანდ": 1393, + "კუყო": 1394, + "უმზი": 1395, + "ნგოკო": 1396, + "ბჰედ": 1397, + "ინდლელა": 1398, + "სინი": 1399, + "ინკ": 1400, + "ყონა": 1401, + "Ⴀფრიკა": 1402, + "ინყაკა": 1403, + "საპჰო": 1404, + "ენტლა": 1405, + "ელელწანო": 1406, + "ლანდელაყო": 1407, + "კელწა": 1408, + "წაყო": 1409, + "ბაბე": 1410, + "ხაკი": 1411, + "ელელეყო": 1412, + "ენკონზო": 1413, + "კაკუჰლე": 1414, + "აწუ": 1415, + "ბისო": 1416, + "ლინი": 1417, + "ნანგა": 1418, + "ულა": 1419, + "ყის": 1420, + "ლონგ": 1421, + "ენგე": 1422, + "ნემი": 1423, + "ენდი": 1424, + "წაზა": 1425, + "ბესი": 1426, + "ლანა": 1427, + "ენდაწო": 1428, + "აბანგა": 1429, + "ზინი": 1430, + "კოკო": 1431, + "ბანე": 1432, + "ტჰეტჰ": 1433, + "ქჰუბა": 1434, + "ხანდუვა": 1435, + "ველ": 1436, + "ოყა": 1437, + "რჰო": 1438, + "კჰაწუ": 1439, + "ქელა": 1440, + "წელე": 1441, + "პჰონდო": 1442, + "პჰეპჰა": 1443, + "ლაწულო": 1444, + "ბალულე": 1445, + "ლამ": 1446, + "სებე": 1447, + "ეზ": 1448, + "ლისწა": 1449, + "ლან": 1450, + "ყატჰ": 1451, + "უკუყი": 1452, + "სჰა": 1453, + "ლესი": 1454, + "ქოქო": 1455, + "ენზეკა": 1456, + "ზინგა": 1457, + "ვაკა": 1458, + "კელწე": 1459, + "წესი": 1460, + "უკუტყა": 1461, + "ბომი": 1462, + "ჰამბ": 1463, + "ხიბ": 1464, + "ნგესი": 1465, + "ისანა": 1466, + "სჰისჰინი": 1467, + "მანდლა": 1468, + "ნონ": 1469, + "ენტო": 1470, + "ცედისა": 1471, + "ლიკა": 1472, + "აკუკჰო": 1473, + "ქანა": 1474, + "ყენა": 1475, + "სინგა": 1476, + "ყაბა": 1477, + "ემპ": 1478, + "ბონაკა": 1479, + "გოსა": 1480, + "ელინყე": 1481, + "ბისა": 1482, + "ილო": 1483, + "ნინზი": 1484, + "კუბე": 1485, + "ნგაყო": 1486, + "კჰალა": 1487, + "ფაკა": 1488, + "იწა": 1489, + "ნგელი": 1490, + "ყონკე": 1491, + "დლალა": 1492, + "ბჰე": 1493, + "კჰიწო": 1494, + "ცანდელო": 1495, + "დუმ": 1496, + "ნგენა": 1497, + "აბამ": 1498, + "დლალო": 1499, + "ჯონგენე": 1500, + "ზაყო": 1501, + "ზიმა": 1502, + "ზალი": 1503, + "ომბი": 1504, + "ლონგწანე": 1505, + "გრ": 1506, + "ყინტ": 1507, + "კუყა": 1508, + "სწე": 1509, + "ელენი": 1510, + "ზაბა": 1511, + "ზატჰუ": 1512, + "ბომ": 1513, + "ქოქოსჰო": 1514, + "მელა": 1515, + "ქუმ": 1516, + "ტჰატჰუ": 1517, + "ებე": 1518, + "ლისო": 1519, + "ყელა": 1520, + "ესიტჰი": 1521, + "ფაზი": 1522, + "ბონაკალა": 1523, + "ცოცეკო": 1524, + "Ⴉუ": 1525, + "Ⴋზანტსი": 1526, + "Ⴑი": 1527, + "ასე": 1528, + "ელელწა": 1529, + "ენყე": 1530, + "კჰუპჰა": 1531, + "ლაწულა": 1532, + "ცელო": 1533, + "კწენი": 1534, + "სებენზისა": 1535, + "ბანდაკან": 1536, + "ელწანო": 1537, + "ზიმ": 1538, + "ტჰელა": 1539, + "ეზინყე": 1540, + "ქუკა": 1541, + "კჰიწა": 1542, + "Ⴋა": 1543, + "ჯწა": 1544, + "კჰანგ": 1545, + "წეტჰუ": 1546, + "ამადოდა": 1547, + "ლუნგისა": 1548, + "ნდლუ": 1549, + "ლინდ": 1550, + "ტსა": 1551, + "უსი": 1552, + "მალუნგა": 1553, + "დიბან": 1554, + "ბანა": 1555, + "ლილო": 1556, + "00": 1557, + "განგა": 1558, + "კცუ": 1559, + "სეკუ": 1560, + "კჰუტჰა": 1561, + "ცაც": 1562, + "ლწინი": 1563, + "ება": 1564, + "ყოლუ": 1565, + "კწამ": 1566, + "ტსჰანე": 1567, + "ცებო": 1568, + "ლალა": 1569, + "ლეკო": 1570, + "კწაკჰე": 1571, + "ვან": 1572, + "ილი": 1573, + "მნ": 1574, + "ყანგა": 1575, + "ისელო": 1576, + "კჰუმ": 1577, + "პჰატჰ": 1578, + "კწეტჰუ": 1579, + "გცინ": 1580, + "ბაყა": 1581, + "ტჰანდ": 1582, + "კჰან": 1583, + "ყიმ": 1584, + "ცჰი": 1585, + "ზელწა": 1586, + "ნგელა": 1587, + "ნოკო": 1588, + "ზესი": 1589, + "ელწეყო": 1590, + "იბე": 1591, + "რე": 1592, + "უსა": 1593, + "ყენი": 1594, + "მასი": 1595, + "გცინა": 1596, + "აბანტწანა": 1597, + "მნტუ": 1598, + "სელწა": 1599, + "ჯელო": 1600, + "ყილე": 1601, + "კჰოსი": 1602, + "ეზო": 1603, + "კულო": 1604, + "კუპჰელა": 1605, + "ყემ": 1606, + "პჰალო": 1607, + "ტსჰი": 1608, + "კანი": 1609, + "პაწუ": 1610, + "პჰუმო": 1611, + "უტჰიხო": 1612, + "უკუმ": 1613, + "უმნ": 1614, + "ბუყა": 1615, + "ნგეკჰო": 1616, + "ელუნგ": 1617, + "ფანელეკილეყო": 1618, + "ზელწე": 1619, + "იხესჰა": 1620, + "წაბო": 1621, + "ბამ": 1622, + "სჰუმ": 1623, + "ეზინგა": 1624, + "გაქო": 1625, + "ნგამელი": 1626, + "იინგ": 1627, + "კრ": 1628, + "ბალი": 1629, + "სეკო": 1630, + "ნგენხა": 1631, + "სონ": 1632, + "ტყი": 1633, + "ბასე": 1634, + "ვუმა": 1635, + "ხიბელელწანო": 1636, + "ზიბე": 1637, + "სინ": 1638, + "კწემი": 1639, + "კუმბი": 1640, + "ბანგელა": 1641, + "უბე": 1642, + "კუდალა": 1643, + "ლეკა": 1644, + "ჰაყი": 1645, + "ზონა": 1646, + "წანგა": 1647, + "ლუნგის": 1648, + "ენკომო": 1649, + "ლინგან": 1650, + "მანგა": 1651, + "უკა": 1652, + "სიკა": 1653, + "ლუმ": 1654, + "ნაყე": 1655, + "ტჰეტჰე": 1656, + "ცედო": 1657, + "ცინგა": 1658, + "ფუმანეკა": 1659, + "Ⴓკუ": 1660, + "მფო": 1661, + "წაკჰე": 1662, + "ბუსო": 1663, + "ქაკუ": 1664, + "ფუტსჰანე": 1665, + "ცალუ": 1666, + "უკუნქ": 1667, + "სელ": 1668, + "კუზო": 1669, + "ლწანა": 1670, + "ფიკ": 1671, + "ქჰუბე": 1672, + "ხელელა": 1673, + "ჯა": 1674, + "ლამა": 1675, + "ზაზი": 1676, + "ლილეყო": 1677, + "ინტეტჰო": 1678, + "ჰლან": 1679, + "პალა": 1680, + "19": 1681, + "წომ": 1682, + "ხუ": 1683, + "კწენზეკა": 1684, + "პჰულო": 1685, + "ნდლელა": 1686, + "ცჰუ": 1687, + "კულა": 1688, + "კუყე": 1689 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ], + "language": "xh" + } +} \ No newline at end of file diff --git a/multi/expanded_chatterbox_model/ve.safetensors b/multi/expanded_chatterbox_model/ve.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0713f1587e627f23d93121e154a7de490d549dfb --- /dev/null +++ b/multi/expanded_chatterbox_model/ve.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c +size 5695784 diff --git a/pt/Chatterbox-TTS-Portuguese/.gitattributes b/pt/Chatterbox-TTS-Portuguese/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/pt/Chatterbox-TTS-Portuguese/README.md b/pt/Chatterbox-TTS-Portuguese/README.md new file mode 100644 index 0000000000000000000000000000000000000000..237200b0d79498f854d4313dcafd3fe04972a054 --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/README.md @@ -0,0 +1,34 @@ +--- +license: mit +datasets: +- AdrienB134/portuguese-tts +- freds0/BRSpeech-TTS +language: +- pt +metrics: +- accuracy +base_model: +- ResembleAI/chatterbox +pipeline_tag: text-to-speech +tags: +- voice +- tts +- pt +- pt-br +- portuguese +--- + +

Chatterbox TTS Portuguese

+ +# Overview + +- **This project is a finetuned version of the ResembleAI/Chatterbox model, extended to include support for the Portuguese language. Hosted on Hugging Face at FearL0rd/Chatterbox-TTS-Portuguese, this model enhances text-to-speech (TTS) capabilities for Portuguese, focusing on natural and accurate speech generation.** + +# Features + + +- **Portuguese TTS Support: Optimized for generating high-quality Portuguese speech, including Brazilian and European variants.** +- **Multilingual Compatibility: Retains the original multilingual capabilities of ResembleAI/Chatterbox.** +- **High-Quality Audio Output: Finetuned for clear, natural-sounding Portuguese speech suitable for conversational AI, audiobooks, and more.** +- **Applications: Ideal for TTS applications, virtual assistants, and accessibility tools requiring Portuguese support.** + diff --git a/pt/Chatterbox-TTS-Portuguese/conds.pt b/pt/Chatterbox-TTS-Portuguese/conds.pt new file mode 100644 index 0000000000000000000000000000000000000000..e13b43d1ce809473454627428ff413ebfc7e8660 --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/conds.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e +size 107374 diff --git a/pt/Chatterbox-TTS-Portuguese/s3gen.safetensors b/pt/Chatterbox-TTS-Portuguese/s3gen.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95ce115e3d0b51d6c0b47e8f5af10bc8f95b79ad --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/s3gen.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeaa7890908fad1067121b8922b6d3dd6901f590fbeab419c500a4a6cda922cb +size 1056486308 diff --git a/pt/Chatterbox-TTS-Portuguese/source.txt b/pt/Chatterbox-TTS-Portuguese/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..a8748fb9d7cb37e933674404af1b1b72b6937937 --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/source.txt @@ -0,0 +1 @@ +https://huggingface.co/FearL0rd/Chatterbox-TTS-Portuguese \ No newline at end of file diff --git a/pt/Chatterbox-TTS-Portuguese/t3_cfg.safetensors b/pt/Chatterbox-TTS-Portuguese/t3_cfg.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e431b2e743a158f38ef82c71ac0b7fa059701bb2 --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/t3_cfg.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cecd6ef6d5fd3aced2feeb9ed2553f803751a944fb66f04483d5127a4d36084 +size 2129653744 diff --git a/pt/Chatterbox-TTS-Portuguese/tokenizer.json b/pt/Chatterbox-TTS-Portuguese/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..abd07c710243ba89bf1b21780e7c37ddde92334e --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/tokenizer.json @@ -0,0 +1,1435 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "\u0303": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/pt/Chatterbox-TTS-Portuguese/ve.safetensors b/pt/Chatterbox-TTS-Portuguese/ve.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0713f1587e627f23d93121e154a7de490d549dfb --- /dev/null +++ b/pt/Chatterbox-TTS-Portuguese/ve.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c +size 5695784 diff --git a/ru/Chatterbox-Russian-TTS/.gitattributes b/ru/Chatterbox-Russian-TTS/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/ru/Chatterbox-Russian-TTS/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/ru/Chatterbox-Russian-TTS/source.txt b/ru/Chatterbox-Russian-TTS/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..7ef3f8e31335db12f865f1f621d1cc8c8b84bfab --- /dev/null +++ b/ru/Chatterbox-Russian-TTS/source.txt @@ -0,0 +1 @@ +https://huggingface.co/zAnonymousWizard/Chatterbox-Russian-TTS \ No newline at end of file diff --git a/ru/Chatterbox-Russian-TTS/t3_cfg_16000_steps.safetensors b/ru/Chatterbox-Russian-TTS/t3_cfg_16000_steps.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c930f559d7e613dee130f6f82afceff32bc5a9f --- /dev/null +++ b/ru/Chatterbox-Russian-TTS/t3_cfg_16000_steps.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:762f8e20e2605f806ac6e7706ba37cb98b6f5281eebc76b10b12136cb1a0016e +size 2132112248 diff --git a/ru/Chatterbox-Russian-TTS/tokenizer.json b/ru/Chatterbox-Russian-TTS/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..54c25cf21b1b815dc7a72de4d7edcb21f4cc94b4 --- /dev/null +++ b/ru/Chatterbox-Russian-TTS/tokenizer.json @@ -0,0 +1,1735 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "̃": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703, + "[s": 704, + "[spac": 705, + "[space]": 706, + "[space]а": 707, + "[space]ар": 708, + "[space]б": 709, + "[space]бо": 710, + "[space]бу": 711, + "[space]в": 712, + "[space]в[space]": 713, + "[space]в[space]с": 714, + "[space]ва": 715, + "[space]ви": 716, + "[space]включ": 717, + "[space]включи": 718, + "[space]включи[space]": 719, + "[space]во": 720, + "[space]вос": 721, + "[space]восемь": 722, + "[space]вы": 723, + "[space]г": 724, + "[space]д": 725, + "[space]два": 726, + "[space]двадцать": 727, + "[space]двадцать[space]": 728, + "[space]две": 729, + "[space]дев": 730, + "[space]девять": 731, + "[space]день": 732, + "[space]дес": 733, + "[space]ди": 734, + "[space]до": 735, + "[space]долла": 736, + "[space]з": 737, + "[space]за": 738, + "[space]и": 739, + "[space]и[space]": 740, + "[space]к": 741, + "[space]ка": 742, + "[space]кар": 743, + "[space]ки": 744, + "[space]ко": 745, + "[space]ку": 746, + "[space]ли": 747, + "[space]м": 748, + "[space]ма": 749, + "[space]манчестер": 750, + "[space]мат": 751, + "[space]ме": 752, + "[space]ми": 753, + "[space]мне": 754, + "[space]мо": 755, + "[space]можешь": 756, + "[space]му": 757, + "[space]на": 758, + "[space]на[space]смотрешке": 759, + "[space]на[space]тв": 760, + "[space]най": 761, + "[space]найдет": 762, + "[space]но": 763, + "[space]ноль": 764, + "[space]о": 765, + "[space]один": 766, + "[space]от": 767, + "[space]п": 768, + "[space]па": 769, + "[space]пер": 770, + "[space]пере": 771, + "[space]перев": 772, + "[space]переда": 773, + "[space]по": 774, + "[space]под": 775, + "[space]пос": 776, + "[space]послед": 777, + "[space]посмотре": 778, + "[space]пре": 779, + "[space]при": 780, + "[space]про": 781, + "[space]пят": 782, + "[space]пять": 783, + "[space]пятьдесят": 784, + "[space]ре": 785, + "[space]ро": 786, + "[space]руб": 787, + "[space]с": 788, + "[space]са": 789, + "[space]се": 790, + "[space]сезо": 791, + "[space]сезон": 792, + "[space]сезона": 793, + "[space]семь": 794, + "[space]семьдесят": 795, + "[space]сер": 796, + "[space]сери": 797, + "[space]сериал": 798, + "[space]серия": 799, + "[space]си": 800, + "[space]ско": 801, + "[space]сколько": 802, + "[space]смотре": 803, + "[space]смотреш": 804, + "[space]смотрешке": 805, + "[space]смотрешке[space]": 806, + "[space]со": 807, + "[space]сорок": 808, + "[space]ст": 809, + "[space]сто": 810, + "[space]т": 811, + "[space]та": 812, + "[space]тв": 813, + "[space]те": 814, + "[space]теб": 815, + "[space]теле": 816, + "[space]телеви": 817, + "[space]телевизо": 818, + "[space]телевизоре": 819, + "[space]телеканал": 820, + "[space]телефо": 821, + "[space]тре": 822, + "[space]три": 823, + "[space]три[space]": 824, + "[space]тридцать": 825, + "[space]ты": 826, + "[space]тыся": 827, + "[space]тысяч": 828, + "[space]у": 829, + "[space]ф": 830, + "[space]фильм": 831, + "[space]х": 832, + "[space]ч": 833, + "[space]четыре": 834, + "[space]ш": 835, + "[space]шесть": 836, + "[space]э": 837, + "[space]ю": 838, + "[space]я": 839, + "а": 840, + "ай": 841, + "ал": 842, + "але": 843, + "али": 844, + "алиса": 845, + "аль": 846, + "ан": 847, + "анд": 848, + "ани": 849, + "анчест": 850, + "анчестер": 851, + "ар": 852, + "афи": 853, + "афина": 854, + "афина[space]": 855, + "б": 856, + "ба": 857, + "бан": 858, + "бе": 859, + "бер": 860, + "би": 861, + "бо": 862, + "бот": 863, + "бу": 864, + "будет": 865, + "бы": 866, + "в": 867, + "ва": 868, + "ве": 869, + "вер": 870, + "вет": 871, + "ви": 872, + "вич": 873, + "включ": 874, + "включи[space]": 875, + "во": 876, + "вой": 877, + "ву": 878, + "вы": 879, + "вь": 880, + "г": 881, + "га": 882, + "ге": 883, + "ги": 884, + "ги[space]": 885, + "го": 886, + "год": 887, + "гра": 888, + "гу": 889, + "д": 890, + "да": 891, + "де": 892, + "дев": 893, + "дес": 894, + "десят": 895, + "дет": 896, + "дж": 897, + "джо": 898, + "джой": 899, + "джой[space]": 900, + "ди": 901, + "ди[space]": 902, + "до": 903, + "ду": 904, + "дца": 905, + "дцать": 906, + "дцать[space]": 907, + "ды": 908, + "дь": 909, + "е": 910, + "е[space]": 911, + "ев": 912, + "еви": 913, + "его": 914, + "ез": 915, + "ей": 916, + "ел": 917, + "ело": 918, + "ем": 919, + "емь": 920, + "ен": 921, + "ени": 922, + "ент": 923, + "ень": 924, + "ер": 925, + "ери": 926, + "ес": 927, + "ест": 928, + "есть": 929, + "есть[space]": 930, + "есть[space]ли[space]": 931, + "ет": 932, + "еты": 933, + "етыре": 934, + "ешь": 935, + "ж": 936, + "жал": 937, + "жалуйста": 938, + "жан": 939, + "жд": 940, + "же": 941, + "жешь": 942, + "жи": 943, + "жно": 944, + "з": 945, + "за": 946, + "запу": 947, + "зи": 948, + "зна": 949, + "зо": 950, + "зод": 951, + "зы": 952, + "и": 953, + "и[space]": 954, + "и[space]м": 955, + "и[space]мне": 956, + "и[space]мне[space]": 957, + "ив": 958, + "из": 959, + "ин": 960, + "ит": 961, + "ищ": 962, + "й": 963, + "й[space]": 964, + "й[space]сезон": 965, + "й[space]э": 966, + "й[space]эпизод": 967, + "к": 968, + "ка": 969, + "каж": 970, + "кажи[space]": 971, + "каза": 972, + "казать": 973, + "как": 974, + "канал": 975, + "кар": 976, + "ке": 977, + "ке[space]": 978, + "ки": 979, + "ки[space]": 980, + "кие": 981, + "кино": 982, + "ключ": 983, + "ко": 984, + "ков": 985, + "кой": 986, + "ком": 987, + "кро": 988, + "кс": 989, + "кто": 990, + "ку": 991, + "л": 992, + "ла": 993, + "лай": 994, + "ле": 995, + "лед": 996, + "лей": 997, + "лен": 998, + "ли": 999, + "ли[space]": 1000, + "лий": 1001, + "лла": 1002, + "ло": 1003 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/.gitattributes b/ru/chatterbox-ru-t3k/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..dab9a4e17afd2ef39d90ccb0b40ef2786fe77422 --- /dev/null +++ b/ru/chatterbox-ru-t3k/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/ru/chatterbox-ru-t3k/all_results.json b/ru/chatterbox-ru-t3k/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..aa9e11253367dc1bfa042ccac9525e749388aa31 --- /dev/null +++ b/ru/chatterbox-ru-t3k/all_results.json @@ -0,0 +1,11 @@ +{ + "epoch": 8.0, + "eval_runtime": 5.7978, + "eval_samples_per_second": 0.862, + "eval_steps_per_second": 0.172, + "total_flos": 0.0, + "train_loss": 2.4700030918121336, + "train_runtime": 33124.2258, + "train_samples_per_second": 2.414, + "train_steps_per_second": 0.06 +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/model.safetensors b/ru/chatterbox-ru-t3k/checkpoint-1200/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2a86ea3054e660961615ee6fb902d3b5c09f672 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fca9f6c9da56beb6a3b1f2798a579b13abb57477384cc8d8f1b9ae8affce884 +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/optimizer.pt b/ru/chatterbox-ru-t3k/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..38d8adaabebb46edb16dc078b39c6032b10ccf5a --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3654e427631d4a610b52be1ecd3d8c28c0195f737c988a00cdfa12f73ff35573 +size 4259421562 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/rng_state.pth b/ru/chatterbox-ru-t3k/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..61885134499b414e242fccecd178c7ee0e9318c6 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da464937ba316d48ec9949d07484fd45b440ffa7234c330d726d67cebd362fc9 +size 14244 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/scheduler.pt b/ru/chatterbox-ru-t3k/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..021a0db5145f075b7abab9a9c003ce8fdd0da640 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83f2cfdd4644fc5166a75699380c870d3e26c0d4eb1d51bfb38fdf5956ed91f0 +size 1064 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/trainer_state.json b/ru/chatterbox-ru-t3k/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e19a31b61a3485a6f913adba05f2b94e69a21b0 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/trainer_state.json @@ -0,0 +1,166 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.8, + "eval_steps": 200, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + }, + { + "epoch": 2.0, + "grad_norm": 16.141651153564453, + "learning_rate": 1.5810526315789473e-05, + "loss": 4.3198, + "step": 500 + }, + { + "epoch": 2.4, + "grad_norm": 18.464941024780273, + "learning_rate": 1.4757894736842106e-05, + "loss": 3.5096, + "step": 600 + }, + { + "epoch": 2.4, + "eval_runtime": 5.173, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.193, + "step": 600 + }, + { + "epoch": 2.8, + "grad_norm": 22.829710006713867, + "learning_rate": 1.371578947368421e-05, + "loss": 3.485, + "step": 700 + }, + { + "epoch": 3.2, + "grad_norm": 29.9942569732666, + "learning_rate": 1.2663157894736843e-05, + "loss": 3.032, + "step": 800 + }, + { + "epoch": 3.2, + "eval_runtime": 5.362, + "eval_samples_per_second": 0.932, + "eval_steps_per_second": 0.186, + "step": 800 + }, + { + "epoch": 3.6, + "grad_norm": 36.433963775634766, + "learning_rate": 1.1621052631578948e-05, + "loss": 2.5797, + "step": 900 + }, + { + "epoch": 4.0, + "grad_norm": 24.55065155029297, + "learning_rate": 1.0568421052631579e-05, + "loss": 2.5185, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_runtime": 4.999, + "eval_samples_per_second": 1.0, + "eval_steps_per_second": 0.2, + "step": 1000 + }, + { + "epoch": 4.4, + "grad_norm": 25.797325134277344, + "learning_rate": 9.515789473684212e-06, + "loss": 1.6965, + "step": 1100 + }, + { + "epoch": 4.8, + "grad_norm": 32.363624572753906, + "learning_rate": 8.463157894736843e-06, + "loss": 1.7272, + "step": 1200 + }, + { + "epoch": 4.8, + "eval_runtime": 5.5586, + "eval_samples_per_second": 0.9, + "eval_steps_per_second": 0.18, + "step": 1200 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/checkpoint-1200/training_args.bin b/ru/chatterbox-ru-t3k/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/model.safetensors b/ru/chatterbox-ru-t3k/checkpoint-1600/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76c48a8b4adaaf8cfc6841e5bdefb8fe8dea04cc --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:902415da233b09f8c2c52d6e350f099b4f86612afa032d84f51a410e3741f170 +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/optimizer.pt b/ru/chatterbox-ru-t3k/checkpoint-1600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..742c31c3663c3c37ed3892b8a1afc5186672ff3a --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f434012329ebeb6688ad2358c04e4a2e3e636fc65e4b194544c5a46569e74e1 +size 4259421562 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/rng_state.pth b/ru/chatterbox-ru-t3k/checkpoint-1600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6053d5d5c444e21bf9695cb2ebe4d0ccdf5f9f9 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a0c7d2ac7e7922c6c20d56dd9bd8a988296fe2f7c05c853452c8ae7bd92c24e +size 14244 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/scheduler.pt b/ru/chatterbox-ru-t3k/checkpoint-1600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d0a1345b33158bcef059cf1dc6411f15a6fef06 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdef282d6fa9c230a8b037437ed0cc8311f4607a9651aefa82d9ca7b19fb5cad +size 1064 diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/trainer_state.json b/ru/chatterbox-ru-t3k/checkpoint-1600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8105e60792bd9b705ab2ec939a68a68157392436 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/trainer_state.json @@ -0,0 +1,208 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.4, + "eval_steps": 200, + "global_step": 1600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + }, + { + "epoch": 2.0, + "grad_norm": 16.141651153564453, + "learning_rate": 1.5810526315789473e-05, + "loss": 4.3198, + "step": 500 + }, + { + "epoch": 2.4, + "grad_norm": 18.464941024780273, + "learning_rate": 1.4757894736842106e-05, + "loss": 3.5096, + "step": 600 + }, + { + "epoch": 2.4, + "eval_runtime": 5.173, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.193, + "step": 600 + }, + { + "epoch": 2.8, + "grad_norm": 22.829710006713867, + "learning_rate": 1.371578947368421e-05, + "loss": 3.485, + "step": 700 + }, + { + "epoch": 3.2, + "grad_norm": 29.9942569732666, + "learning_rate": 1.2663157894736843e-05, + "loss": 3.032, + "step": 800 + }, + { + "epoch": 3.2, + "eval_runtime": 5.362, + "eval_samples_per_second": 0.932, + "eval_steps_per_second": 0.186, + "step": 800 + }, + { + "epoch": 3.6, + "grad_norm": 36.433963775634766, + "learning_rate": 1.1621052631578948e-05, + "loss": 2.5797, + "step": 900 + }, + { + "epoch": 4.0, + "grad_norm": 24.55065155029297, + "learning_rate": 1.0568421052631579e-05, + "loss": 2.5185, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_runtime": 4.999, + "eval_samples_per_second": 1.0, + "eval_steps_per_second": 0.2, + "step": 1000 + }, + { + "epoch": 4.4, + "grad_norm": 25.797325134277344, + "learning_rate": 9.515789473684212e-06, + "loss": 1.6965, + "step": 1100 + }, + { + "epoch": 4.8, + "grad_norm": 32.363624572753906, + "learning_rate": 8.463157894736843e-06, + "loss": 1.7272, + "step": 1200 + }, + { + "epoch": 4.8, + "eval_runtime": 5.5586, + "eval_samples_per_second": 0.9, + "eval_steps_per_second": 0.18, + "step": 1200 + }, + { + "epoch": 5.2, + "grad_norm": 36.48984146118164, + "learning_rate": 7.410526315789475e-06, + "loss": 1.342, + "step": 1300 + }, + { + "epoch": 5.6, + "grad_norm": 33.30397415161133, + "learning_rate": 6.357894736842106e-06, + "loss": 1.0472, + "step": 1400 + }, + { + "epoch": 5.6, + "eval_runtime": 5.6569, + "eval_samples_per_second": 0.884, + "eval_steps_per_second": 0.177, + "step": 1400 + }, + { + "epoch": 6.0, + "grad_norm": 29.339319229125977, + "learning_rate": 5.305263157894738e-06, + "loss": 1.0126, + "step": 1500 + }, + { + "epoch": 6.4, + "grad_norm": 48.872501373291016, + "learning_rate": 4.252631578947369e-06, + "loss": 0.5935, + "step": 1600 + }, + { + "epoch": 6.4, + "eval_runtime": 5.4682, + "eval_samples_per_second": 0.914, + "eval_steps_per_second": 0.183, + "step": 1600 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/checkpoint-1600/training_args.bin b/ru/chatterbox-ru-t3k/checkpoint-1600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-1600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/model.safetensors b/ru/chatterbox-ru-t3k/checkpoint-2000/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d92c5f1e2ba99a4d3f32e2e8bc20e9ee1e39777e --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3883569bca84e6be1e6c6d7e9dc15d669fd19d9bac686ae4016c17e309d786a +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/optimizer.pt b/ru/chatterbox-ru-t3k/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..51173c1ed586bc00ca501386c853f7bd3a7332f2 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388bd42ba13cf2d3da719422697287600e2ba5ec55c20155328de5eedbfe6439 +size 4259421562 diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/rng_state.pth b/ru/chatterbox-ru-t3k/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b13c7161b8826ead1ae7c862210d659b382368f --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f46f7da5cd7c7db43854f2273f0c557a92ed685ea1ccaa4ff67c1f1d5a34b09b +size 14244 diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/scheduler.pt b/ru/chatterbox-ru-t3k/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c3adf42514f13d63e4271496bbc9b49e48e6c11 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:082019e7b03ad629f41316647d16bc517ff10441ea44dfd6dff39ecc4e0b008f +size 1064 diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/trainer_state.json b/ru/chatterbox-ru-t3k/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5da3959e7257cc49568175004bf8a0aacbd94b40 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/trainer_state.json @@ -0,0 +1,250 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 200, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + }, + { + "epoch": 2.0, + "grad_norm": 16.141651153564453, + "learning_rate": 1.5810526315789473e-05, + "loss": 4.3198, + "step": 500 + }, + { + "epoch": 2.4, + "grad_norm": 18.464941024780273, + "learning_rate": 1.4757894736842106e-05, + "loss": 3.5096, + "step": 600 + }, + { + "epoch": 2.4, + "eval_runtime": 5.173, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.193, + "step": 600 + }, + { + "epoch": 2.8, + "grad_norm": 22.829710006713867, + "learning_rate": 1.371578947368421e-05, + "loss": 3.485, + "step": 700 + }, + { + "epoch": 3.2, + "grad_norm": 29.9942569732666, + "learning_rate": 1.2663157894736843e-05, + "loss": 3.032, + "step": 800 + }, + { + "epoch": 3.2, + "eval_runtime": 5.362, + "eval_samples_per_second": 0.932, + "eval_steps_per_second": 0.186, + "step": 800 + }, + { + "epoch": 3.6, + "grad_norm": 36.433963775634766, + "learning_rate": 1.1621052631578948e-05, + "loss": 2.5797, + "step": 900 + }, + { + "epoch": 4.0, + "grad_norm": 24.55065155029297, + "learning_rate": 1.0568421052631579e-05, + "loss": 2.5185, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_runtime": 4.999, + "eval_samples_per_second": 1.0, + "eval_steps_per_second": 0.2, + "step": 1000 + }, + { + "epoch": 4.4, + "grad_norm": 25.797325134277344, + "learning_rate": 9.515789473684212e-06, + "loss": 1.6965, + "step": 1100 + }, + { + "epoch": 4.8, + "grad_norm": 32.363624572753906, + "learning_rate": 8.463157894736843e-06, + "loss": 1.7272, + "step": 1200 + }, + { + "epoch": 4.8, + "eval_runtime": 5.5586, + "eval_samples_per_second": 0.9, + "eval_steps_per_second": 0.18, + "step": 1200 + }, + { + "epoch": 5.2, + "grad_norm": 36.48984146118164, + "learning_rate": 7.410526315789475e-06, + "loss": 1.342, + "step": 1300 + }, + { + "epoch": 5.6, + "grad_norm": 33.30397415161133, + "learning_rate": 6.357894736842106e-06, + "loss": 1.0472, + "step": 1400 + }, + { + "epoch": 5.6, + "eval_runtime": 5.6569, + "eval_samples_per_second": 0.884, + "eval_steps_per_second": 0.177, + "step": 1400 + }, + { + "epoch": 6.0, + "grad_norm": 29.339319229125977, + "learning_rate": 5.305263157894738e-06, + "loss": 1.0126, + "step": 1500 + }, + { + "epoch": 6.4, + "grad_norm": 48.872501373291016, + "learning_rate": 4.252631578947369e-06, + "loss": 0.5935, + "step": 1600 + }, + { + "epoch": 6.4, + "eval_runtime": 5.4682, + "eval_samples_per_second": 0.914, + "eval_steps_per_second": 0.183, + "step": 1600 + }, + { + "epoch": 6.8, + "grad_norm": 24.40644073486328, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.5881, + "step": 1700 + }, + { + "epoch": 7.2, + "grad_norm": 13.23859691619873, + "learning_rate": 2.1473684210526317e-06, + "loss": 0.4895, + "step": 1800 + }, + { + "epoch": 7.2, + "eval_runtime": 5.2891, + "eval_samples_per_second": 0.945, + "eval_steps_per_second": 0.189, + "step": 1800 + }, + { + "epoch": 7.6, + "grad_norm": 13.824739456176758, + "learning_rate": 1.0947368421052632e-06, + "loss": 0.3585, + "step": 1900 + }, + { + "epoch": 8.0, + "grad_norm": 13.559722900390625, + "learning_rate": 4.2105263157894737e-08, + "loss": 0.3556, + "step": 2000 + }, + { + "epoch": 8.0, + "eval_runtime": 5.0911, + "eval_samples_per_second": 0.982, + "eval_steps_per_second": 0.196, + "step": 2000 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/checkpoint-2000/training_args.bin b/ru/chatterbox-ru-t3k/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/model.safetensors b/ru/chatterbox-ru-t3k/checkpoint-400/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09dbd531af2763cdd2463317fbbcf0820783a546 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ae32f65e2212f36d8621b7231e2f60a5762e784a1393e2432e562bca28ee48 +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/optimizer.pt b/ru/chatterbox-ru-t3k/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..335272c2314e69d1bde0eb08fd9bd2b0d6a75e18 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4109e80fc91eb04276b35665ea000c121fdc6e9077fa5818faed182867d1e1f8 +size 4259421562 diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/rng_state.pth b/ru/chatterbox-ru-t3k/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c6c6760c4c39954e060d7cca20f2c46b4b44d87 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25da2e44980101aef9a18ac9409295e1f3d03f0e5103bb451f67f5c4b912b53 +size 14244 diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/scheduler.pt b/ru/chatterbox-ru-t3k/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd6a605029784a1a7ff47d06da76005dccb5cde9 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101e6f062695ac4ab8b7996003dbd03725bc4b4457db1bbca93928c20e9aa8bd +size 1064 diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/trainer_state.json b/ru/chatterbox-ru-t3k/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1367951f6107154712827b11a8d01f2e98ba02b2 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/trainer_state.json @@ -0,0 +1,82 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6, + "eval_steps": 200, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/checkpoint-400/training_args.bin b/ru/chatterbox-ru-t3k/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/model.safetensors b/ru/chatterbox-ru-t3k/checkpoint-800/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..399de476cf4554603d9dd8dc8d5e1d6de34a0a6a --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:338fe91450a4d910e8ee142bc315fefc598eee7157cd2429ff107aacc42b95b2 +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/optimizer.pt b/ru/chatterbox-ru-t3k/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0adc744e16ba186c69c0c5160654242472c14318 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:315e9053821cdf767ca1d1ae89fb8d678198a90b0ee6f4419fc58654b58b4150 +size 4259421562 diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/rng_state.pth b/ru/chatterbox-ru-t3k/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9fc40597154b9256a5fcf9993a7892acd39a467 --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba75d18af7153db6603875d2b8ca73cb7cf1b1c3976a2bfd36198e36a7c8eaab +size 14244 diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/scheduler.pt b/ru/chatterbox-ru-t3k/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..de13bcded9afb58119571b8bed49b4bcc4298a8d --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d6831ab37f8830e4154fcdb7718cedf89e6d166381a1e05f293c5f59c8d402 +size 1064 diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/trainer_state.json b/ru/chatterbox-ru-t3k/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c693e31a61aef156698675f3584ce9eed051e57a --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/trainer_state.json @@ -0,0 +1,124 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.2, + "eval_steps": 200, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + }, + { + "epoch": 2.0, + "grad_norm": 16.141651153564453, + "learning_rate": 1.5810526315789473e-05, + "loss": 4.3198, + "step": 500 + }, + { + "epoch": 2.4, + "grad_norm": 18.464941024780273, + "learning_rate": 1.4757894736842106e-05, + "loss": 3.5096, + "step": 600 + }, + { + "epoch": 2.4, + "eval_runtime": 5.173, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.193, + "step": 600 + }, + { + "epoch": 2.8, + "grad_norm": 22.829710006713867, + "learning_rate": 1.371578947368421e-05, + "loss": 3.485, + "step": 700 + }, + { + "epoch": 3.2, + "grad_norm": 29.9942569732666, + "learning_rate": 1.2663157894736843e-05, + "loss": 3.032, + "step": 800 + }, + { + "epoch": 3.2, + "eval_runtime": 5.362, + "eval_samples_per_second": 0.932, + "eval_steps_per_second": 0.186, + "step": 800 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/checkpoint-800/training_args.bin b/ru/chatterbox-ru-t3k/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/conds.pt b/ru/chatterbox-ru-t3k/conds.pt new file mode 100644 index 0000000000000000000000000000000000000000..e13b43d1ce809473454627428ff413ebfc7e8660 --- /dev/null +++ b/ru/chatterbox-ru-t3k/conds.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e +size 107374 diff --git a/ru/chatterbox-ru-t3k/eval_results.json b/ru/chatterbox-ru-t3k/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..8cff453a1f2e9956bf6569d33324a0f3f24e8cf6 --- /dev/null +++ b/ru/chatterbox-ru-t3k/eval_results.json @@ -0,0 +1,6 @@ +{ + "epoch": 8.0, + "eval_runtime": 5.7978, + "eval_samples_per_second": 0.862, + "eval_steps_per_second": 0.172 +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/model.safetensors b/ru/chatterbox-ru-t3k/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d92c5f1e2ba99a4d3f32e2e8bc20e9ee1e39777e --- /dev/null +++ b/ru/chatterbox-ru-t3k/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3883569bca84e6be1e6c6d7e9dc15d669fd19d9bac686ae4016c17e309d786a +size 2129654648 diff --git a/ru/chatterbox-ru-t3k/pretrained_model_download/conds.pt b/ru/chatterbox-ru-t3k/pretrained_model_download/conds.pt new file mode 100644 index 0000000000000000000000000000000000000000..e13b43d1ce809473454627428ff413ebfc7e8660 --- /dev/null +++ b/ru/chatterbox-ru-t3k/pretrained_model_download/conds.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552d70568833628ba019c6b03459e77fe71ca197d5c560cef9411bee9d87f4e +size 107374 diff --git a/ru/chatterbox-ru-t3k/pretrained_model_download/s3gen.safetensors b/ru/chatterbox-ru-t3k/pretrained_model_download/s3gen.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b752a028b2a1c2843b76e0df9582d8d81d10669d --- /dev/null +++ b/ru/chatterbox-ru-t3k/pretrained_model_download/s3gen.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b78103c654207393955e4900aac14a12de8ef25f4b09424f1ef91941f161d4e +size 1056484620 diff --git a/ru/chatterbox-ru-t3k/pretrained_model_download/t3_cfg.safetensors b/ru/chatterbox-ru-t3k/pretrained_model_download/t3_cfg.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dd9884f4acb611912740cf3d9c8b33711a694ce --- /dev/null +++ b/ru/chatterbox-ru-t3k/pretrained_model_download/t3_cfg.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914cb1696f47527fe8852ca8f1fe1fa63cb34f76f9c715e84e067b744dd0da81 +size 2129653744 diff --git a/ru/chatterbox-ru-t3k/pretrained_model_download/tokenizer.json b/ru/chatterbox-ru-t3k/pretrained_model_download/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8db974d9d0d9cba9b2568601d2c697062ba0b789 --- /dev/null +++ b/ru/chatterbox-ru-t3k/pretrained_model_download/tokenizer.json @@ -0,0 +1,1435 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "\u0303": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/pretrained_model_download/ve.safetensors b/ru/chatterbox-ru-t3k/pretrained_model_download/ve.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0713f1587e627f23d93121e154a7de490d549dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/pretrained_model_download/ve.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c +size 5695784 diff --git a/ru/chatterbox-ru-t3k/s3gen.safetensors b/ru/chatterbox-ru-t3k/s3gen.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b752a028b2a1c2843b76e0df9582d8d81d10669d --- /dev/null +++ b/ru/chatterbox-ru-t3k/s3gen.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b78103c654207393955e4900aac14a12de8ef25f4b09424f1ef91941f161d4e +size 1056484620 diff --git a/ru/chatterbox-ru-t3k/source.txt b/ru/chatterbox-ru-t3k/source.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b0494395980e24838bbd31e32ec6ecfcd539e6a --- /dev/null +++ b/ru/chatterbox-ru-t3k/source.txt @@ -0,0 +1 @@ +https://huggingface.co/fron1runner/chatterbox-ru-t3k \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/t3_cfg.safetensors b/ru/chatterbox-ru-t3k/t3_cfg.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81f600329900ab363ee4bb4ea3d7f5733a8d1f27 --- /dev/null +++ b/ru/chatterbox-ru-t3k/t3_cfg.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44781f7027ea5f157e139b420071203cc0cd9b7958db3c0a737c3dae4661293c +size 2129653744 diff --git a/ru/chatterbox-ru-t3k/tokenizer.json b/ru/chatterbox-ru-t3k/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8db974d9d0d9cba9b2568601d2c697062ba0b789 --- /dev/null +++ b/ru/chatterbox-ru-t3k/tokenizer.json @@ -0,0 +1,1435 @@ +{ + "version": "1.0", + "truncation": null, + "padding": null, + "added_tokens": [ + { + "id": 0, + "special": true, + "content": "[STOP]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 1, + "special": true, + "content": "[UNK]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 2, + "special": true, + "content": "[SPACE]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 255, + "special": true, + "content": "[START]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false + }, + { + "id": 604, + "content": "[UH]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 605, + "content": "[UM]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 606, + "content": "[giggle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 607, + "content": "[laughter]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 608, + "content": "[guffaw]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 609, + "content": "[inhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 610, + "content": "[exhale]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 611, + "content": "[sigh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 612, + "content": "[cry]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 613, + "content": "[bark]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 614, + "content": "[howl]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 615, + "content": "[meow]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 616, + "content": "[singing]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 617, + "content": "[music]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 618, + "content": "[whistle]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 619, + "content": "[humming]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 620, + "content": "[gasp]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 621, + "content": "[groan]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 622, + "content": "[whisper]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 623, + "content": "[mumble]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 624, + "content": "[sniff]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 625, + "content": "[sneeze]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 626, + "content": "[cough]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 627, + "content": "[snore]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 628, + "content": "[chew]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 629, + "content": "[sip]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 630, + "content": "[clear_throat]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 631, + "content": "[kiss]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 632, + "content": "[shhh]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 633, + "content": "[gibberish]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 634, + "content": "[fr]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 635, + "content": "[es]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 636, + "content": "[de]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 637, + "content": "[it]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 638, + "content": "[ipa]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 639, + "content": "[end_of_label]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 695, + "content": "[PLACEHOLDER55]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 696, + "content": "[PLACEHOLDER56]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 697, + "content": "[PLACEHOLDER57]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 698, + "content": "[PLACEHOLDER58]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 699, + "content": "[PLACEHOLDER59]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 700, + "content": "[PLACEHOLDER60]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 701, + "content": "[PLACEHOLDER61]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 702, + "content": "[PLACEHOLDER62]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + }, + { + "id": 703, + "content": "[PLACEHOLDER63]", + "single_word": false, + "lstrip": false, + "rstrip": false, + "normalized": false, + "special": true + } + ], + "normalizer": null, + "pre_tokenizer": { + "type": "Whitespace" + }, + "post_processor": null, + "decoder": null, + "model": { + "type": "BPE", + "dropout": null, + "unk_token": "[UNK]", + "continuing_subword_prefix": null, + "end_of_word_suffix": null, + "fuse_unk": false, + "vocab": { + "[STOP]": 0, + "[UNK]": 1, + "[SPACE]": 2, + "!": 3, + "'": 4, + "(": 5, + ")": 6, + ",": 7, + "-": 8, + ".": 9, + "/": 10, + ":": 11, + ";": 12, + "?": 13, + "a": 14, + "b": 15, + "c": 16, + "d": 17, + "e": 18, + "f": 19, + "g": 20, + "h": 21, + "i": 22, + "j": 23, + "k": 24, + "l": 25, + "m": 26, + "n": 27, + "o": 28, + "p": 29, + "q": 30, + "r": 31, + "s": 32, + "t": 33, + "u": 34, + "v": 35, + "w": 36, + "x": 37, + "y": 38, + "z": 39, + "th": 40, + "in": 41, + "the": 42, + "an": 43, + "er": 44, + "ou": 45, + "re": 46, + "on": 47, + "at": 48, + "ed": 49, + "en": 50, + "to": 51, + "ing": 52, + "and": 53, + "is": 54, + "as": 55, + "al": 56, + "or": 57, + "of": 58, + "ar": 59, + "it": 60, + "es": 61, + "he": 62, + "st": 63, + "le": 64, + "om": 65, + "se": 66, + "be": 67, + "ad": 68, + "ow": 69, + "ly": 70, + "ch": 71, + "wh": 72, + "that": 73, + "you": 74, + "li": 75, + "ve": 76, + "ac": 77, + "ti": 78, + "ld": 79, + "me": 80, + "was": 81, + "gh": 82, + "id": 83, + "ll": 84, + "wi": 85, + "ent": 86, + "for": 87, + "ay": 88, + "ro": 89, + "ver": 90, + "ic": 91, + "her": 92, + "ke": 93, + "his": 94, + "no": 95, + "ut": 96, + "un": 97, + "ir": 98, + "lo": 99, + "we": 100, + "ri": 101, + "ha": 102, + "with": 103, + "ght": 104, + "out": 105, + "im": 106, + "ion": 107, + "all": 108, + "ab": 109, + "one": 110, + "ne": 111, + "ge": 112, + "ould": 113, + "ter": 114, + "mo": 115, + "had": 116, + "ce": 117, + "she": 118, + "go": 119, + "sh": 120, + "ur": 121, + "am": 122, + "so": 123, + "pe": 124, + "my": 125, + "de": 126, + "are": 127, + "but": 128, + "ome": 129, + "fr": 130, + "ther": 131, + "fe": 132, + "su": 133, + "do": 134, + "con": 135, + "te": 136, + "ain": 137, + "ere": 138, + "po": 139, + "if": 140, + "they": 141, + "us": 142, + "ag": 143, + "tr": 144, + "now": 145, + "oun": 146, + "this": 147, + "have": 148, + "not": 149, + "sa": 150, + "il": 151, + "up": 152, + "thing": 153, + "from": 154, + "ap": 155, + "him": 156, + "ack": 157, + "ation": 158, + "ant": 159, + "our": 160, + "op": 161, + "like": 162, + "ust": 163, + "ess": 164, + "bo": 165, + "ok": 166, + "ul": 167, + "ind": 168, + "ex": 169, + "com": 170, + "some": 171, + "there": 172, + "ers": 173, + "co": 174, + "res": 175, + "man": 176, + "ard": 177, + "pl": 178, + "wor": 179, + "way": 180, + "tion": 181, + "fo": 182, + "ca": 183, + "were": 184, + "by": 185, + "ate": 186, + "pro": 187, + "ted": 188, + "ound": 189, + "own": 190, + "would": 191, + "ts": 192, + "what": 193, + "qu": 194, + "ally": 195, + "ight": 196, + "ck": 197, + "gr": 198, + "when": 199, + "ven": 200, + "can": 201, + "ough": 202, + "ine": 203, + "end": 204, + "per": 205, + "ous": 206, + "od": 207, + "ide": 208, + "know": 209, + "ty": 210, + "very": 211, + "si": 212, + "ak": 213, + "who": 214, + "about": 215, + "ill": 216, + "them": 217, + "est": 218, + "red": 219, + "ye": 220, + "could": 221, + "ong": 222, + "your": 223, + "their": 224, + "em": 225, + "just": 226, + "other": 227, + "into": 228, + "any": 229, + "whi": 230, + "um": 231, + "tw": 232, + "ast": 233, + "der": 234, + "did": 235, + "ie": 236, + "been": 237, + "ace": 238, + "ink": 239, + "ity": 240, + "back": 241, + "ting": 242, + "br": 243, + "more": 244, + "ake": 245, + "pp": 246, + "then": 247, + "sp": 248, + "el": 249, + "use": 250, + "bl": 251, + "said": 252, + "over": 253, + "get": 254, + "[START]": 255, + "\"": 256, + "#": 257, + "$": 258, + "%": 259, + "&": 260, + "*": 261, + "+": 262, + "0": 263, + "1": 264, + "2": 265, + "3": 266, + "4": 267, + "5": 268, + "6": 269, + "7": 270, + "8": 271, + "9": 272, + "<": 273, + "=": 274, + ">": 275, + "@": 276, + "A": 277, + "B": 278, + "C": 279, + "D": 280, + "E": 281, + "F": 282, + "G": 283, + "H": 284, + "I": 285, + "J": 286, + "K": 287, + "L": 288, + "M": 289, + "N": 290, + "O": 291, + "P": 292, + "Q": 293, + "R": 294, + "S": 295, + "T": 296, + "U": 297, + "V": 298, + "W": 299, + "X": 300, + "Y": 301, + "Z": 302, + "[": 303, + "\\": 304, + "]": 305, + "^": 306, + "_": 307, + "`": 308, + "{": 309, + "|": 310, + "}": 311, + "~": 312, + "‐": 313, + "‑": 314, + "‒": 315, + "–": 316, + "—": 317, + "―": 318, + "‖": 319, + "‗": 320, + "‘": 321, + "’": 322, + "‚": 323, + "‛": 324, + "“": 325, + "”": 326, + "„": 327, + "‟": 328, + " ": 329, + "¡": 330, + "¢": 331, + "£": 332, + "¤": 333, + "¥": 334, + "¦": 335, + "§": 336, + "¨": 337, + "©": 338, + "ª": 339, + "«": 340, + "¬": 341, + "­": 342, + "®": 343, + "¯": 344, + "°": 345, + "±": 346, + "²": 347, + "³": 348, + "´": 349, + "µ": 350, + "¶": 351, + "·": 352, + "¸": 353, + "¹": 354, + "º": 355, + "»": 356, + "¼": 357, + "½": 358, + "¾": 359, + "¿": 360, + "À": 361, + "Á": 362, + "Â": 363, + "Ã": 364, + "Ä": 365, + "Å": 366, + "Æ": 367, + "Ç": 368, + "È": 369, + "É": 370, + "Ê": 371, + "Ë": 372, + "Ì": 373, + "Í": 374, + "Î": 375, + "Ï": 376, + "Ð": 377, + "Ñ": 378, + "Ò": 379, + "Ó": 380, + "Ô": 381, + "Õ": 382, + "Ö": 383, + "×": 384, + "Ø": 385, + "Ù": 386, + "Ú": 387, + "Û": 388, + "Ü": 389, + "Ý": 390, + "Þ": 391, + "ß": 392, + "à": 393, + "á": 394, + "â": 395, + "ã": 396, + "ä": 397, + "å": 398, + "æ": 399, + "ç": 400, + "è": 401, + "é": 402, + "ê": 403, + "ë": 404, + "ì": 405, + "í": 406, + "î": 407, + "ï": 408, + "ð": 409, + "ñ": 410, + "ò": 411, + "ó": 412, + "ô": 413, + "õ": 414, + "ö": 415, + "÷": 416, + "ø": 417, + "ù": 418, + "ú": 419, + "û": 420, + "ü": 421, + "ý": 422, + "þ": 423, + "ÿ": 424, + "ɐ": 425, + "ɑ": 426, + "ɒ": 427, + "ɓ": 428, + "ɔ": 429, + "ɕ": 430, + "ɖ": 431, + "ɗ": 432, + "ɘ": 433, + "ə": 434, + "ɚ": 435, + "ɛ": 436, + "ɜ": 437, + "ɝ": 438, + "ɞ": 439, + "ɟ": 440, + "ɠ": 441, + "ɡ": 442, + "ɢ": 443, + "ɣ": 444, + "ɤ": 445, + "ɥ": 446, + "ɦ": 447, + "ɧ": 448, + "ɨ": 449, + "ɩ": 450, + "ɪ": 451, + "ɫ": 452, + "ɬ": 453, + "ɭ": 454, + "ɮ": 455, + "ɯ": 456, + "ɰ": 457, + "ɱ": 458, + "ɲ": 459, + "ɳ": 460, + "ɴ": 461, + "ɵ": 462, + "ɶ": 463, + "ɷ": 464, + "ɸ": 465, + "ɹ": 466, + "ɺ": 467, + "ɻ": 468, + "ɼ": 469, + "ɽ": 470, + "ɾ": 471, + "ɿ": 472, + "ʀ": 473, + "ʁ": 474, + "ʂ": 475, + "ʃ": 476, + "ʄ": 477, + "ʅ": 478, + "ʆ": 479, + "ʇ": 480, + "ʈ": 481, + "ʉ": 482, + "ʊ": 483, + "ʋ": 484, + "ʌ": 485, + "ʍ": 486, + "ʎ": 487, + "ʏ": 488, + "ʐ": 489, + "ʑ": 490, + "ʒ": 491, + "ʓ": 492, + "ʔ": 493, + "ʕ": 494, + "ʖ": 495, + "ʗ": 496, + "ʘ": 497, + "ʙ": 498, + "ʚ": 499, + "ʛ": 500, + "ʜ": 501, + "ʝ": 502, + "ʞ": 503, + "ʟ": 504, + "ʠ": 505, + "ʡ": 506, + "ʢ": 507, + "ʣ": 508, + "ʤ": 509, + "ʥ": 510, + "ʦ": 511, + "ʧ": 512, + "ʨ": 513, + "ʩ": 514, + "ʪ": 515, + "ʫ": 516, + "ʬ": 517, + "ʭ": 518, + "ʮ": 519, + "ʯ": 520, + "ʰ": 521, + "ʱ": 522, + "ʲ": 523, + "ʳ": 524, + "ʴ": 525, + "ʵ": 526, + "ʶ": 527, + "ʷ": 528, + "ʸ": 529, + "ʹ": 530, + "ʺ": 531, + "ʻ": 532, + "ʼ": 533, + "ʽ": 534, + "ʾ": 535, + "ʿ": 536, + "ˀ": 537, + "ˁ": 538, + "˂": 539, + "˃": 540, + "˄": 541, + "˅": 542, + "ˆ": 543, + "ˇ": 544, + "ˈ": 545, + "ˉ": 546, + "ˊ": 547, + "ˋ": 548, + "ˌ": 549, + "ˍ": 550, + "ˎ": 551, + "ˏ": 552, + "ː": 553, + "ˑ": 554, + "˒": 555, + "˓": 556, + "˔": 557, + "˕": 558, + "˖": 559, + "˗": 560, + "˘": 561, + "˙": 562, + "˚": 563, + "˛": 564, + "˜": 565, + "˝": 566, + "˞": 567, + "˟": 568, + "ˠ": 569, + "ˡ": 570, + "ˢ": 571, + "ˣ": 572, + "ˤ": 573, + "˥": 574, + "˦": 575, + "˧": 576, + "˨": 577, + "˩": 578, + "˪": 579, + "˫": 580, + "ˬ": 581, + "˭": 582, + "ˮ": 583, + "˯": 584, + "˰": 585, + "˱": 586, + "˲": 587, + "˳": 588, + "˴": 589, + "˵": 590, + "˶": 591, + "˷": 592, + "˸": 593, + "˹": 594, + "˺": 595, + "˻": 596, + "˼": 597, + "˽": 598, + "˾": 599, + "˿": 600, + "ā": 601, + "ō": 602, + "…": 603, + "[UH]": 604, + "[UM]": 605, + "[giggle]": 606, + "[laughter]": 607, + "[guffaw]": 608, + "[inhale]": 609, + "[exhale]": 610, + "[sigh]": 611, + "[cry]": 612, + "[bark]": 613, + "[howl]": 614, + "[meow]": 615, + "[singing]": 616, + "[music]": 617, + "[whistle]": 618, + "[humming]": 619, + "[gasp]": 620, + "[groan]": 621, + "[whisper]": 622, + "[mumble]": 623, + "[sniff]": 624, + "[sneeze]": 625, + "[cough]": 626, + "[snore]": 627, + "[chew]": 628, + "[sip]": 629, + "[clear_throat]": 630, + "[kiss]": 631, + "[shhh]": 632, + "[gibberish]": 633, + "[fr]": 634, + "[es]": 635, + "[de]": 636, + "[it]": 637, + "[ipa]": 638, + "[end_of_label]": 639, + "ŋ": 640, + "ᵻ": 641, + "θ": 642, + "̩": 643, + "\u0303": 644, + "ɑː": 645, + "iː": 646, + "uː": 647, + "ɜː": 648, + "ɔː": 649, + "oː": 650, + "eɪ": 651, + "oʊ": 652, + "aɪ": 653, + "aʊ": 654, + "ɔɪ": 655, + "dʒ": 656, + "tʃ": 657, + "ɪŋ": 658, + "ᵻd": 659, + "ˈiː": 660, + "ˌiː": 661, + "ˈɪ": 662, + "ˌɪ": 663, + "ˈeɪ": 664, + "ˌeɪ": 665, + "ˈɛ": 666, + "ˌɛ": 667, + "ˈæ": 668, + "ˌæ": 669, + "ˈɑː": 670, + "ˌɑː": 671, + "ˈɔː": 672, + "ˌɔː": 673, + "oːɹ": 674, + "ˈoːɹ": 675, + "ˌoːɹ": 676, + "ˈoʊ": 677, + "ˌoʊ": 678, + "ˈʊ": 679, + "ˌʊ": 680, + "ˈuː": 681, + "ˌuː": 682, + "ˈɜː": 683, + "ˌɜː": 684, + "ˈʌ": 685, + "ˌʌ": 686, + "ˈaɪ": 687, + "ˌaɪ": 688, + "ˈaʊ": 689, + "ˌaʊ": 690, + "ˈɔɪ": 691, + "ˌɔɪ": 692, + "ˈɚ": 693, + "ˌɐ": 694, + "[PLACEHOLDER55]": 695, + "[PLACEHOLDER56]": 696, + "[PLACEHOLDER57]": 697, + "[PLACEHOLDER58]": 698, + "[PLACEHOLDER59]": 699, + "[PLACEHOLDER60]": 700, + "[PLACEHOLDER61]": 701, + "[PLACEHOLDER62]": 702, + "[PLACEHOLDER63]": 703 + }, + "merges": [ + "t h", + "i n", + "th e", + "a n", + "e r", + "o u", + "r e", + "o n", + "a t", + "e d", + "e n", + "t o", + "in g", + "an d", + "i s", + "a s", + "a l", + "o r", + "o f", + "a r", + "i t", + "e s", + "h e", + "s t", + "l e", + "o m", + "s e", + "b e", + "a d", + "o w", + "l y", + "c h", + "w h", + "th at", + "y ou", + "l i", + "v e", + "a c", + "t i", + "l d", + "m e", + "w as", + "g h", + "i d", + "l l", + "w i", + "en t", + "f or", + "a y", + "r o", + "v er", + "i c", + "h er", + "k e", + "h is", + "n o", + "u t", + "u n", + "i r", + "l o", + "w e", + "r i", + "h a", + "wi th", + "gh t", + "ou t", + "i m", + "i on", + "al l", + "a b", + "on e", + "n e", + "g e", + "ou ld", + "t er", + "m o", + "h ad", + "c e", + "s he", + "g o", + "s h", + "u r", + "a m", + "s o", + "p e", + "m y", + "d e", + "a re", + "b ut", + "om e", + "f r", + "the r", + "f e", + "s u", + "d o", + "c on", + "t e", + "a in", + "er e", + "p o", + "i f", + "the y", + "u s", + "a g", + "t r", + "n ow", + "ou n", + "th is", + "ha ve", + "no t", + "s a", + "i l", + "u p", + "th ing", + "fr om", + "a p", + "h im", + "ac k", + "at ion", + "an t", + "ou r", + "o p", + "li ke", + "u st", + "es s", + "b o", + "o k", + "u l", + "in d", + "e x", + "c om", + "s ome", + "the re", + "er s", + "c o", + "re s", + "m an", + "ar d", + "p l", + "w or", + "w ay", + "ti on", + "f o", + "c a", + "w ere", + "b y", + "at e", + "p ro", + "t ed", + "oun d", + "ow n", + "w ould", + "t s", + "wh at", + "q u", + "al ly", + "i ght", + "c k", + "g r", + "wh en", + "v en", + "c an", + "ou gh", + "in e", + "en d", + "p er", + "ou s", + "o d", + "id e", + "k now", + "t y", + "ver y", + "s i", + "a k", + "wh o", + "ab out", + "i ll", + "the m", + "es t", + "re d", + "y e", + "c ould", + "on g", + "you r", + "the ir", + "e m", + "j ust", + "o ther", + "in to", + "an y", + "wh i", + "u m", + "t w", + "as t", + "d er", + "d id", + "i e", + "be en", + "ac e", + "in k", + "it y", + "b ack", + "t ing", + "b r", + "mo re", + "a ke", + "p p", + "the n", + "s p", + "e l", + "u se", + "b l", + "sa id", + "o ver", + "ge t", + "ɑ ː", + "i ː", + "u ː", + "ɜ ː", + "ɔ ː", + "o ː", + "e ɪ", + "o ʊ", + "a ɪ", + "a ʊ", + "ɔ ɪ", + "d ʒ", + "t ʃ", + "ɪ ŋ", + "ᵻ d", + "ˈ iː", + "ˌ iː", + "ˈ ɪ", + "ˌ ɪ", + "ˈ eɪ", + "ˌ eɪ", + "ˈ ɛ", + "ˌ ɛ", + "ˈ æ", + "ˌ æ", + "ˈ ɑː", + "ˌ ɑː", + "ˈ ɔː", + "ˌ ɔː", + "oː ɹ", + "ˈ oːɹ", + "ˌ oːɹ", + "ˈ oʊ", + "ˌ oʊ", + "ˈ ʊ", + "ˌ ʊ", + "ˈ uː", + "ˌ uː", + "ˈ ɜː", + "ˌ ɜː", + "ˈ ʌ", + "ˌ ʌ", + "ˈ aɪ", + "ˌ aɪ", + "ˈ aʊ", + "ˌ aʊ", + "ˈ ɔɪ", + "ˌ ɔɪ", + "ˈ ɚ", + "ˌ ɐ" + ] + } +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/train_results.json b/ru/chatterbox-ru-t3k/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..1dcedca39bcb5eb43e0567bb35d62b4e44f8cefb --- /dev/null +++ b/ru/chatterbox-ru-t3k/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 8.0, + "total_flos": 0.0, + "train_loss": 2.4700030918121336, + "train_runtime": 33124.2258, + "train_samples_per_second": 2.414, + "train_steps_per_second": 0.06 +} \ No newline at end of file diff --git a/ru/chatterbox-ru-t3k/trainer_state.json b/ru/chatterbox-ru-t3k/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0e2ef0313d1b1af049e122bba23161056c9377 --- /dev/null +++ b/ru/chatterbox-ru-t3k/trainer_state.json @@ -0,0 +1,259 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 200, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "eval_runtime": 17.4243, + "eval_samples_per_second": 0.287, + "eval_steps_per_second": 0.057, + "step": 0 + }, + { + "epoch": 0.4, + "grad_norm": 10.4270658493042, + "learning_rate": 1.9600000000000002e-05, + "loss": 6.4004, + "step": 100 + }, + { + "epoch": 0.8, + "grad_norm": 9.59022331237793, + "learning_rate": 1.8968421052631582e-05, + "loss": 5.1737, + "step": 200 + }, + { + "epoch": 0.8, + "eval_runtime": 5.1058, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.196, + "step": 200 + }, + { + "epoch": 1.2, + "grad_norm": 16.000308990478516, + "learning_rate": 1.7915789473684214e-05, + "loss": 4.7286, + "step": 300 + }, + { + "epoch": 1.6, + "grad_norm": 12.360644340515137, + "learning_rate": 1.6863157894736844e-05, + "loss": 4.442, + "step": 400 + }, + { + "epoch": 1.6, + "eval_runtime": 5.1817, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.193, + "step": 400 + }, + { + "epoch": 2.0, + "grad_norm": 16.141651153564453, + "learning_rate": 1.5810526315789473e-05, + "loss": 4.3198, + "step": 500 + }, + { + "epoch": 2.4, + "grad_norm": 18.464941024780273, + "learning_rate": 1.4757894736842106e-05, + "loss": 3.5096, + "step": 600 + }, + { + "epoch": 2.4, + "eval_runtime": 5.173, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.193, + "step": 600 + }, + { + "epoch": 2.8, + "grad_norm": 22.829710006713867, + "learning_rate": 1.371578947368421e-05, + "loss": 3.485, + "step": 700 + }, + { + "epoch": 3.2, + "grad_norm": 29.9942569732666, + "learning_rate": 1.2663157894736843e-05, + "loss": 3.032, + "step": 800 + }, + { + "epoch": 3.2, + "eval_runtime": 5.362, + "eval_samples_per_second": 0.932, + "eval_steps_per_second": 0.186, + "step": 800 + }, + { + "epoch": 3.6, + "grad_norm": 36.433963775634766, + "learning_rate": 1.1621052631578948e-05, + "loss": 2.5797, + "step": 900 + }, + { + "epoch": 4.0, + "grad_norm": 24.55065155029297, + "learning_rate": 1.0568421052631579e-05, + "loss": 2.5185, + "step": 1000 + }, + { + "epoch": 4.0, + "eval_runtime": 4.999, + "eval_samples_per_second": 1.0, + "eval_steps_per_second": 0.2, + "step": 1000 + }, + { + "epoch": 4.4, + "grad_norm": 25.797325134277344, + "learning_rate": 9.515789473684212e-06, + "loss": 1.6965, + "step": 1100 + }, + { + "epoch": 4.8, + "grad_norm": 32.363624572753906, + "learning_rate": 8.463157894736843e-06, + "loss": 1.7272, + "step": 1200 + }, + { + "epoch": 4.8, + "eval_runtime": 5.5586, + "eval_samples_per_second": 0.9, + "eval_steps_per_second": 0.18, + "step": 1200 + }, + { + "epoch": 5.2, + "grad_norm": 36.48984146118164, + "learning_rate": 7.410526315789475e-06, + "loss": 1.342, + "step": 1300 + }, + { + "epoch": 5.6, + "grad_norm": 33.30397415161133, + "learning_rate": 6.357894736842106e-06, + "loss": 1.0472, + "step": 1400 + }, + { + "epoch": 5.6, + "eval_runtime": 5.6569, + "eval_samples_per_second": 0.884, + "eval_steps_per_second": 0.177, + "step": 1400 + }, + { + "epoch": 6.0, + "grad_norm": 29.339319229125977, + "learning_rate": 5.305263157894738e-06, + "loss": 1.0126, + "step": 1500 + }, + { + "epoch": 6.4, + "grad_norm": 48.872501373291016, + "learning_rate": 4.252631578947369e-06, + "loss": 0.5935, + "step": 1600 + }, + { + "epoch": 6.4, + "eval_runtime": 5.4682, + "eval_samples_per_second": 0.914, + "eval_steps_per_second": 0.183, + "step": 1600 + }, + { + "epoch": 6.8, + "grad_norm": 24.40644073486328, + "learning_rate": 3.2000000000000003e-06, + "loss": 0.5881, + "step": 1700 + }, + { + "epoch": 7.2, + "grad_norm": 13.23859691619873, + "learning_rate": 2.1473684210526317e-06, + "loss": 0.4895, + "step": 1800 + }, + { + "epoch": 7.2, + "eval_runtime": 5.2891, + "eval_samples_per_second": 0.945, + "eval_steps_per_second": 0.189, + "step": 1800 + }, + { + "epoch": 7.6, + "grad_norm": 13.824739456176758, + "learning_rate": 1.0947368421052632e-06, + "loss": 0.3585, + "step": 1900 + }, + { + "epoch": 8.0, + "grad_norm": 13.559722900390625, + "learning_rate": 4.2105263157894737e-08, + "loss": 0.3556, + "step": 2000 + }, + { + "epoch": 8.0, + "eval_runtime": 5.0911, + "eval_samples_per_second": 0.982, + "eval_steps_per_second": 0.196, + "step": 2000 + }, + { + "epoch": 8.0, + "step": 2000, + "total_flos": 0.0, + "train_loss": 2.4700030918121336, + "train_runtime": 33124.2258, + "train_samples_per_second": 2.414, + "train_steps_per_second": 0.06 + } + ], + "logging_steps": 100, + "max_steps": 2000, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 400, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 20, + "trial_name": null, + "trial_params": null +} diff --git a/ru/chatterbox-ru-t3k/training_args.bin b/ru/chatterbox-ru-t3k/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78823461d97a70d49679184c240c4e863ac5dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1b851ec11d67e4d085f4a6f854146e4156257df5267eca566491537523572 +size 5304 diff --git a/ru/chatterbox-ru-t3k/ve.safetensors b/ru/chatterbox-ru-t3k/ve.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0713f1587e627f23d93121e154a7de490d549dfb --- /dev/null +++ b/ru/chatterbox-ru-t3k/ve.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0921cab452fa278bc25cd23ffd59d36f816d7dc5181dd1bef9751a7fb61f63c +size 5695784