hootan09

vladislavbro committed on Dec 3, 2025

Commit

07ec72d

verified ·

0 Parent(s):

Duplicate from onnx-community/chatterbox-ONNX

Browse files

Co-authored-by: Vladislav Bronzov <vladislavbro@users.noreply.huggingface.co>

Files changed (22) hide show

.gitattributes +45 -0
README.md +248 -0
config.json +45 -0
default_voice.wav +3 -0
generation_config.json +8 -0
onnx/conditional_decoder.onnx +3 -0
onnx/conditional_decoder.onnx_data +3 -0
onnx/embed_tokens.onnx +3 -0
onnx/embed_tokens.onnx_data +3 -0
onnx/language_model.onnx +3 -0
onnx/language_model.onnx_data +3 -0
onnx/language_model_fp16.onnx +3 -0
onnx/language_model_fp16.onnx_data +3 -0
onnx/language_model_q4.onnx +3 -0
onnx/language_model_q4.onnx_data +3 -0
onnx/language_model_q4f16.onnx +3 -0
onnx/language_model_q4f16.onnx_data +3 -0
onnx/speech_encoder.onnx +3 -0
onnx/speech_encoder.onnx_data +3 -0
preprocessor_config.json +5 -0
tokenizer.json +1611 -0
tokenizer_config.json +12 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,45 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+llama3.onnx.data filter=lfs diff=lfs merge=lfs -text
+llama3.data filter=lfs diff=lfs merge=lfs -text
+onnx/language_model.onnx_data filter=lfs diff=lfs merge=lfs -text
+default_voice.wav filter=lfs diff=lfs merge=lfs -text
+onnx/conditional_decoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/embed_tokens.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/speech_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/language_model_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/language_model_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
+onnx/language_model_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,248 @@

+---
+license: mit
+language:
+- en
+pipeline_tag: text-to-speech
+tags:
+- text-to-speech
+- speech
+- speech-generation
+- voice-cloning
+library_name: Chatterbox
+base_model:
+- ResembleAI/chatterbox
+---
+<img width="800" alt="cb-big2" src="https://github.com/user-attachments/assets/bd8c5f03-e91d-4ee5-b680-57355da204d1" />
+<h1 style="font-size: 32px">Chatterbox TTS</h1>
+<div style="display: flex; align-items: center; gap: 12px">
+  <a href="https://resemble-ai.github.io/chatterbox_demopage/">
+    <img src="https://img.shields.io/badge/listen-demo_samples-blue" alt="Listen to Demo Samples" />
+  </a>
+  <a href="https://huggingface.co/spaces/ResembleAI/Chatterbox">
+    <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/open-in-hf-spaces-sm.svg" alt="Open in HF Spaces" />
+  </a>
+  <a href="https://podonos.com/resembleai/chatterbox">
+    <img src="https://static-public.podonos.com/badges/insight-on-pdns-sm-dark.svg" alt="Insight on Podonos" />
+  </a>
+</div>
+<div style="display: flex; align-items: center; gap: 8px;">
+  <img width="100" alt="resemble-logo-horizontal" src="https://github.com/user-attachments/assets/35cf756b-3506-4943-9c72-c05ddfa4e525" />
+</div>
+**Chatterbox** is [Resemble AI's](https://resemble.ai) production-grade open source TTS model. Chatterbox supports **English** out of the box. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.
+Whether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support **emotion exaggeration control**, a powerful feature that makes your voices stand out.
+Chatterbox is provided in an exported ONNX format, enabling fast and portable inference with ONNX Runtime across platforms.
+# Key Details
+- SoTA zeroshot English TTS
+- 0.5B Llama backbone
+- Unique exaggeration/intensity control
+- Ultra-stable with alignment-informed inference
+- Trained on 0.5M hours of cleaned data
+- Watermarked outputs (optional)
+- Easy voice conversion script using onnxruntime
+- [Outperforms ElevenLabs](https://podonos.com/resembleai/chatterbox)
+# Tips
+- **General Use (TTS and Voice Agents):**
+  - The default settings (`exaggeration=0.5`, `cfg=0.5`) work well for most prompts.
+- **Expressive or Dramatic Speech:**
+  - Try increasing `exaggeration` to around `0.7` or higher.
+  - Higher `exaggeration` tends to speed up speech.
+# Usage
+[Link to GitHub ONNX Export and Inference script](https://github.com/VladOS95-cyber/onnx_conversion_scripts/tree/main/chatterbox)
+```python
+# !pip install --upgrade onnxruntime==1.22.1 huggingface_hub==0.34.4 transformers==4.46.3 numpy==2.2.6 tqdm==4.67.1 librosa==0.11.0 soundfile==0.13.1 resemble-perth==1.0.1
+import onnxruntime
+from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer
+import numpy as np
+from tqdm import tqdm
+import librosa
+import soundfile as sf
+# Sample rate (Hz) used when loading the reference voice and when writing the generated audio.
+S3GEN_SR = 24000
+# Token ids of the `[START_SPEECH]` / `[STOP_SPEECH]` special tokens declared in tokenizer.json.
+START_SPEECH_TOKEN = 6561
+STOP_SPEECH_TOKEN = 6562
+class RepetitionPenaltyLogitsProcessor:
+    def __init__(self, penalty: float):
+        if not isinstance(penalty, float) or not (penalty > 0):
+            raise ValueError(f"`penalty` must be a strictly positive float, but is {penalty}")
+        self.penalty = penalty
+    def __call__(self, input_ids: np.ndarray, scores: np.ndarray) -> np.ndarray:
+        score = np.take_along_axis(scores, input_ids, axis=1)
+        score = np.where(score < 0, score * self.penalty, score / self.penalty)
+        scores_processed = scores.copy()
+        np.put_along_axis(scores_processed, input_ids, score, axis=1)
+        return scores_processed
+def run_inference(
+    text="The Lord of the Rings is the greatest work of literature.",
+    target_voice_path=None,
+    max_new_tokens = 256,
+    exaggeration=0.5,
+    output_dir="converted",
+    output_file_name="output.wav",
+    apply_watermark=True,
+):
+    model_id = "onnx-community/chatterbox-onnx"
+    if not target_voice_path:
+        target_voice_path = hf_hub_download(repo_id=model_id, filename="default_voice.wav", local_dir=output_dir)
+    ## Load model
+    speech_encoder_path = hf_hub_download(repo_id=model_id, filename="speech_encoder.onnx", local_dir=output_dir, subfolder='onnx')
+    hf_hub_download(repo_id=model_id, filename="speech_encoder.onnx_data", local_dir=output_dir, subfolder='onnx')
+    embed_tokens_path = hf_hub_download(repo_id=model_id, filename="embed_tokens.onnx", local_dir=output_dir, subfolder='onnx')
+    hf_hub_download(repo_id=model_id, filename="embed_tokens.onnx_data", local_dir=output_dir, subfolder='onnx')
+    conditional_decoder_path = hf_hub_download(repo_id=model_id, filename="conditional_decoder.onnx", local_dir=output_dir, subfolder='onnx')
+    hf_hub_download(repo_id=model_id, filename="conditional_decoder.onnx_data", local_dir=output_dir, subfolder='onnx')
+    language_model_path = hf_hub_download(repo_id=model_id, filename="language_model.onnx", local_dir=output_dir, subfolder='onnx')
+    hf_hub_download(repo_id=model_id, filename="language_model.onnx_data", local_dir=output_dir, subfolder='onnx')
+    # # Start inferense sessions
+    speech_encoder_session = onnxruntime.InferenceSession(speech_encoder_path)
+    embed_tokens_session = onnxruntime.InferenceSession(embed_tokens_path)
+    llama_with_past_session = onnxruntime.InferenceSession(language_model_path)
+    cond_decoder_session = onnxruntime.InferenceSession(conditional_decoder_path)
+    def execute_text_to_audio_inference(text):
+        print("Start inference script...")
+        audio_values, _ = librosa.load(target_voice_path, sr=S3GEN_SR)
+        audio_values = audio_values[np.newaxis, :].astype(np.float32)
+        ## Prepare input
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        input_ids = tokenizer(text, return_tensors="np")["input_ids"].astype(np.int64)
+        position_ids = np.where(
+            input_ids >= START_SPEECH_TOKEN,
+            0,
+            np.arange(input_ids.shape[1])[np.newaxis, :] - 1
+        )
+        ort_embed_tokens_inputs = {
+            "input_ids": input_ids,
+            "position_ids": position_ids,
+            "exaggeration": np.array([exaggeration], dtype=np.float32)
+        }
+        ## Instantiate the logits processors.
+        repetition_penalty = 1.2
+        repetition_penalty_processor = RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty)
+        num_hidden_layers = 30
+        num_key_value_heads = 16
+        head_dim = 64
+        generate_tokens = np.array([[START_SPEECH_TOKEN]], dtype=np.long)
+        # ---- Generation Loop using kv_cache ----
+        for i in tqdm(range(max_new_tokens), desc="Sampling", dynamic_ncols=True):
+            inputs_embeds = embed_tokens_session.run(None, ort_embed_tokens_inputs)[0]
+            if i == 0:
+                ort_speech_encoder_input = {
+                    "audio_values": audio_values,
+                }
+                cond_emb, prompt_token, ref_x_vector, prompt_feat = speech_encoder_session.run(None, ort_speech_encoder_input)
+                inputs_embeds = np.concatenate((cond_emb, inputs_embeds), axis=1)
+                ## Prepare llm inputs
+                batch_size, seq_len, _ = inputs_embeds.shape
+                past_key_values = {
+                    f"past_key_values.{layer}.{kv}": np.zeros([batch_size, num_key_value_heads, 0, head_dim], dtype=np.float32)
+                    for layer in range(num_hidden_layers)
+                    for kv in ("key", "value")
+                }
+                attention_mask = np.ones((batch_size, seq_len), dtype=np.int64)
+            logits, *present_key_values = llama_with_past_session.run(None, dict(
+                inputs_embeds=inputs_embeds,
+                attention_mask=attention_mask,
+                **past_key_values,
+            ))
+            logits = logits[:, -1, :]
+            next_token_logits = repetition_penalty_processor(generate_tokens, logits)
+            next_token = np.argmax(next_token_logits, axis=-1, keepdims=True).astype(np.int64)
+            generate_tokens = np.concatenate((generate_tokens, next_token), axis=-1)
+            if (next_token.flatten() == STOP_SPEECH_TOKEN).all():
+                break
+            # Get embedding for the new token.
+            position_ids = np.full(
+                (input_ids.shape[0], 1),
+                i + 1,
+                dtype=np.int64,
+            )
+            ort_embed_tokens_inputs["input_ids"] = next_token
+            ort_embed_tokens_inputs["position_ids"] = position_ids
+            ## Update values for next generation loop
+            attention_mask = np.concatenate([attention_mask, np.ones((batch_size, 1), dtype=np.int64)], axis=1)
+            for j, key in enumerate(past_key_values):
+                past_key_values[key] = present_key_values[j]
+        speech_tokens = generate_tokens[:, 1:-1]
+        speech_tokens = np.concatenate([prompt_token, speech_tokens], axis=1)
+        return speech_tokens, ref_x_vector, prompt_feat
+    speech_tokens, speaker_embeddings, speaker_features = execute_text_to_audio_inference(text)
+    cond_incoder_input = {
+        "speech_tokens": speech_tokens,
+        "speaker_embeddings": speaker_embeddings,
+        "speaker_features": speaker_features,
+    }
+    wav = cond_decoder_session.run(None, cond_incoder_input)[0]
+    wav = np.squeeze(wav, axis=0)
+    # Optional: Apply watermark
+    if apply_watermark:
+        import perth
+        watermarker = perth.PerthImplicitWatermarker()
+        wav = watermarker.apply_watermark(wav, sample_rate=S3GEN_SR)
+    sf.write(output_file_name, wav, S3GEN_SR)
+    print(f"{output_file_name} was successfully saved")
+if __name__ == "__main__":
+    run_inference(
+        text="Ezreal and Jinx teamed up with Ahri, Yasuo, and Teemo to take down the enemy's Nexus in an epic late-game pentakill.",
+        exaggeration=0.5,
+        output_file_name="output.wav",
+        apply_watermark=False,
+    )
+```
+# Acknowledgements
+- [Xenova](https://huggingface.co/Xenova)
+- [Vladislav Bronzov](https://github.com/VladOS95-cyber)
+- [Resemble AI](https://github.com/resemble-ai/chatterbox)
+# Built-in PerTh Watermarking for Responsible AI
+Every audio file generated by Chatterbox includes [Resemble AI's Perth (Perceptual Threshold) Watermarker](https://github.com/resemble-ai/perth) - imperceptible neural watermarks that survive MP3 compression, audio editing, and common manipulations while maintaining nearly 100% detection accuracy.
+# Disclaimer
+Don't use this model to do bad things. Prompts are sourced from freely available data on the internet.

config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "model_type": "chatterbox",
+  "text_config": {
+    "architectures": [
+      "LlamaForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "head_dim": 64,
+    "hidden_act": "silu",
+    "hidden_size": 1024,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "max_position_embeddings": 131072,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 30,
+    "num_key_value_heads": 16,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 8.0,
+      "high_freq_factor": 4.0,
+      "low_freq_factor": 1.0,
+      "original_max_position_embeddings": 8192,
+      "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "tie_word_embeddings": false,
+    "torch_dtype": "float32",
+    "transformers_version": "4.46.3",
+    "use_cache": true,
+    "vocab_size": 8194
+  },
+  "transformers.js_config": {
+    "use_external_data_format": true,
+    "kv_cache_dtype": {
+      "fp16": "float16",
+      "q4f16": "float16"
+    }
+  }
+}

default_voice.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ebc531cdaba358a327099c1c4f0448026719957bcf4d8e9868767f227e02f4e
+size 714320

generation_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "bos_token_id": 1,
+  "eos_token_id": [
+    2,
+    6562
+  ],
+  "repetition_penalty": 1.2
+}

onnx/conditional_decoder.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1656d0d31332bae1854839959a3139300ebb67c178651dfa3f8c5fbfa5351351
+size 6350448

onnx/conditional_decoder.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:51d58345a272747665ec9d5bb61e01835258a940e321a288582ac4c18cf01b5a
+size 533970816

onnx/embed_tokens.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:160722ec14789f616abdb1e31916cbbf9223c03fde0ab546d64ca74fb72e430b
+size 13286

onnx/embed_tokens.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:898c563c3a5ca1b9ea10ce89b0cdcf252b0bb5ab460dfc4eadea003b56e5d2ee
+size 61640704

onnx/language_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:861a34585605e8ad671051788afc495dcbeaee833a41523a1b33aded9c3babc7
+size 171387

onnx/language_model.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efe9a1173c40d50bc651cb96ebff9f23d6f20d5b3a11b0685510e3a3facdbcf1
+size 2080632832

onnx/language_model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c36a5bbbc2a4ed8c345033896612cd320fd0971a0f5e6447ab4cdd2d7f22e36
+size 172657

onnx/language_model_fp16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1b751ce1af740bbfa6cde9dc7d359afedba4b6de5a875b6fd199131216f16d
+size 1040316416

onnx/language_model_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f8cdca83b2493536cbf3acf421199808a3d68736f55f4eabd20ef8a99da4313
+size 227911

onnx/language_model_q4.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5203d1e83c159316f9923c5c83759f6a34f87be1322ce4ad0facd9fc4aef4790
+size 353621248

onnx/language_model_q4f16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3b78e9235be5e2e2a811e482399155cb30415f6d87c98c21d12bf48843fc928f
+size 229388

onnx/language_model_q4f16.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2480da95471e4cd41a6b1876686e65e8a594c82459f2e5d6fd7592a5bad4e6da
+size 304737408

onnx/speech_encoder.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f1c8a0f89b77bf9cd5dd8f2e034eb2c79dc00fe70d41196b28c257643b00ccb
+size 1184608

onnx/speech_encoder.onnx_data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04431dcef6325c54b02de2219845888b464bcd1f1ac2f8839c2fecd1ed2ef294
+size 591274880

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "feature_extractor_type": "ChatterboxFeatureExtractor",
+  "processor_class": "ChatterboxProcessor",
+  "sampling_rate": 24000
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,1611 @@

+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 0,
+      "special": true,
+      "content": "[STOP]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    },
+    {
+      "id": 1,
+      "special": true,
+      "content": "[UNK]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    },
+    {
+      "id": 255,
+      "special": true,
+      "content": "[START]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false
+    },
+    {
+      "id": 604,
+      "content": "[UH]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 605,
+      "content": "[UM]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 606,
+      "content": "[giggle]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 607,
+      "content": "[laughter]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 608,
+      "content": "[guffaw]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 609,
+      "content": "[inhale]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 610,
+      "content": "[exhale]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 611,
+      "content": "[sigh]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 612,
+      "content": "[cry]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 613,
+      "content": "[bark]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 614,
+      "content": "[howl]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 615,
+      "content": "[meow]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 616,
+      "content": "[singing]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 617,
+      "content": "[music]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 618,
+      "content": "[whistle]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 619,
+      "content": "[humming]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 620,
+      "content": "[gasp]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 621,
+      "content": "[groan]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 622,
+      "content": "[whisper]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 623,
+      "content": "[mumble]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 624,
+      "content": "[sniff]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 625,
+      "content": "[sneeze]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 626,
+      "content": "[cough]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 627,
+      "content": "[snore]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 628,
+      "content": "[chew]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 629,
+      "content": "[sip]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 630,
+      "content": "[clear_throat]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 631,
+      "content": "[kiss]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 632,
+      "content": "[shhh]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 633,
+      "content": "[gibberish]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 634,
+      "content": "[fr]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 635,
+      "content": "[es]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 636,
+      "content": "[de]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 637,
+      "content": "[it]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 638,
+      "content": "[ipa]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 639,
+      "content": "[end_of_label]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 695,
+      "content": "[PLACEHOLDER55]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 696,
+      "content": "[PLACEHOLDER56]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 697,
+      "content": "[PLACEHOLDER57]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 698,
+      "content": "[PLACEHOLDER58]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 699,
+      "content": "[PLACEHOLDER59]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 700,
+      "content": "[PLACEHOLDER60]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 701,
+      "content": "[PLACEHOLDER61]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 702,
+      "content": "[PLACEHOLDER62]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 703,
+      "content": "[PLACEHOLDER63]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6561,
+      "content": "[START_SPEECH]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6562,
+      "content": "[STOP_SPEECH]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 6563,
+      "content": "[EXAGGERATION]",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": {
+    "type": "Replace",
+    "pattern": {
+      "Regex": "\\s+"
+    },
+    "content": " "
+  },
+  "pre_tokenizer": null,
+  "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "[EXAGGERATION]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[STOP]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "[EXAGGERATION]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START]",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[STOP]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[EXAGGERATION]",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START]",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[STOP]",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 1
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "[START_SPEECH]",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "[START]": {
+        "id": "[START]",
+        "ids": [
+          255
+        ],
+        "tokens": [
+          "[START]"
+        ]
+      },
+      "[STOP]": {
+        "id": "[STOP]",
+        "ids": [
+          0
+        ],
+        "tokens": [
+          "[STOP]"
+        ]
+      },
+      "[EXAGGERATION]": {
+        "id": "[EXAGGERATION]",
+        "ids": [
+          6563
+        ],
+        "tokens": [
+          "[EXAGGERATION]"
+        ]
+      },
+      "[START_SPEECH]": {
+        "id": "[START_SPEECH]",
+        "ids": [
+          6561
+        ],
+        "tokens": [
+          "[START_SPEECH]"
+        ]
+      }
+    }
+  },
+  "decoder": {
+    "type": "Fuse"
+  },
+  "model": {
+    "type": "BPE",
+    "dropout": null,
+    "unk_token": "[UNK]",
+    "continuing_subword_prefix": null,
+    "end_of_word_suffix": null,
+    "fuse_unk": false,
+    "vocab": {
+      "[STOP]": 0,
+      "[UNK]": 1,
+      " ": 2,
+      "!": 3,
+      "'": 4,
+      "(": 5,
+      ")": 6,
+      ",": 7,
+      "-": 8,
+      ".": 9,
+      "/": 10,
+      ":": 11,
+      ";": 12,
+      "?": 13,
+      "a": 14,
+      "b": 15,
+      "c": 16,
+      "d": 17,
+      "e": 18,
+      "f": 19,
+      "g": 20,
+      "h": 21,
+      "i": 22,
+      "j": 23,
+      "k": 24,
+      "l": 25,
+      "m": 26,
+      "n": 27,
+      "o": 28,
+      "p": 29,
+      "q": 30,
+      "r": 31,
+      "s": 32,
+      "t": 33,
+      "u": 34,
+      "v": 35,
+      "w": 36,
+      "x": 37,
+      "y": 38,
+      "z": 39,
+      "th": 40,
+      "in": 41,
+      "the": 42,
+      "an": 43,
+      "er": 44,
+      "ou": 45,
+      "re": 46,
+      "on": 47,
+      "at": 48,
+      "ed": 49,
+      "en": 50,
+      "to": 51,
+      "ing": 52,
+      "and": 53,
+      "is": 54,
+      "as": 55,
+      "al": 56,
+      "or": 57,
+      "of": 58,
+      "ar": 59,
+      "it": 60,
+      "es": 61,
+      "he": 62,
+      "st": 63,
+      "le": 64,
+      "om": 65,
+      "se": 66,
+      "be": 67,
+      "ad": 68,
+      "ow": 69,
+      "ly": 70,
+      "ch": 71,
+      "wh": 72,
+      "that": 73,
+      "you": 74,
+      "li": 75,
+      "ve": 76,
+      "ac": 77,
+      "ti": 78,
+      "ld": 79,
+      "me": 80,
+      "was": 81,
+      "gh": 82,
+      "id": 83,
+      "ll": 84,
+      "wi": 85,
+      "ent": 86,
+      "for": 87,
+      "ay": 88,
+      "ro": 89,
+      "ver": 90,
+      "ic": 91,
+      "her": 92,
+      "ke": 93,
+      "his": 94,
+      "no": 95,
+      "ut": 96,
+      "un": 97,
+      "ir": 98,
+      "lo": 99,
+      "we": 100,
+      "ri": 101,
+      "ha": 102,
+      "with": 103,
+      "ght": 104,
+      "out": 105,
+      "im": 106,
+      "ion": 107,
+      "all": 108,
+      "ab": 109,
+      "one": 110,
+      "ne": 111,
+      "ge": 112,
+      "ould": 113,
+      "ter": 114,
+      "mo": 115,
+      "had": 116,
+      "ce": 117,
+      "she": 118,
+      "go": 119,
+      "sh": 120,
+      "ur": 121,
+      "am": 122,
+      "so": 123,
+      "pe": 124,
+      "my": 125,
+      "de": 126,
+      "are": 127,
+      "but": 128,
+      "ome": 129,
+      "fr": 130,
+      "ther": 131,
+      "fe": 132,
+      "su": 133,
+      "do": 134,
+      "con": 135,
+      "te": 136,
+      "ain": 137,
+      "ere": 138,
+      "po": 139,
+      "if": 140,
+      "they": 141,
+      "us": 142,
+      "ag": 143,
+      "tr": 144,
+      "now": 145,
+      "oun": 146,
+      "this": 147,
+      "have": 148,
+      "not": 149,
+      "sa": 150,
+      "il": 151,
+      "up": 152,
+      "thing": 153,
+      "from": 154,
+      "ap": 155,
+      "him": 156,
+      "ack": 157,
+      "ation": 158,
+      "ant": 159,
+      "our": 160,
+      "op": 161,
+      "like": 162,
+      "ust": 163,
+      "ess": 164,
+      "bo": 165,
+      "ok": 166,
+      "ul": 167,
+      "ind": 168,
+      "ex": 169,
+      "com": 170,
+      "some": 171,
+      "there": 172,
+      "ers": 173,
+      "co": 174,
+      "res": 175,
+      "man": 176,
+      "ard": 177,
+      "pl": 178,
+      "wor": 179,
+      "way": 180,
+      "tion": 181,
+      "fo": 182,
+      "ca": 183,
+      "were": 184,
+      "by": 185,
+      "ate": 186,
+      "pro": 187,
+      "ted": 188,
+      "ound": 189,
+      "own": 190,
+      "would": 191,
+      "ts": 192,
+      "what": 193,
+      "qu": 194,
+      "ally": 195,
+      "ight": 196,
+      "ck": 197,
+      "gr": 198,
+      "when": 199,
+      "ven": 200,
+      "can": 201,
+      "ough": 202,
+      "ine": 203,
+      "end": 204,
+      "per": 205,
+      "ous": 206,
+      "od": 207,
+      "ide": 208,
+      "know": 209,
+      "ty": 210,
+      "very": 211,
+      "si": 212,
+      "ak": 213,
+      "who": 214,
+      "about": 215,
+      "ill": 216,
+      "them": 217,
+      "est": 218,
+      "red": 219,
+      "ye": 220,
+      "could": 221,
+      "ong": 222,
+      "your": 223,
+      "their": 224,
+      "em": 225,
+      "just": 226,
+      "other": 227,
+      "into": 228,
+      "any": 229,
+      "whi": 230,
+      "um": 231,
+      "tw": 232,
+      "ast": 233,
+      "der": 234,
+      "did": 235,
+      "ie": 236,
+      "been": 237,
+      "ace": 238,
+      "ink": 239,
+      "ity": 240,
+      "back": 241,
+      "ting": 242,
+      "br": 243,
+      "more": 244,
+      "ake": 245,
+      "pp": 246,
+      "then": 247,
+      "sp": 248,
+      "el": 249,
+      "use": 250,
+      "bl": 251,
+      "said": 252,
+      "over": 253,
+      "get": 254,
+      "[START]": 255,
+      "\"": 256,
+      "#": 257,
+      "$": 258,
+      "%": 259,
+      "&": 260,
+      "*": 261,
+      "+": 262,
+      "0": 263,
+      "1": 264,
+      "2": 265,
+      "3": 266,
+      "4": 267,
+      "5": 268,
+      "6": 269,
+      "7": 270,
+      "8": 271,
+      "9": 272,
+      "<": 273,
+      "=": 274,
+      ">": 275,
+      "@": 276,
+      "A": 277,
+      "B": 278,
+      "C": 279,
+      "D": 280,
+      "E": 281,
+      "F": 282,
+      "G": 283,
+      "H": 284,
+      "I": 285,
+      "J": 286,
+      "K": 287,
+      "L": 288,
+      "M": 289,
+      "N": 290,
+      "O": 291,
+      "P": 292,
+      "Q": 293,
+      "R": 294,
+      "S": 295,
+      "T": 296,
+      "U": 297,
+      "V": 298,
+      "W": 299,
+      "X": 300,
+      "Y": 301,
+      "Z": 302,
+      "[": 303,
+      "\\": 304,
+      "]": 305,
+      "^": 306,
+      "_": 307,
+      "`": 308,
+      "{": 309,
+      "|": 310,
+      "}": 311,
+      "~": 312,
+      "‐": 313,
+      "‑": 314,
+      "‒": 315,
+      "–": 316,
+      "—": 317,
+      "―": 318,
+      "‖": 319,
+      "‗": 320,
+      "‘": 321,
+      "’": 322,
+      "‚": 323,
+      "‛": 324,
+      "“": 325,
+      "”": 326,
+      "„": 327,
+      "‟": 328,
+      "\u00a0": 329,
+      "¡": 330,
+      "¢": 331,
+      "£": 332,
+      "¤": 333,
+      "¥": 334,
+      "¦": 335,
+      "§": 336,
+      "¨": 337,
+      "©": 338,
+      "ª": 339,
+      "«": 340,
+      "¬": 341,
+      "\u00ad": 342,
+      "®": 343,
+      "¯": 344,
+      "°": 345,
+      "±": 346,
+      "²": 347,
+      "³": 348,
+      "´": 349,
+      "µ": 350,
+      "¶": 351,
+      "·": 352,
+      "¸": 353,
+      "¹": 354,
+      "º": 355,
+      "»": 356,
+      "¼": 357,
+      "½": 358,
+      "¾": 359,
+      "¿": 360,
+      "À": 361,
+      "Á": 362,
+      "Â": 363,
+      "Ã": 364,
+      "Ä": 365,
+      "Å": 366,
+      "Æ": 367,
+      "Ç": 368,
+      "È": 369,
+      "É": 370,
+      "Ê": 371,
+      "Ë": 372,
+      "Ì": 373,
+      "Í": 374,
+      "Î": 375,
+      "Ï": 376,
+      "Ð": 377,
+      "Ñ": 378,
+      "Ò": 379,
+      "Ó": 380,
+      "Ô": 381,
+      "Õ": 382,
+      "Ö": 383,
+      "×": 384,
+      "Ø": 385,
+      "Ù": 386,
+      "Ú": 387,
+      "Û": 388,
+      "Ü": 389,
+      "Ý": 390,
+      "Þ": 391,
+      "ß": 392,
+      "à": 393,
+      "á": 394,
+      "â": 395,
+      "ã": 396,
+      "ä": 397,
+      "å": 398,
+      "æ": 399,
+      "ç": 400,
+      "è": 401,
+      "é": 402,
+      "ê": 403,
+      "ë": 404,
+      "ì": 405,
+      "í": 406,
+      "î": 407,
+      "ï": 408,
+      "ð": 409,
+      "ñ": 410,
+      "ò": 411,
+      "ó": 412,
+      "ô": 413,
+      "õ": 414,
+      "ö": 415,
+      "÷": 416,
+      "ø": 417,
+      "ù": 418,
+      "ú": 419,
+      "û": 420,
+      "ü": 421,
+      "ý": 422,
+      "þ": 423,
+      "ÿ": 424,
+      "ɐ": 425,
+      "ɑ": 426,
+      "ɒ": 427,
+      "ɓ": 428,
+      "ɔ": 429,
+      "ɕ": 430,
+      "ɖ": 431,
+      "ɗ": 432,
+      "ɘ": 433,
+      "ə": 434,
+      "ɚ": 435,
+      "ɛ": 436,
+      "ɜ": 437,
+      "ɝ": 438,
+      "ɞ": 439,
+      "ɟ": 440,
+      "ɠ": 441,
+      "ɡ": 442,
+      "ɢ": 443,
+      "ɣ": 444,
+      "ɤ": 445,
+      "ɥ": 446,
+      "ɦ": 447,
+      "ɧ": 448,
+      "ɨ": 449,
+      "ɩ": 450,
+      "ɪ": 451,
+      "ɫ": 452,
+      "ɬ": 453,
+      "ɭ": 454,
+      "ɮ": 455,
+      "ɯ": 456,
+      "ɰ": 457,
+      "ɱ": 458,
+      "ɲ": 459,
+      "ɳ": 460,
+      "ɴ": 461,
+      "ɵ": 462,
+      "ɶ": 463,
+      "ɷ": 464,
+      "ɸ": 465,
+      "ɹ": 466,
+      "ɺ": 467,
+      "ɻ": 468,
+      "ɼ": 469,
+      "ɽ": 470,
+      "ɾ": 471,
+      "ɿ": 472,
+      "ʀ": 473,
+      "ʁ": 474,
+      "ʂ": 475,
+      "ʃ": 476,
+      "ʄ": 477,
+      "ʅ": 478,
+      "ʆ": 479,
+      "ʇ": 480,
+      "ʈ": 481,
+      "ʉ": 482,
+      "ʊ": 483,
+      "ʋ": 484,
+      "ʌ": 485,
+      "ʍ": 486,
+      "ʎ": 487,
+      "ʏ": 488,
+      "ʐ": 489,
+      "ʑ": 490,
+      "ʒ": 491,
+      "ʓ": 492,
+      "ʔ": 493,
+      "ʕ": 494,
+      "ʖ": 495,
+      "ʗ": 496,
+      "ʘ": 497,
+      "ʙ": 498,
+      "ʚ": 499,
+      "ʛ": 500,
+      "ʜ": 501,
+      "ʝ": 502,
+      "ʞ": 503,
+      "ʟ": 504,
+      "ʠ": 505,
+      "ʡ": 506,
+      "ʢ": 507,
+      "ʣ": 508,
+      "ʤ": 509,
+      "ʥ": 510,
+      "ʦ": 511,
+      "ʧ": 512,
+      "ʨ": 513,
+      "ʩ": 514,
+      "ʪ": 515,
+      "ʫ": 516,
+      "ʬ": 517,
+      "ʭ": 518,
+      "ʮ": 519,
+      "ʯ": 520,
+      "ʰ": 521,
+      "ʱ": 522,
+      "ʲ": 523,
+      "ʳ": 524,
+      "ʴ": 525,
+      "ʵ": 526,
+      "ʶ": 527,
+      "ʷ": 528,
+      "ʸ": 529,
+      "ʹ": 530,
+      "ʺ": 531,
+      "ʻ": 532,
+      "ʼ": 533,
+      "ʽ": 534,
+      "ʾ": 535,
+      "ʿ": 536,
+      "ˀ": 537,
+      "ˁ": 538,
+      "˂": 539,
+      "˃": 540,
+      "˄": 541,
+      "˅": 542,
+      "ˆ": 543,
+      "ˇ": 544,
+      "ˈ": 545,
+      "ˉ": 546,
+      "ˊ": 547,
+      "ˋ": 548,
+      "ˌ": 549,
+      "ˍ": 550,
+      "ˎ": 551,
+      "ˏ": 552,
+      "ː": 553,
+      "ˑ": 554,
+      "˒": 555,
+      "˓": 556,
+      "˔": 557,
+      "˕": 558,
+      "˖": 559,
+      "˗": 560,
+      "˘": 561,
+      "˙": 562,
+      "˚": 563,
+      "˛": 564,
+      "˜": 565,
+      "˝": 566,
+      "˞": 567,
+      "˟": 568,
+      "ˠ": 569,
+      "ˡ": 570,
+      "ˢ": 571,
+      "ˣ": 572,
+      "ˤ": 573,
+      "˥": 574,
+      "˦": 575,
+      "˧": 576,
+      "˨": 577,
+      "˩": 578,
+      "˪": 579,
+      "˫": 580,
+      "ˬ": 581,
+      "˭": 582,
+      "ˮ": 583,
+      "˯": 584,
+      "˰": 585,
+      "˱": 586,
+      "˲": 587,
+      "˳": 588,
+      "˴": 589,
+      "˵": 590,
+      "˶": 591,
+      "˷": 592,
+      "˸": 593,
+      "˹": 594,
+      "˺": 595,
+      "˻": 596,
+      "˼": 597,
+      "˽": 598,
+      "˾": 599,
+      "˿": 600,
+      "ā": 601,
+      "ō": 602,
+      "…": 603,
+      "[UH]": 604,
+      "[UM]": 605,
+      "[giggle]": 606,
+      "[laughter]": 607,
+      "[guffaw]": 608,
+      "[inhale]": 609,
+      "[exhale]": 610,
+      "[sigh]": 611,
+      "[cry]": 612,
+      "[bark]": 613,
+      "[howl]": 614,
+      "[meow]": 615,
+      "[singing]": 616,
+      "[music]": 617,
+      "[whistle]": 618,
+      "[humming]": 619,
+      "[gasp]": 620,
+      "[groan]": 621,
+      "[whisper]": 622,
+      "[mumble]": 623,
+      "[sniff]": 624,
+      "[sneeze]": 625,
+      "[cough]": 626,
+      "[snore]": 627,
+      "[chew]": 628,
+      "[sip]": 629,
+      "[clear_throat]": 630,
+      "[kiss]": 631,
+      "[shhh]": 632,
+      "[gibberish]": 633,
+      "[fr]": 634,
+      "[es]": 635,
+      "[de]": 636,
+      "[it]": 637,
+      "[ipa]": 638,
+      "[end_of_label]": 639,
+      "ŋ": 640,
+      "ᵻ": 641,
+      "θ": 642,
+      "̩": 643,
+      "\u0303": 644,
+      "ɑː": 645,
+      "iː": 646,
+      "uː": 647,
+      "ɜː": 648,
+      "ɔː": 649,
+      "oː": 650,
+      "eɪ": 651,
+      "oʊ": 652,
+      "aɪ": 653,
+      "aʊ": 654,
+      "ɔɪ": 655,
+      "dʒ": 656,
+      "tʃ": 657,
+      "ɪŋ": 658,
+      "ᵻd": 659,
+      "ˈiː": 660,
+      "ˌiː": 661,
+      "ˈɪ": 662,
+      "ˌɪ": 663,
+      "ˈeɪ": 664,
+      "ˌeɪ": 665,
+      "ˈɛ": 666,
+      "ˌɛ": 667,
+      "ˈæ": 668,
+      "ˌæ": 669,
+      "ˈɑː": 670,
+      "ˌɑː": 671,
+      "ˈɔː": 672,
+      "ˌɔː": 673,
+      "oːɹ": 674,
+      "ˈoːɹ": 675,
+      "ˌoːɹ": 676,
+      "ˈoʊ": 677,
+      "ˌoʊ": 678,
+      "ˈʊ": 679,
+      "ˌʊ": 680,
+      "ˈuː": 681,
+      "ˌuː": 682,
+      "ˈɜː": 683,
+      "ˌɜː": 684,
+      "ˈʌ": 685,
+      "ˌʌ": 686,
+      "ˈaɪ": 687,
+      "ˌaɪ": 688,
+      "ˈaʊ": 689,
+      "ˌaʊ": 690,
+      "ˈɔɪ": 691,
+      "ˌɔɪ": 692,
+      "ˈɚ": 693,
+      "ˌɐ": 694,
+      "[PLACEHOLDER55]": 695,
+      "[PLACEHOLDER56]": 696,
+      "[PLACEHOLDER57]": 697,
+      "[PLACEHOLDER58]": 698,
+      "[PLACEHOLDER59]": 699,
+      "[PLACEHOLDER60]": 700,
+      "[PLACEHOLDER61]": 701,
+      "[PLACEHOLDER62]": 702,
+      "[PLACEHOLDER63]": 703
+    },
+    "merges": [
+      "t h",
+      "i n",
+      "th e",
+      "a n",
+      "e r",
+      "o u",
+      "r e",
+      "o n",
+      "a t",
+      "e d",
+      "e n",
+      "t o",
+      "in g",
+      "an d",
+      "i s",
+      "a s",
+      "a l",
+      "o r",
+      "o f",
+      "a r",
+      "i t",
+      "e s",
+      "h e",
+      "s t",
+      "l e",
+      "o m",
+      "s e",
+      "b e",
+      "a d",
+      "o w",
+      "l y",
+      "c h",
+      "w h",
+      "th at",
+      "y ou",
+      "l i",
+      "v e",
+      "a c",
+      "t i",
+      "l d",
+      "m e",
+      "w as",
+      "g h",
+      "i d",
+      "l l",
+      "w i",
+      "en t",
+      "f or",
+      "a y",
+      "r o",
+      "v er",
+      "i c",
+      "h er",
+      "k e",
+      "h is",
+      "n o",
+      "u t",
+      "u n",
+      "i r",
+      "l o",
+      "w e",
+      "r i",
+      "h a",
+      "wi th",
+      "gh t",
+      "ou t",
+      "i m",
+      "i on",
+      "al l",
+      "a b",
+      "on e",
+      "n e",
+      "g e",
+      "ou ld",
+      "t er",
+      "m o",
+      "h ad",
+      "c e",
+      "s he",
+      "g o",
+      "s h",
+      "u r",
+      "a m",
+      "s o",
+      "p e",
+      "m y",
+      "d e",
+      "a re",
+      "b ut",
+      "om e",
+      "f r",
+      "the r",
+      "f e",
+      "s u",
+      "d o",
+      "c on",
+      "t e",
+      "a in",
+      "er e",
+      "p o",
+      "i f",
+      "the y",
+      "u s",
+      "a g",
+      "t r",
+      "n ow",
+      "ou n",
+      "th is",
+      "ha ve",
+      "no t",
+      "s a",
+      "i l",
+      "u p",
+      "th ing",
+      "fr om",
+      "a p",
+      "h im",
+      "ac k",
+      "at ion",
+      "an t",
+      "ou r",
+      "o p",
+      "li ke",
+      "u st",
+      "es s",
+      "b o",
+      "o k",
+      "u l",
+      "in d",
+      "e x",
+      "c om",
+      "s ome",
+      "the re",
+      "er s",
+      "c o",
+      "re s",
+      "m an",
+      "ar d",
+      "p l",
+      "w or",
+      "w ay",
+      "ti on",
+      "f o",
+      "c a",
+      "w ere",
+      "b y",
+      "at e",
+      "p ro",
+      "t ed",
+      "oun d",
+      "ow n",
+      "w ould",
+      "t s",
+      "wh at",
+      "q u",
+      "al ly",
+      "i ght",
+      "c k",
+      "g r",
+      "wh en",
+      "v en",
+      "c an",
+      "ou gh",
+      "in e",
+      "en d",
+      "p er",
+      "ou s",
+      "o d",
+      "id e",
+      "k now",
+      "t y",
+      "ver y",
+      "s i",
+      "a k",
+      "wh o",
+      "ab out",
+      "i ll",
+      "the m",
+      "es t",
+      "re d",
+      "y e",
+      "c ould",
+      "on g",
+      "you r",
+      "the ir",
+      "e m",
+      "j ust",
+      "o ther",
+      "in to",
+      "an y",
+      "wh i",
+      "u m",
+      "t w",
+      "as t",
+      "d er",
+      "d id",
+      "i e",
+      "be en",
+      "ac e",
+      "in k",
+      "it y",
+      "b ack",
+      "t ing",
+      "b r",
+      "mo re",
+      "a ke",
+      "p p",
+      "the n",
+      "s p",
+      "e l",
+      "u se",
+      "b l",
+      "sa id",
+      "o ver",
+      "ge t",
+      "ɑ ː",
+      "i ː",
+      "u ː",
+      "ɜ ː",
+      "ɔ ː",
+      "o ː",
+      "e ɪ",
+      "o ʊ",
+      "a ɪ",
+      "a ʊ",
+      "ɔ ɪ",
+      "d ʒ",
+      "t ʃ",
+      "ɪ ŋ",
+      "ᵻ d",
+      "ˈ iː",
+      "ˌ iː",
+      "ˈ ɪ",
+      "ˌ ɪ",
+      "ˈ eɪ",
+      "ˌ eɪ",
+      "ˈ ɛ",
+      "ˌ ɛ",
+      "ˈ æ",
+      "ˌ æ",
+      "ˈ ɑː",
+      "ˌ ɑː",
+      "ˈ ɔː",
+      "ˌ ɔː",
+      "oː ɹ",
+      "ˈ oːɹ",
+      "ˌ oːɹ",
+      "ˈ oʊ",
+      "ˌ oʊ",
+      "ˈ ʊ",
+      "ˌ ʊ",
+      "ˈ uː",
+      "ˌ uː",
+      "ˈ ɜː",
+      "ˌ ɜː",
+      "ˈ ʌ",
+      "ˌ ʌ",
+      "ˈ aɪ",
+      "ˌ aɪ",
+      "ˈ aʊ",
+      "ˌ aʊ",
+      "ˈ ɔɪ",
+      "ˌ ɔɪ",
+      "ˈ ɚ",
+      "ˌ ɐ"
+    ]
+  }
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": true,
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 131072,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}