Commit ·
cb23798
0
Parent(s):
Duplicate from onnx-community/Supertonic-TTS-ONNX
Browse files
Co-authored-by: Joshua <Xenova@users.noreply.huggingface.co>
- .gitattributes +38 -0
- README.md +151 -0
- config.json +12 -0
- onnx/latent_denoiser.onnx +3 -0
- onnx/latent_denoiser.onnx_data +3 -0
- onnx/text_encoder.onnx +3 -0
- onnx/text_encoder.onnx_data +3 -0
- onnx/voice_decoder.onnx +3 -0
- onnx/voice_decoder.onnx_data +3 -0
- tokenizer.json +130 -0
- tokenizer_config.json +6 -0
- voices/F1.bin +3 -0
- voices/F2.bin +3 -0
- voices/F3.bin +3 -0
- voices/F4.bin +3 -0
- voices/F5.bin +3 -0
- voices/M1.bin +3 -0
- voices/M2.bin +3 -0
- voices/M3.bin +3 -0
- voices/M4.bin +3 -0
- voices/M5.bin +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
onnx/latent_denoiser.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
onnx/text_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
onnx/voice_decoder.onnx_data filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: openrail
|
| 3 |
+
base_model:
|
| 4 |
+
- Supertone/supertonic
|
| 5 |
+
library_name: transformers.js
|
| 6 |
+
language:
|
| 7 |
+
- en
|
| 8 |
+
pipeline_tag: text-to-speech
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Usage
|
| 12 |
+
|
| 13 |
+
### Transformers.js
|
| 14 |
+
|
| 15 |
+
If you haven't already, you can install the [Transformers.js](https://huggingface.co/docs/transformers.js) JavaScript library from [NPM](https://www.npmjs.com/package/@huggingface/transformers) using:
|
| 16 |
+
```bash
|
| 17 |
+
npm i @huggingface/transformers
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
You can then generate audio as follows:
|
| 21 |
+
```js
|
| 22 |
+
import { pipeline } from '@huggingface/transformers';
|
| 23 |
+
|
| 24 |
+
const tts = await pipeline('text-to-speech', 'onnx-community/Supertonic-TTS-ONNX');
|
| 25 |
+
|
| 26 |
+
const input_text = 'This is really cool!';
|
| 27 |
+
const audio = await tts(input_text, {
|
| 28 |
+
speaker_embeddings: 'https://huggingface.co/onnx-community/Supertonic-TTS-ONNX/resolve/main/voices/F1.bin',
|
| 29 |
+
num_inference_steps: 5, // Higher = better quality (typically 1-50)
|
| 30 |
+
speed: 1.05, // Higher = faster speech (typically 0.8-1.2)
|
| 31 |
+
});
|
| 32 |
+
await audio.save('output.wav'); // or `audio.toBlob()`;
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### ONNXRuntime
|
| 36 |
+
|
| 37 |
+
First, let's create a helper class, `SupertonicTTS`:
|
| 38 |
+
|
| 39 |
+
```py
|
| 40 |
+
import os
|
| 41 |
+
import numpy as np
|
| 42 |
+
import onnxruntime as ort
|
| 43 |
+
from transformers import AutoTokenizer
|
| 44 |
+
|
| 45 |
+
class SupertonicTTS:
|
| 46 |
+
SAMPLE_RATE = 44100
|
| 47 |
+
CHUNK_COMPRESS_FACTOR = 6
|
| 48 |
+
BASE_CHUNK_SIZE = 512
|
| 49 |
+
LATENT_DIM = 24
|
| 50 |
+
STYLE_DIM = 128
|
| 51 |
+
LATENT_SIZE = BASE_CHUNK_SIZE * CHUNK_COMPRESS_FACTOR
|
| 52 |
+
|
| 53 |
+
def __init__(self, model_path):
|
| 54 |
+
self.model_path = model_path
|
| 55 |
+
self.tokenizer = AutoTokenizer.from_pretrained(self.model_path)
|
| 56 |
+
|
| 57 |
+
# Load ONNX sessions
|
| 58 |
+
self.text_encoder = ort.InferenceSession(os.path.join(self.model_path, "onnx", "text_encoder.onnx"))
|
| 59 |
+
self.latent_denoiser = ort.InferenceSession(os.path.join(self.model_path, "onnx", "latent_denoiser.onnx"))
|
| 60 |
+
self.voice_decoder = ort.InferenceSession(os.path.join(self.model_path, "onnx", "voice_decoder.onnx"))
|
| 61 |
+
|
| 62 |
+
def _load_style(self, voice: str) -> np.ndarray:
|
| 63 |
+
voice_path = os.path.join(self.model_path, "voices", f"{voice}.bin")
|
| 64 |
+
if not os.path.exists(voice_path):
|
| 65 |
+
raise ValueError(f"Voice '{voice}' not found.")
|
| 66 |
+
|
| 67 |
+
style_vec = np.fromfile(voice_path, dtype=np.float32)
|
| 68 |
+
return style_vec.reshape(1, -1, self.STYLE_DIM)
|
| 69 |
+
|
| 70 |
+
def generate(self, text: list[str], *, voice: str = "M1", speed: float = 1.0, steps: int = 5) -> list[np.ndarray]:
|
| 71 |
+
# 1. Prepare Text Inputs
|
| 72 |
+
inputs = self.tokenizer(text, return_tensors="np", padding=True, truncation=True)
|
| 73 |
+
input_ids = inputs["input_ids"]
|
| 74 |
+
attn_mask = inputs["attention_mask"]
|
| 75 |
+
batch_size = input_ids.shape[0]
|
| 76 |
+
|
| 77 |
+
# 2. Prepare Style
|
| 78 |
+
style = self._load_style(voice).repeat(batch_size, axis=0)
|
| 79 |
+
|
| 80 |
+
# 3. Text Encoding
|
| 81 |
+
last_hidden_state, raw_durations = self.text_encoder.run(
|
| 82 |
+
None,
|
| 83 |
+
{"input_ids": input_ids, "attention_mask": attn_mask, "style": style}
|
| 84 |
+
)
|
| 85 |
+
durations = (raw_durations / speed * self.SAMPLE_RATE).astype(np.int64)
|
| 86 |
+
|
| 87 |
+
# 4. Latent Preparation
|
| 88 |
+
latent_lengths = (durations + self.LATENT_SIZE - 1) // self.LATENT_SIZE
|
| 89 |
+
max_len = latent_lengths.max()
|
| 90 |
+
latent_mask = (np.arange(max_len) < latent_lengths[:, None]).astype(np.int64)
|
| 91 |
+
latents = np.random.randn(batch_size, self.LATENT_DIM * self.CHUNK_COMPRESS_FACTOR, max_len).astype(np.float32)
|
| 92 |
+
latents *= latent_mask[:, None, :]
|
| 93 |
+
|
| 94 |
+
# 5. Denoising Loop
|
| 95 |
+
num_inference_steps = np.full(batch_size, steps, dtype=np.float32)
|
| 96 |
+
for step in range(steps):
|
| 97 |
+
timestep = np.full(batch_size, step, dtype=np.float32)
|
| 98 |
+
latents = self.latent_denoiser.run(
|
| 99 |
+
None,
|
| 100 |
+
{
|
| 101 |
+
"noisy_latents": latents,
|
| 102 |
+
"latent_mask": latent_mask,
|
| 103 |
+
"style": style,
|
| 104 |
+
"encoder_outputs": last_hidden_state,
|
| 105 |
+
"attention_mask": attn_mask,
|
| 106 |
+
"timestep": timestep,
|
| 107 |
+
"num_inference_steps": num_inference_steps,
|
| 108 |
+
},
|
| 109 |
+
)[0]
|
| 110 |
+
|
| 111 |
+
# 6. Decode Latents to Audio
|
| 112 |
+
waveforms = self.voice_decoder.run(None, {"latents": latents})[0]
|
| 113 |
+
|
| 114 |
+
# 7. Post-process: Trim padding and return list of arrays
|
| 115 |
+
results = []
|
| 116 |
+
for i, length in enumerate(latent_mask.sum(axis=1) * self.LATENT_SIZE):
|
| 117 |
+
results.append(waveforms[i, :length])
|
| 118 |
+
|
| 119 |
+
return results
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
Next, clone or download this repository using whichever method you prefer (e.g., `git clone` or `huggingface_hub`):
|
| 123 |
+
```py
|
| 124 |
+
# (Optional) Download model files (or use existing local directory)
|
| 125 |
+
from huggingface_hub import snapshot_download
|
| 126 |
+
model_id = "onnx-community/Supertonic-TTS-ONNX"
|
| 127 |
+
local_dir = "supertonic"
|
| 128 |
+
snapshot_download(model_id, local_dir=local_dir)
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
We can then use the model as follows:
|
| 132 |
+
|
| 133 |
+
```py
|
| 134 |
+
# Initialize TTS
|
| 135 |
+
tts = SupertonicTTS(local_dir)
|
| 136 |
+
|
| 137 |
+
# Generate audio
|
| 138 |
+
prompts = [
|
| 139 |
+
"Once upon a time, there was a brave knight.",
|
| 140 |
+
"Refactoring code makes it much easier to read!",
|
| 141 |
+
"I love this!"
|
| 142 |
+
]
|
| 143 |
+
audio_data = tts.generate(prompts, voice="M1", speed=1.0, steps=10)
|
| 144 |
+
|
| 145 |
+
# (Optional) Save to files
|
| 146 |
+
import soundfile as sf
|
| 147 |
+
for i, audio in enumerate(audio_data):
|
| 148 |
+
filename = f"output_{i}.wav"
|
| 149 |
+
sf.write(filename, audio, tts.SAMPLE_RATE)
|
| 150 |
+
print(f"Saved {filename}")
|
| 151 |
+
```
|
config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"base_chunk_size": 512,
|
| 3 |
+
"chunk_compress_factor": 6,
|
| 4 |
+
"latent_dim": 24,
|
| 5 |
+
"model_type": "supertonic",
|
| 6 |
+
"sampling_rate": 44100,
|
| 7 |
+
"style_dim": 128,
|
| 8 |
+
"transformers.js_config": {
|
| 9 |
+
"dtype": "fp32",
|
| 10 |
+
"use_external_data_format": true
|
| 11 |
+
}
|
| 12 |
+
}
|
onnx/latent_denoiser.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a639a8c05c9be111848562c5cf10ea2697a589c6341830aac479d0ce7b75aa9
|
| 3 |
+
size 398102
|
onnx/latent_denoiser.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde4abf1136defce235bc446eaab4954a57721ae8d5a4754cdd337bf191b612f
|
| 3 |
+
size 132098880
|
onnx/text_encoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50a03d29d5dc95918eeff578f542b814f3cf5a741f927116f5a8462a76ff6898
|
| 3 |
+
size 433169
|
onnx/text_encoder.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6415854f135a318909dc716e90f83a391d9a91bd9da09bdb6d6763d6b0a6c102
|
| 3 |
+
size 28426752
|
onnx/voice_decoder.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83c104006dabcd6b568c0d5acb6fec18f65609d2391dd2c459e4440e85027669
|
| 3 |
+
size 59921
|
onnx/voice_decoder.onnx_data
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea52402c9ba5131ee2b3901a86db2f0b435b322169cd75157e053493d967d17f
|
| 3 |
+
size 101353472
|
tokenizer.json
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"truncation": null,
|
| 4 |
+
"padding": null,
|
| 5 |
+
"added_tokens": [],
|
| 6 |
+
"normalizer": {
|
| 7 |
+
"type": "Sequence",
|
| 8 |
+
"normalizers": [
|
| 9 |
+
{
|
| 10 |
+
"type": "NFKD"
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"type": "Replace",
|
| 14 |
+
"pattern": {
|
| 15 |
+
"Regex": "\\s+"
|
| 16 |
+
},
|
| 17 |
+
"content": " "
|
| 18 |
+
},
|
| 19 |
+
{
|
| 20 |
+
"type": "Replace",
|
| 21 |
+
"pattern": {
|
| 22 |
+
"Regex": "[\u2013\u2014]"
|
| 23 |
+
},
|
| 24 |
+
"content": "-"
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"type": "Replace",
|
| 28 |
+
"pattern": {
|
| 29 |
+
"Regex": "[^ -\"$-.0-;?A-Za-z£́]"
|
| 30 |
+
},
|
| 31 |
+
"content": ""
|
| 32 |
+
}
|
| 33 |
+
]
|
| 34 |
+
},
|
| 35 |
+
"pre_tokenizer": {
|
| 36 |
+
"type": "FixedLength",
|
| 37 |
+
"length": 1
|
| 38 |
+
},
|
| 39 |
+
"post_processor": null,
|
| 40 |
+
"decoder": {
|
| 41 |
+
"type": "Fuse"
|
| 42 |
+
},
|
| 43 |
+
"model": {
|
| 44 |
+
"type": "WordLevel",
|
| 45 |
+
"vocab": {
|
| 46 |
+
" ": 0,
|
| 47 |
+
"!": 1,
|
| 48 |
+
"\"": 2,
|
| 49 |
+
"$": 3,
|
| 50 |
+
"%": 4,
|
| 51 |
+
"&": 5,
|
| 52 |
+
"'": 6,
|
| 53 |
+
"(": 7,
|
| 54 |
+
")": 8,
|
| 55 |
+
"*": 9,
|
| 56 |
+
"+": 10,
|
| 57 |
+
",": 11,
|
| 58 |
+
"-": 12,
|
| 59 |
+
".": 13,
|
| 60 |
+
"0": 14,
|
| 61 |
+
"1": 15,
|
| 62 |
+
"2": 16,
|
| 63 |
+
"3": 17,
|
| 64 |
+
"4": 18,
|
| 65 |
+
"5": 19,
|
| 66 |
+
"6": 20,
|
| 67 |
+
"7": 21,
|
| 68 |
+
"8": 22,
|
| 69 |
+
"9": 23,
|
| 70 |
+
":": 24,
|
| 71 |
+
";": 25,
|
| 72 |
+
"?": 26,
|
| 73 |
+
"A": 27,
|
| 74 |
+
"B": 28,
|
| 75 |
+
"C": 29,
|
| 76 |
+
"D": 30,
|
| 77 |
+
"E": 31,
|
| 78 |
+
"F": 32,
|
| 79 |
+
"G": 33,
|
| 80 |
+
"H": 34,
|
| 81 |
+
"I": 35,
|
| 82 |
+
"J": 36,
|
| 83 |
+
"K": 37,
|
| 84 |
+
"L": 38,
|
| 85 |
+
"M": 39,
|
| 86 |
+
"N": 40,
|
| 87 |
+
"O": 41,
|
| 88 |
+
"P": 42,
|
| 89 |
+
"Q": 43,
|
| 90 |
+
"R": 44,
|
| 91 |
+
"S": 45,
|
| 92 |
+
"T": 46,
|
| 93 |
+
"U": 47,
|
| 94 |
+
"V": 48,
|
| 95 |
+
"W": 49,
|
| 96 |
+
"X": 50,
|
| 97 |
+
"Y": 51,
|
| 98 |
+
"Z": 52,
|
| 99 |
+
"a": 53,
|
| 100 |
+
"b": 54,
|
| 101 |
+
"c": 55,
|
| 102 |
+
"d": 56,
|
| 103 |
+
"e": 57,
|
| 104 |
+
"f": 58,
|
| 105 |
+
"g": 59,
|
| 106 |
+
"h": 60,
|
| 107 |
+
"i": 61,
|
| 108 |
+
"j": 62,
|
| 109 |
+
"k": 63,
|
| 110 |
+
"l": 64,
|
| 111 |
+
"m": 65,
|
| 112 |
+
"n": 66,
|
| 113 |
+
"o": 67,
|
| 114 |
+
"p": 68,
|
| 115 |
+
"q": 69,
|
| 116 |
+
"r": 70,
|
| 117 |
+
"s": 71,
|
| 118 |
+
"t": 72,
|
| 119 |
+
"u": 73,
|
| 120 |
+
"v": 74,
|
| 121 |
+
"w": 75,
|
| 122 |
+
"x": 76,
|
| 123 |
+
"y": 77,
|
| 124 |
+
"z": 78,
|
| 125 |
+
"£": 79,
|
| 126 |
+
"\u0301": 80
|
| 127 |
+
},
|
| 128 |
+
"unk_token": "\u0301"
|
| 129 |
+
}
|
| 130 |
+
}
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 3 |
+
"model_max_length": 1000,
|
| 4 |
+
"pad_token": " ",
|
| 5 |
+
"pad_token_id": 0
|
| 6 |
+
}
|
voices/F1.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ef84e3421e4f80994a5a40a18ba39ba9fc48175c41ae6cf3e56418820872dbf
|
| 3 |
+
size 51712
|
voices/F2.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1949cf0e066c4278980d2b835cf334dab0f8f781704c9116bf48a072278f7c72
|
| 3 |
+
size 51712
|
voices/F3.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:38ee1d62ad8a02877ab0d08b501742b76cf3586ed888514df1a7f27cc0f8d171
|
| 3 |
+
size 51712
|
voices/F4.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63890c361868a296c51f9aee114f51e0a9a92c3f46a91582539545f7ab408a72
|
| 3 |
+
size 51712
|
voices/F5.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:793223d8d11e0ee49721842ebdc7bd46b4487579588f646953e75ad3fc8ffb9c
|
| 3 |
+
size 51712
|
voices/M1.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d53fbaaccf39a358010dcc5f289fc1d5cb350fe5f518be35f62cc518d794892
|
| 3 |
+
size 51712
|
voices/M2.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e02979a394f89002d920f0bcc006206d4cd8da90e8cc82d0532831a5bb20e79
|
| 3 |
+
size 51712
|
voices/M3.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:470d2b6b77239628ce90ba879ca5366fb5e6103fdd7e7053954a7b6d5dc2142a
|
| 3 |
+
size 51712
|
voices/M4.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4700e92c614fd34971a8ed9c8140c2f2162ab8ef3067f8e1e7ef67c3e6488fb7
|
| 3 |
+
size 51712
|
voices/M5.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c40fbc4093d113ef261cbc7bfe3f080dd813d3168347d682c78b1ca71a07da1f
|
| 3 |
+
size 51712
|