Spaces:
Build error
Build error
Deploy Bengali TTS app
Browse files. This view is limited to 50 files because it contains too many changes. See the raw diff.
- .gitattributes +5 -0
- app.py +254 -0
- data/Bengali/vocab.txt +2545 -0
- requirements.txt +15 -0
- src/f5_tts/api.py +164 -0
- src/f5_tts/configs/E2TTS_Base.yaml +49 -0
- src/f5_tts/configs/E2TTS_Small.yaml +49 -0
- src/f5_tts/configs/F5TTS_Base.yaml +54 -0
- src/f5_tts/configs/F5TTS_Small.yaml +54 -0
- src/f5_tts/configs/F5TTS_v1_Base.yaml +55 -0
- src/f5_tts/eval/README.md +63 -0
- src/f5_tts/eval/__pycache__/compare_checkpoints.cpython-311.pyc +0 -0
- src/f5_tts/eval/__pycache__/eval_bengali.cpython-311.pyc +0 -0
- src/f5_tts/eval/__pycache__/gen_bengali_batch.cpython-311.pyc +0 -0
- src/f5_tts/eval/__pycache__/gen_elevenlabs_batch.cpython-311.pyc +0 -0
- src/f5_tts/eval/__pycache__/gen_indicf5_batch.cpython-311.pyc +0 -0
- src/f5_tts/eval/compare_checkpoints.py +150 -0
- src/f5_tts/eval/ecapa_tdnn.py +331 -0
- src/f5_tts/eval/eval_bengali.py +215 -0
- src/f5_tts/eval/eval_gemini.py +160 -0
- src/f5_tts/eval/eval_infer_batch.py +221 -0
- src/f5_tts/eval/eval_infer_batch.sh +116 -0
- src/f5_tts/eval/eval_infer_batch_example.sh +18 -0
- src/f5_tts/eval/eval_librispeech_test_clean.py +105 -0
- src/f5_tts/eval/eval_seedtts_testset.py +104 -0
- src/f5_tts/eval/eval_utmos.py +42 -0
- src/f5_tts/eval/gen_bengali_batch.py +159 -0
- src/f5_tts/eval/gen_elevenlabs_batch.py +110 -0
- src/f5_tts/eval/gen_gemini_batch.py +121 -0
- src/f5_tts/eval/utils_eval.py +444 -0
- src/f5_tts/infer/README.md +177 -0
- src/f5_tts/infer/SHARED.md +193 -0
- src/f5_tts/infer/__pycache__/infer_cli.cpython-311.pyc +0 -0
- src/f5_tts/infer/__pycache__/infer_cli_emotion.cpython-311.pyc +0 -0
- src/f5_tts/infer/__pycache__/infer_elevenlabs.cpython-311.pyc +0 -0
- src/f5_tts/infer/__pycache__/infer_emotion.cpython-311.pyc +0 -0
- src/f5_tts/infer/__pycache__/utils_infer.cpython-311.pyc +0 -0
- src/f5_tts/infer/examples/basic/basic.toml +11 -0
- src/f5_tts/infer/examples/basic/basic_ref_en.wav +3 -0
- src/f5_tts/infer/examples/basic/basic_ref_zh.wav +3 -0
- src/f5_tts/infer/examples/multi/country.flac +3 -0
- src/f5_tts/infer/examples/multi/main.flac +3 -0
- src/f5_tts/infer/examples/multi/story.toml +20 -0
- src/f5_tts/infer/examples/multi/story.txt +1 -0
- src/f5_tts/infer/examples/multi/town.flac +3 -0
- src/f5_tts/infer/examples/vocab.txt +2545 -0
- src/f5_tts/infer/infer_cli.py +388 -0
- src/f5_tts/infer/infer_cli_emotion.py +287 -0
- src/f5_tts/infer/infer_elevenlabs.py +71 -0
- src/f5_tts/infer/infer_emotion.py +265 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
src/f5_tts/infer/examples/basic/basic_ref_en.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
src/f5_tts/infer/examples/basic/basic_ref_zh.wav filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
src/f5_tts/infer/examples/multi/country.flac filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
src/f5_tts/infer/examples/multi/main.flac filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
src/f5_tts/infer/examples/multi/town.flac filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
|
| 4 |
+
import gradio as gr
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
from huggingface_hub import hf_hub_download
|
| 8 |
+
|
# Optional HF Spaces GPU decorator: on Hugging Face Spaces the `spaces`
# package provides spaces.GPU (requests a GPU for the decorated function);
# elsewhere we fall back to a no-op so the app also runs locally.
try:
    import spaces
    gpu_decorator = spaces.GPU
except ImportError:
    def gpu_decorator(fn):
        # Identity decorator used when `spaces` is unavailable.
        return fn
# NOTE(review): these imports sit below the try/except; conventionally they
# belong at the top of the file with the other imports.
import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration, GenerationConfig

from f5_tts.model import CFM, DiT
from f5_tts.infer.utils_infer import (
    device,
    load_checkpoint,
    load_vocoder,
    preprocess_ref_audio_text,
    infer_process,
    target_sample_rate,
    hop_length,
    n_fft,
    win_length,
    n_mel_channels,
)
from f5_tts.model.utils import get_tokenizer

# Config
# The checkpoint is fetched from the HF Hub at import time (cached by
# huggingface_hub after the first download). VOCAB_FILE is resolved
# relative to the process working directory.
MODEL_CKPT = hf_hub_download("Umong/bengali-f5-tts", "model_50000.pt")
VOCAB_FILE = "data/Bengali/vocab.txt"
# Whisper checkpoint fine-tuned for Bengali, used by transcribe_bengali()
# to auto-transcribe the reference audio.
WHISPER_MODEL = "bengaliAI/tugstugi_bengaliai-asr_whisper-medium"

# Model architecture (same as F5TTS_v1_Base)
model_cfg = dict(
    dim=1024,
    depth=22,
    heads=16,
    ff_mult=2,
    text_dim=512,
    text_mask_padding=True,
    qk_norm=None,
    conv_layers=4,
    pe_attn_head=None,
)

# Globals
# Lazily initialised by load_models() / init_bengali_asr() so each model is
# loaded at most once per process, on first use rather than at import.
ema_model = None
vocoder = None
bn_asr_model = None
bn_asr_processor = None
def load_models():
    """Load the Bengali F5-TTS model and the vocos vocoder, once per process.

    Populates the module-level ``ema_model`` and ``vocoder`` globals; calling
    again after a successful load is a no-op.
    """
    global ema_model, vocoder
    if ema_model is not None:
        return  # already initialised

    print("Loading Bengali TTS model...")
    vocab_char_map, vocab_size = get_tokenizer(VOCAB_FILE, "custom")

    # Mel-spectrogram settings shared with the vocos vocoder.
    mel_kwargs = {
        "n_fft": n_fft,
        "hop_length": hop_length,
        "win_length": win_length,
        "n_mel_channels": n_mel_channels,
        "target_sample_rate": target_sample_rate,
        "mel_spec_type": "vocos",
    }
    transformer = DiT(**model_cfg, text_num_embeds=vocab_size, mel_dim=n_mel_channels)
    cfm = CFM(
        transformer=transformer,
        mel_spec_kwargs=mel_kwargs,
        odeint_kwargs={"method": "euler"},
        vocab_char_map=vocab_char_map,
    ).to(device)
    ema_model = load_checkpoint(cfm, MODEL_CKPT, device, use_ema=True)

    print("Loading vocoder...")
    vocoder = load_vocoder(vocoder_name="vocos", is_local=False, device=device)
    print("Models loaded.")
def init_bengali_asr():
    """Lazily load the Bengali Whisper ASR model and processor.

    Fills the module-level ``bn_asr_model`` / ``bn_asr_processor`` globals;
    subsequent calls return immediately.
    """
    global bn_asr_model, bn_asr_processor
    if bn_asr_model is not None:
        return  # already loaded

    print("Loading Bengali ASR...")
    bn_asr_processor = WhisperProcessor.from_pretrained(WHISPER_MODEL)
    asr = WhisperForConditionalGeneration.from_pretrained(WHISPER_MODEL)
    bn_asr_model = asr.to(device)
    # Fix outdated generation config: replace the checkpoint's stale config
    # with the upstream openai/whisper-medium one.
    bn_asr_model.generation_config = GenerationConfig.from_pretrained("openai/whisper-medium")
    print("Bengali ASR loaded.")
+
|
| 98 |
+
|
| 99 |
+
def transcribe_bengali(audio_path: str) -> str:
|
| 100 |
+
init_bengali_asr()
|
| 101 |
+
waveform, sr = torchaudio.load(audio_path)
|
| 102 |
+
if sr != 16000:
|
| 103 |
+
waveform = torchaudio.transforms.Resample(sr, 16000)(waveform)
|
| 104 |
+
if waveform.shape[0] > 1:
|
| 105 |
+
waveform = waveform.mean(dim=0, keepdim=True)
|
| 106 |
+
|
| 107 |
+
input_features = bn_asr_processor(
|
| 108 |
+
waveform.squeeze().numpy(), sampling_rate=16000, return_tensors="pt"
|
| 109 |
+
).input_features.to(device)
|
| 110 |
+
predicted_ids = bn_asr_model.generate(input_features, language="bn", task="transcribe")
|
| 111 |
+
text = bn_asr_processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
|
| 112 |
+
return text.strip()
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def preprocess_ref_audio_text_bn(ref_audio, ref_text, show_info=print):
|
| 116 |
+
"""Wrapper that uses Bengali ASR instead of default whisper."""
|
| 117 |
+
# Use original preprocessing for audio clipping/silence
|
| 118 |
+
from f5_tts.infer.utils_infer import (
|
| 119 |
+
_ref_audio_cache,
|
| 120 |
+
remove_silence_edges,
|
| 121 |
+
)
|
| 122 |
+
from pydub import AudioSegment, silence
|
| 123 |
+
import hashlib
|
| 124 |
+
|
| 125 |
+
show_info("Converting audio...")
|
| 126 |
+
|
| 127 |
+
with open(ref_audio, "rb") as f:
|
| 128 |
+
audio_hash = hashlib.md5(f.read()).hexdigest()
|
| 129 |
+
|
| 130 |
+
if audio_hash in _ref_audio_cache:
|
| 131 |
+
processed_audio = _ref_audio_cache[audio_hash]
|
| 132 |
+
else:
|
| 133 |
+
tempfile_kwargs = {"delete": False, "suffix": ".wav"}
|
| 134 |
+
with tempfile.NamedTemporaryFile(**tempfile_kwargs) as f:
|
| 135 |
+
temp_path = f.name
|
| 136 |
+
|
| 137 |
+
aseg = AudioSegment.from_file(ref_audio)
|
| 138 |
+
|
| 139 |
+
# Clip to 15s using silence detection
|
| 140 |
+
non_silent_segs = silence.split_on_silence(
|
| 141 |
+
aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000, seek_step=10
|
| 142 |
+
)
|
| 143 |
+
non_silent_wave = AudioSegment.silent(duration=0)
|
| 144 |
+
for seg in non_silent_segs:
|
| 145 |
+
if len(non_silent_wave) > 6000 and len(non_silent_wave + seg) > 15000:
|
| 146 |
+
show_info("Audio over 15s, clipping.")
|
| 147 |
+
break
|
| 148 |
+
non_silent_wave += seg
|
| 149 |
+
|
| 150 |
+
if len(non_silent_wave) > 15000:
|
| 151 |
+
non_silent_segs = silence.split_on_silence(
|
| 152 |
+
aseg, min_silence_len=100, silence_thresh=-40, keep_silence=1000, seek_step=10
|
| 153 |
+
)
|
| 154 |
+
non_silent_wave = AudioSegment.silent(duration=0)
|
| 155 |
+
for seg in non_silent_segs:
|
| 156 |
+
if len(non_silent_wave) > 6000 and len(non_silent_wave + seg) > 15000:
|
| 157 |
+
break
|
| 158 |
+
non_silent_wave += seg
|
| 159 |
+
|
| 160 |
+
aseg = non_silent_wave
|
| 161 |
+
if len(aseg) > 15000:
|
| 162 |
+
aseg = aseg[:15000]
|
| 163 |
+
show_info("Audio over 15s, hard clip.")
|
| 164 |
+
|
| 165 |
+
aseg = remove_silence_edges(aseg) + AudioSegment.silent(duration=50)
|
| 166 |
+
aseg.export(temp_path, format="wav")
|
| 167 |
+
processed_audio = temp_path
|
| 168 |
+
_ref_audio_cache[audio_hash] = processed_audio
|
| 169 |
+
|
| 170 |
+
# Bengali transcription if no ref_text
|
| 171 |
+
if not ref_text.strip():
|
| 172 |
+
show_info("Transcribing with Bengali ASR...")
|
| 173 |
+
ref_text = transcribe_bengali(processed_audio)
|
| 174 |
+
|
| 175 |
+
# Ensure proper ending punctuation
|
| 176 |
+
if not ref_text.endswith(". ") and not ref_text.endswith("।"):
|
| 177 |
+
if ref_text.endswith("."):
|
| 178 |
+
ref_text += " "
|
| 179 |
+
else:
|
| 180 |
+
ref_text += "। "
|
| 181 |
+
|
| 182 |
+
print("ref_text:", ref_text)
|
| 183 |
+
return processed_audio, ref_text
|
| 184 |
+
|
@gpu_decorator
def generate_tts(ref_audio, gen_text, speed):
    """Gradio callback: synthesise Bengali speech in the reference voice.

    Args:
        ref_audio: filepath of the reference recording, or None.
        gen_text: Bengali text to synthesise.
        speed: speed factor forwarded to inference.

    Returns:
        ``((sample_rate, waveform), status_message)`` on success, or
        ``(None, error_message)`` on invalid input or failure.
    """
    # Guard clauses for missing inputs.
    if ref_audio is None:
        return None, "Please provide reference audio."
    if not gen_text.strip():
        return None, "Please enter text to generate."

    load_models()

    try:
        # Empty ref text forces auto-transcription via the Bengali ASR.
        ref_audio_processed, ref_text_processed = preprocess_ref_audio_text_bn(ref_audio, "")

        audio, sr, _ = infer_process(
            ref_audio_processed,
            ref_text_processed,
            gen_text,
            ema_model,
            vocoder,
            mel_spec_type="vocos",
            speed=speed,
            device=device,
        )
    except Exception as e:
        # Surface the failure in the UI instead of crashing the worker.
        return None, f"Error: {str(e)}"

    return (sr, audio), f"Generated with ref: '{ref_text_processed[:50]}...'"
+
|
| 217 |
+
# Gradio UI
|
| 218 |
+
with gr.Blocks(title="Bengali TTS") as demo:
|
| 219 |
+
gr.Markdown("# Bengali Text-to-Speech")
|
| 220 |
+
gr.Markdown("Upload or record Bengali audio (max 15s) as reference, then generate speech.")
|
| 221 |
+
|
| 222 |
+
with gr.Row():
|
| 223 |
+
with gr.Column():
|
| 224 |
+
ref_audio = gr.Audio(
|
| 225 |
+
label="Reference Audio (record or upload)",
|
| 226 |
+
type="filepath",
|
| 227 |
+
max_length=15,
|
| 228 |
+
)
|
| 229 |
+
gen_text = gr.Textbox(
|
| 230 |
+
label="Text to Generate (Bengali)",
|
| 231 |
+
placeholder="Enter Bengali text here...",
|
| 232 |
+
lines=3,
|
| 233 |
+
)
|
| 234 |
+
speed = gr.Slider(
|
| 235 |
+
minimum=0.5,
|
| 236 |
+
maximum=2.0,
|
| 237 |
+
value=1.0,
|
| 238 |
+
step=0.1,
|
| 239 |
+
label="Speed",
|
| 240 |
+
)
|
| 241 |
+
generate_btn = gr.Button("Generate", variant="primary")
|
| 242 |
+
|
| 243 |
+
with gr.Column():
|
| 244 |
+
output_audio = gr.Audio(label="Generated Audio", type="numpy")
|
| 245 |
+
status = gr.Textbox(label="Status", interactive=False)
|
| 246 |
+
|
| 247 |
+
generate_btn.click(
|
| 248 |
+
fn=generate_tts,
|
| 249 |
+
inputs=[ref_audio, gen_text, speed],
|
| 250 |
+
outputs=[output_audio, status],
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
if __name__ == "__main__":
|
| 254 |
+
demo.launch()
|
data/Bengali/vocab.txt
ADDED
|
@@ -0,0 +1,2545 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
!
|
| 3 |
+
"
|
| 4 |
+
#
|
| 5 |
+
$
|
| 6 |
+
%
|
| 7 |
+
&
|
| 8 |
+
'
|
| 9 |
+
(
|
| 10 |
+
)
|
| 11 |
+
*
|
| 12 |
+
+
|
| 13 |
+
,
|
| 14 |
+
-
|
| 15 |
+
.
|
| 16 |
+
/
|
| 17 |
+
0
|
| 18 |
+
1
|
| 19 |
+
2
|
| 20 |
+
3
|
| 21 |
+
4
|
| 22 |
+
5
|
| 23 |
+
6
|
| 24 |
+
7
|
| 25 |
+
8
|
| 26 |
+
9
|
| 27 |
+
:
|
| 28 |
+
;
|
| 29 |
+
=
|
| 30 |
+
>
|
| 31 |
+
?
|
| 32 |
+
@
|
| 33 |
+
A
|
| 34 |
+
B
|
| 35 |
+
C
|
| 36 |
+
D
|
| 37 |
+
E
|
| 38 |
+
F
|
| 39 |
+
G
|
| 40 |
+
H
|
| 41 |
+
I
|
| 42 |
+
J
|
| 43 |
+
K
|
| 44 |
+
L
|
| 45 |
+
M
|
| 46 |
+
N
|
| 47 |
+
O
|
| 48 |
+
P
|
| 49 |
+
Q
|
| 50 |
+
R
|
| 51 |
+
S
|
| 52 |
+
T
|
| 53 |
+
U
|
| 54 |
+
V
|
| 55 |
+
W
|
| 56 |
+
X
|
| 57 |
+
Y
|
| 58 |
+
Z
|
| 59 |
+
[
|
| 60 |
+
\
|
| 61 |
+
]
|
| 62 |
+
_
|
| 63 |
+
a
|
| 64 |
+
a1
|
| 65 |
+
ai1
|
| 66 |
+
ai2
|
| 67 |
+
ai3
|
| 68 |
+
ai4
|
| 69 |
+
an1
|
| 70 |
+
an3
|
| 71 |
+
an4
|
| 72 |
+
ang1
|
| 73 |
+
ang2
|
| 74 |
+
ang4
|
| 75 |
+
ao1
|
| 76 |
+
ao2
|
| 77 |
+
ao3
|
| 78 |
+
ao4
|
| 79 |
+
b
|
| 80 |
+
ba
|
| 81 |
+
ba1
|
| 82 |
+
ba2
|
| 83 |
+
ba3
|
| 84 |
+
ba4
|
| 85 |
+
bai1
|
| 86 |
+
bai2
|
| 87 |
+
bai3
|
| 88 |
+
bai4
|
| 89 |
+
ban1
|
| 90 |
+
ban2
|
| 91 |
+
ban3
|
| 92 |
+
ban4
|
| 93 |
+
bang1
|
| 94 |
+
bang2
|
| 95 |
+
bang3
|
| 96 |
+
bang4
|
| 97 |
+
bao1
|
| 98 |
+
bao2
|
| 99 |
+
bao3
|
| 100 |
+
bao4
|
| 101 |
+
bei
|
| 102 |
+
bei1
|
| 103 |
+
bei2
|
| 104 |
+
bei3
|
| 105 |
+
bei4
|
| 106 |
+
ben1
|
| 107 |
+
ben2
|
| 108 |
+
ben3
|
| 109 |
+
ben4
|
| 110 |
+
beng
|
| 111 |
+
beng1
|
| 112 |
+
beng2
|
| 113 |
+
beng3
|
| 114 |
+
beng4
|
| 115 |
+
bi1
|
| 116 |
+
bi2
|
| 117 |
+
bi3
|
| 118 |
+
bi4
|
| 119 |
+
bian1
|
| 120 |
+
bian2
|
| 121 |
+
bian3
|
| 122 |
+
bian4
|
| 123 |
+
biao1
|
| 124 |
+
biao2
|
| 125 |
+
biao3
|
| 126 |
+
bie1
|
| 127 |
+
bie2
|
| 128 |
+
bie3
|
| 129 |
+
bie4
|
| 130 |
+
bin1
|
| 131 |
+
bin4
|
| 132 |
+
bing1
|
| 133 |
+
bing2
|
| 134 |
+
bing3
|
| 135 |
+
bing4
|
| 136 |
+
bo
|
| 137 |
+
bo1
|
| 138 |
+
bo2
|
| 139 |
+
bo3
|
| 140 |
+
bo4
|
| 141 |
+
bu2
|
| 142 |
+
bu3
|
| 143 |
+
bu4
|
| 144 |
+
c
|
| 145 |
+
ca1
|
| 146 |
+
cai1
|
| 147 |
+
cai2
|
| 148 |
+
cai3
|
| 149 |
+
cai4
|
| 150 |
+
can1
|
| 151 |
+
can2
|
| 152 |
+
can3
|
| 153 |
+
can4
|
| 154 |
+
cang1
|
| 155 |
+
cang2
|
| 156 |
+
cao1
|
| 157 |
+
cao2
|
| 158 |
+
cao3
|
| 159 |
+
ce4
|
| 160 |
+
cen1
|
| 161 |
+
cen2
|
| 162 |
+
ceng1
|
| 163 |
+
ceng2
|
| 164 |
+
ceng4
|
| 165 |
+
cha1
|
| 166 |
+
cha2
|
| 167 |
+
cha3
|
| 168 |
+
cha4
|
| 169 |
+
chai1
|
| 170 |
+
chai2
|
| 171 |
+
chan1
|
| 172 |
+
chan2
|
| 173 |
+
chan3
|
| 174 |
+
chan4
|
| 175 |
+
chang1
|
| 176 |
+
chang2
|
| 177 |
+
chang3
|
| 178 |
+
chang4
|
| 179 |
+
chao1
|
| 180 |
+
chao2
|
| 181 |
+
chao3
|
| 182 |
+
che1
|
| 183 |
+
che2
|
| 184 |
+
che3
|
| 185 |
+
che4
|
| 186 |
+
chen1
|
| 187 |
+
chen2
|
| 188 |
+
chen3
|
| 189 |
+
chen4
|
| 190 |
+
cheng1
|
| 191 |
+
cheng2
|
| 192 |
+
cheng3
|
| 193 |
+
cheng4
|
| 194 |
+
chi1
|
| 195 |
+
chi2
|
| 196 |
+
chi3
|
| 197 |
+
chi4
|
| 198 |
+
chong1
|
| 199 |
+
chong2
|
| 200 |
+
chong3
|
| 201 |
+
chong4
|
| 202 |
+
chou1
|
| 203 |
+
chou2
|
| 204 |
+
chou3
|
| 205 |
+
chou4
|
| 206 |
+
chu1
|
| 207 |
+
chu2
|
| 208 |
+
chu3
|
| 209 |
+
chu4
|
| 210 |
+
chua1
|
| 211 |
+
chuai1
|
| 212 |
+
chuai2
|
| 213 |
+
chuai3
|
| 214 |
+
chuai4
|
| 215 |
+
chuan1
|
| 216 |
+
chuan2
|
| 217 |
+
chuan3
|
| 218 |
+
chuan4
|
| 219 |
+
chuang1
|
| 220 |
+
chuang2
|
| 221 |
+
chuang3
|
| 222 |
+
chuang4
|
| 223 |
+
chui1
|
| 224 |
+
chui2
|
| 225 |
+
chun1
|
| 226 |
+
chun2
|
| 227 |
+
chun3
|
| 228 |
+
chuo1
|
| 229 |
+
chuo4
|
| 230 |
+
ci1
|
| 231 |
+
ci2
|
| 232 |
+
ci3
|
| 233 |
+
ci4
|
| 234 |
+
cong1
|
| 235 |
+
cong2
|
| 236 |
+
cou4
|
| 237 |
+
cu1
|
| 238 |
+
cu4
|
| 239 |
+
cuan1
|
| 240 |
+
cuan2
|
| 241 |
+
cuan4
|
| 242 |
+
cui1
|
| 243 |
+
cui3
|
| 244 |
+
cui4
|
| 245 |
+
cun1
|
| 246 |
+
cun2
|
| 247 |
+
cun4
|
| 248 |
+
cuo1
|
| 249 |
+
cuo2
|
| 250 |
+
cuo4
|
| 251 |
+
d
|
| 252 |
+
da
|
| 253 |
+
da1
|
| 254 |
+
da2
|
| 255 |
+
da3
|
| 256 |
+
da4
|
| 257 |
+
dai1
|
| 258 |
+
dai2
|
| 259 |
+
dai3
|
| 260 |
+
dai4
|
| 261 |
+
dan1
|
| 262 |
+
dan2
|
| 263 |
+
dan3
|
| 264 |
+
dan4
|
| 265 |
+
dang1
|
| 266 |
+
dang2
|
| 267 |
+
dang3
|
| 268 |
+
dang4
|
| 269 |
+
dao1
|
| 270 |
+
dao2
|
| 271 |
+
dao3
|
| 272 |
+
dao4
|
| 273 |
+
de
|
| 274 |
+
de1
|
| 275 |
+
de2
|
| 276 |
+
dei3
|
| 277 |
+
den4
|
| 278 |
+
deng1
|
| 279 |
+
deng2
|
| 280 |
+
deng3
|
| 281 |
+
deng4
|
| 282 |
+
di1
|
| 283 |
+
di2
|
| 284 |
+
di3
|
| 285 |
+
di4
|
| 286 |
+
dia3
|
| 287 |
+
dian1
|
| 288 |
+
dian2
|
| 289 |
+
dian3
|
| 290 |
+
dian4
|
| 291 |
+
diao1
|
| 292 |
+
diao3
|
| 293 |
+
diao4
|
| 294 |
+
die1
|
| 295 |
+
die2
|
| 296 |
+
die4
|
| 297 |
+
ding1
|
| 298 |
+
ding2
|
| 299 |
+
ding3
|
| 300 |
+
ding4
|
| 301 |
+
diu1
|
| 302 |
+
dong1
|
| 303 |
+
dong3
|
| 304 |
+
dong4
|
| 305 |
+
dou1
|
| 306 |
+
dou2
|
| 307 |
+
dou3
|
| 308 |
+
dou4
|
| 309 |
+
du1
|
| 310 |
+
du2
|
| 311 |
+
du3
|
| 312 |
+
du4
|
| 313 |
+
duan1
|
| 314 |
+
duan2
|
| 315 |
+
duan3
|
| 316 |
+
duan4
|
| 317 |
+
dui1
|
| 318 |
+
dui4
|
| 319 |
+
dun1
|
| 320 |
+
dun3
|
| 321 |
+
dun4
|
| 322 |
+
duo1
|
| 323 |
+
duo2
|
| 324 |
+
duo3
|
| 325 |
+
duo4
|
| 326 |
+
e
|
| 327 |
+
e1
|
| 328 |
+
e2
|
| 329 |
+
e3
|
| 330 |
+
e4
|
| 331 |
+
ei2
|
| 332 |
+
en1
|
| 333 |
+
en4
|
| 334 |
+
er
|
| 335 |
+
er2
|
| 336 |
+
er3
|
| 337 |
+
er4
|
| 338 |
+
f
|
| 339 |
+
fa1
|
| 340 |
+
fa2
|
| 341 |
+
fa3
|
| 342 |
+
fa4
|
| 343 |
+
fan1
|
| 344 |
+
fan2
|
| 345 |
+
fan3
|
| 346 |
+
fan4
|
| 347 |
+
fang1
|
| 348 |
+
fang2
|
| 349 |
+
fang3
|
| 350 |
+
fang4
|
| 351 |
+
fei1
|
| 352 |
+
fei2
|
| 353 |
+
fei3
|
| 354 |
+
fei4
|
| 355 |
+
fen1
|
| 356 |
+
fen2
|
| 357 |
+
fen3
|
| 358 |
+
fen4
|
| 359 |
+
feng1
|
| 360 |
+
feng2
|
| 361 |
+
feng3
|
| 362 |
+
feng4
|
| 363 |
+
fo2
|
| 364 |
+
fou2
|
| 365 |
+
fou3
|
| 366 |
+
fu1
|
| 367 |
+
fu2
|
| 368 |
+
fu3
|
| 369 |
+
fu4
|
| 370 |
+
g
|
| 371 |
+
ga1
|
| 372 |
+
ga2
|
| 373 |
+
ga3
|
| 374 |
+
ga4
|
| 375 |
+
gai1
|
| 376 |
+
gai2
|
| 377 |
+
gai3
|
| 378 |
+
gai4
|
| 379 |
+
gan1
|
| 380 |
+
gan2
|
| 381 |
+
gan3
|
| 382 |
+
gan4
|
| 383 |
+
gang1
|
| 384 |
+
gang2
|
| 385 |
+
gang3
|
| 386 |
+
gang4
|
| 387 |
+
gao1
|
| 388 |
+
gao2
|
| 389 |
+
gao3
|
| 390 |
+
gao4
|
| 391 |
+
ge1
|
| 392 |
+
ge2
|
| 393 |
+
ge3
|
| 394 |
+
ge4
|
| 395 |
+
gei2
|
| 396 |
+
gei3
|
| 397 |
+
gen1
|
| 398 |
+
gen2
|
| 399 |
+
gen3
|
| 400 |
+
gen4
|
| 401 |
+
geng1
|
| 402 |
+
geng3
|
| 403 |
+
geng4
|
| 404 |
+
gong1
|
| 405 |
+
gong3
|
| 406 |
+
gong4
|
| 407 |
+
gou1
|
| 408 |
+
gou2
|
| 409 |
+
gou3
|
| 410 |
+
gou4
|
| 411 |
+
gu
|
| 412 |
+
gu1
|
| 413 |
+
gu2
|
| 414 |
+
gu3
|
| 415 |
+
gu4
|
| 416 |
+
gua1
|
| 417 |
+
gua2
|
| 418 |
+
gua3
|
| 419 |
+
gua4
|
| 420 |
+
guai1
|
| 421 |
+
guai2
|
| 422 |
+
guai3
|
| 423 |
+
guai4
|
| 424 |
+
guan1
|
| 425 |
+
guan2
|
| 426 |
+
guan3
|
| 427 |
+
guan4
|
| 428 |
+
guang1
|
| 429 |
+
guang2
|
| 430 |
+
guang3
|
| 431 |
+
guang4
|
| 432 |
+
gui1
|
| 433 |
+
gui2
|
| 434 |
+
gui3
|
| 435 |
+
gui4
|
| 436 |
+
gun3
|
| 437 |
+
gun4
|
| 438 |
+
guo1
|
| 439 |
+
guo2
|
| 440 |
+
guo3
|
| 441 |
+
guo4
|
| 442 |
+
h
|
| 443 |
+
ha1
|
| 444 |
+
ha2
|
| 445 |
+
ha3
|
| 446 |
+
hai1
|
| 447 |
+
hai2
|
| 448 |
+
hai3
|
| 449 |
+
hai4
|
| 450 |
+
han1
|
| 451 |
+
han2
|
| 452 |
+
han3
|
| 453 |
+
han4
|
| 454 |
+
hang1
|
| 455 |
+
hang2
|
| 456 |
+
hang4
|
| 457 |
+
hao1
|
| 458 |
+
hao2
|
| 459 |
+
hao3
|
| 460 |
+
hao4
|
| 461 |
+
he1
|
| 462 |
+
he2
|
| 463 |
+
he4
|
| 464 |
+
hei1
|
| 465 |
+
hen2
|
| 466 |
+
hen3
|
| 467 |
+
hen4
|
| 468 |
+
heng1
|
| 469 |
+
heng2
|
| 470 |
+
heng4
|
| 471 |
+
hong1
|
| 472 |
+
hong2
|
| 473 |
+
hong3
|
| 474 |
+
hong4
|
| 475 |
+
hou1
|
| 476 |
+
hou2
|
| 477 |
+
hou3
|
| 478 |
+
hou4
|
| 479 |
+
hu1
|
| 480 |
+
hu2
|
| 481 |
+
hu3
|
| 482 |
+
hu4
|
| 483 |
+
hua1
|
| 484 |
+
hua2
|
| 485 |
+
hua4
|
| 486 |
+
huai2
|
| 487 |
+
huai4
|
| 488 |
+
huan1
|
| 489 |
+
huan2
|
| 490 |
+
huan3
|
| 491 |
+
huan4
|
| 492 |
+
huang1
|
| 493 |
+
huang2
|
| 494 |
+
huang3
|
| 495 |
+
huang4
|
| 496 |
+
hui1
|
| 497 |
+
hui2
|
| 498 |
+
hui3
|
| 499 |
+
hui4
|
| 500 |
+
hun1
|
| 501 |
+
hun2
|
| 502 |
+
hun4
|
| 503 |
+
huo
|
| 504 |
+
huo1
|
| 505 |
+
huo2
|
| 506 |
+
huo3
|
| 507 |
+
huo4
|
| 508 |
+
i
|
| 509 |
+
j
|
| 510 |
+
ji1
|
| 511 |
+
ji2
|
| 512 |
+
ji3
|
| 513 |
+
ji4
|
| 514 |
+
jia
|
| 515 |
+
jia1
|
| 516 |
+
jia2
|
| 517 |
+
jia3
|
| 518 |
+
jia4
|
| 519 |
+
jian1
|
| 520 |
+
jian2
|
| 521 |
+
jian3
|
| 522 |
+
jian4
|
| 523 |
+
jiang1
|
| 524 |
+
jiang2
|
| 525 |
+
jiang3
|
| 526 |
+
jiang4
|
| 527 |
+
jiao1
|
| 528 |
+
jiao2
|
| 529 |
+
jiao3
|
| 530 |
+
jiao4
|
| 531 |
+
jie1
|
| 532 |
+
jie2
|
| 533 |
+
jie3
|
| 534 |
+
jie4
|
| 535 |
+
jin1
|
| 536 |
+
jin2
|
| 537 |
+
jin3
|
| 538 |
+
jin4
|
| 539 |
+
jing1
|
| 540 |
+
jing2
|
| 541 |
+
jing3
|
| 542 |
+
jing4
|
| 543 |
+
jiong3
|
| 544 |
+
jiu1
|
| 545 |
+
jiu2
|
| 546 |
+
jiu3
|
| 547 |
+
jiu4
|
| 548 |
+
ju1
|
| 549 |
+
ju2
|
| 550 |
+
ju3
|
| 551 |
+
ju4
|
| 552 |
+
juan1
|
| 553 |
+
juan2
|
| 554 |
+
juan3
|
| 555 |
+
juan4
|
| 556 |
+
jue1
|
| 557 |
+
jue2
|
| 558 |
+
jue4
|
| 559 |
+
jun1
|
| 560 |
+
jun4
|
| 561 |
+
k
|
| 562 |
+
ka1
|
| 563 |
+
ka2
|
| 564 |
+
ka3
|
| 565 |
+
kai1
|
| 566 |
+
kai2
|
| 567 |
+
kai3
|
| 568 |
+
kai4
|
| 569 |
+
kan1
|
| 570 |
+
kan2
|
| 571 |
+
kan3
|
| 572 |
+
kan4
|
| 573 |
+
kang1
|
| 574 |
+
kang2
|
| 575 |
+
kang4
|
| 576 |
+
kao1
|
| 577 |
+
kao2
|
| 578 |
+
kao3
|
| 579 |
+
kao4
|
| 580 |
+
ke1
|
| 581 |
+
ke2
|
| 582 |
+
ke3
|
| 583 |
+
ke4
|
| 584 |
+
ken3
|
| 585 |
+
keng1
|
| 586 |
+
kong1
|
| 587 |
+
kong3
|
| 588 |
+
kong4
|
| 589 |
+
kou1
|
| 590 |
+
kou2
|
| 591 |
+
kou3
|
| 592 |
+
kou4
|
| 593 |
+
ku1
|
| 594 |
+
ku2
|
| 595 |
+
ku3
|
| 596 |
+
ku4
|
| 597 |
+
kua1
|
| 598 |
+
kua3
|
| 599 |
+
kua4
|
| 600 |
+
kuai3
|
| 601 |
+
kuai4
|
| 602 |
+
kuan1
|
| 603 |
+
kuan2
|
| 604 |
+
kuan3
|
| 605 |
+
kuang1
|
| 606 |
+
kuang2
|
| 607 |
+
kuang4
|
| 608 |
+
kui1
|
| 609 |
+
kui2
|
| 610 |
+
kui3
|
| 611 |
+
kui4
|
| 612 |
+
kun1
|
| 613 |
+
kun3
|
| 614 |
+
kun4
|
| 615 |
+
kuo4
|
| 616 |
+
l
|
| 617 |
+
la
|
| 618 |
+
la1
|
| 619 |
+
la2
|
| 620 |
+
la3
|
| 621 |
+
la4
|
| 622 |
+
lai2
|
| 623 |
+
lai4
|
| 624 |
+
lan2
|
| 625 |
+
lan3
|
| 626 |
+
lan4
|
| 627 |
+
lang1
|
| 628 |
+
lang2
|
| 629 |
+
lang3
|
| 630 |
+
lang4
|
| 631 |
+
lao1
|
| 632 |
+
lao2
|
| 633 |
+
lao3
|
| 634 |
+
lao4
|
| 635 |
+
le
|
| 636 |
+
le1
|
| 637 |
+
le4
|
| 638 |
+
lei
|
| 639 |
+
lei1
|
| 640 |
+
lei2
|
| 641 |
+
lei3
|
| 642 |
+
lei4
|
| 643 |
+
leng1
|
| 644 |
+
leng2
|
| 645 |
+
leng3
|
| 646 |
+
leng4
|
| 647 |
+
li
|
| 648 |
+
li1
|
| 649 |
+
li2
|
| 650 |
+
li3
|
| 651 |
+
li4
|
| 652 |
+
lia3
|
| 653 |
+
lian2
|
| 654 |
+
lian3
|
| 655 |
+
lian4
|
| 656 |
+
liang2
|
| 657 |
+
liang3
|
| 658 |
+
liang4
|
| 659 |
+
liao1
|
| 660 |
+
liao2
|
| 661 |
+
liao3
|
| 662 |
+
liao4
|
| 663 |
+
lie1
|
| 664 |
+
lie2
|
| 665 |
+
lie3
|
| 666 |
+
lie4
|
| 667 |
+
lin1
|
| 668 |
+
lin2
|
| 669 |
+
lin3
|
| 670 |
+
lin4
|
| 671 |
+
ling2
|
| 672 |
+
ling3
|
| 673 |
+
ling4
|
| 674 |
+
liu1
|
| 675 |
+
liu2
|
| 676 |
+
liu3
|
| 677 |
+
liu4
|
| 678 |
+
long1
|
| 679 |
+
long2
|
| 680 |
+
long3
|
| 681 |
+
long4
|
| 682 |
+
lou1
|
| 683 |
+
lou2
|
| 684 |
+
lou3
|
| 685 |
+
lou4
|
| 686 |
+
lu1
|
| 687 |
+
lu2
|
| 688 |
+
lu3
|
| 689 |
+
lu4
|
| 690 |
+
luan2
|
| 691 |
+
luan3
|
| 692 |
+
luan4
|
| 693 |
+
lun1
|
| 694 |
+
lun2
|
| 695 |
+
lun4
|
| 696 |
+
luo1
|
| 697 |
+
luo2
|
| 698 |
+
luo3
|
| 699 |
+
luo4
|
| 700 |
+
lv2
|
| 701 |
+
lv3
|
| 702 |
+
lv4
|
| 703 |
+
lve3
|
| 704 |
+
lve4
|
| 705 |
+
m
|
| 706 |
+
ma
|
| 707 |
+
ma1
|
| 708 |
+
ma2
|
| 709 |
+
ma3
|
| 710 |
+
ma4
|
| 711 |
+
mai2
|
| 712 |
+
mai3
|
| 713 |
+
mai4
|
| 714 |
+
man1
|
| 715 |
+
man2
|
| 716 |
+
man3
|
| 717 |
+
man4
|
| 718 |
+
mang2
|
| 719 |
+
mang3
|
| 720 |
+
mao1
|
| 721 |
+
mao2
|
| 722 |
+
mao3
|
| 723 |
+
mao4
|
| 724 |
+
me
|
| 725 |
+
mei2
|
| 726 |
+
mei3
|
| 727 |
+
mei4
|
| 728 |
+
men
|
| 729 |
+
men1
|
| 730 |
+
men2
|
| 731 |
+
men4
|
| 732 |
+
meng
|
| 733 |
+
meng1
|
| 734 |
+
meng2
|
| 735 |
+
meng3
|
| 736 |
+
meng4
|
| 737 |
+
mi1
|
| 738 |
+
mi2
|
| 739 |
+
mi3
|
| 740 |
+
mi4
|
| 741 |
+
mian2
|
| 742 |
+
mian3
|
| 743 |
+
mian4
|
| 744 |
+
miao1
|
| 745 |
+
miao2
|
| 746 |
+
miao3
|
| 747 |
+
miao4
|
| 748 |
+
mie1
|
| 749 |
+
mie4
|
| 750 |
+
min2
|
| 751 |
+
min3
|
| 752 |
+
ming2
|
| 753 |
+
ming3
|
| 754 |
+
ming4
|
| 755 |
+
miu4
|
| 756 |
+
mo1
|
| 757 |
+
mo2
|
| 758 |
+
mo3
|
| 759 |
+
mo4
|
| 760 |
+
mou1
|
| 761 |
+
mou2
|
| 762 |
+
mou3
|
| 763 |
+
mu2
|
| 764 |
+
mu3
|
| 765 |
+
mu4
|
| 766 |
+
n
|
| 767 |
+
n2
|
| 768 |
+
na1
|
| 769 |
+
na2
|
| 770 |
+
na3
|
| 771 |
+
na4
|
| 772 |
+
nai2
|
| 773 |
+
nai3
|
| 774 |
+
nai4
|
| 775 |
+
nan1
|
| 776 |
+
nan2
|
| 777 |
+
nan3
|
| 778 |
+
nan4
|
| 779 |
+
nang1
|
| 780 |
+
nang2
|
| 781 |
+
nang3
|
| 782 |
+
nao1
|
| 783 |
+
nao2
|
| 784 |
+
nao3
|
| 785 |
+
nao4
|
| 786 |
+
ne
|
| 787 |
+
ne2
|
| 788 |
+
ne4
|
| 789 |
+
nei3
|
| 790 |
+
nei4
|
| 791 |
+
nen4
|
| 792 |
+
neng2
|
| 793 |
+
ni1
|
| 794 |
+
ni2
|
| 795 |
+
ni3
|
| 796 |
+
ni4
|
| 797 |
+
nian1
|
| 798 |
+
nian2
|
| 799 |
+
nian3
|
| 800 |
+
nian4
|
| 801 |
+
niang2
|
| 802 |
+
niang4
|
| 803 |
+
niao2
|
| 804 |
+
niao3
|
| 805 |
+
niao4
|
| 806 |
+
nie1
|
| 807 |
+
nie4
|
| 808 |
+
nin2
|
| 809 |
+
ning2
|
| 810 |
+
ning3
|
| 811 |
+
ning4
|
| 812 |
+
niu1
|
| 813 |
+
niu2
|
| 814 |
+
niu3
|
| 815 |
+
niu4
|
| 816 |
+
nong2
|
| 817 |
+
nong4
|
| 818 |
+
nou4
|
| 819 |
+
nu2
|
| 820 |
+
nu3
|
| 821 |
+
nu4
|
| 822 |
+
nuan3
|
| 823 |
+
nuo2
|
| 824 |
+
nuo4
|
| 825 |
+
nv2
|
| 826 |
+
nv3
|
| 827 |
+
nve4
|
| 828 |
+
o
|
| 829 |
+
o1
|
| 830 |
+
o2
|
| 831 |
+
ou1
|
| 832 |
+
ou2
|
| 833 |
+
ou3
|
| 834 |
+
ou4
|
| 835 |
+
p
|
| 836 |
+
pa1
|
| 837 |
+
pa2
|
| 838 |
+
pa4
|
| 839 |
+
pai1
|
| 840 |
+
pai2
|
| 841 |
+
pai3
|
| 842 |
+
pai4
|
| 843 |
+
pan1
|
| 844 |
+
pan2
|
| 845 |
+
pan4
|
| 846 |
+
pang1
|
| 847 |
+
pang2
|
| 848 |
+
pang4
|
| 849 |
+
pao1
|
| 850 |
+
pao2
|
| 851 |
+
pao3
|
| 852 |
+
pao4
|
| 853 |
+
pei1
|
| 854 |
+
pei2
|
| 855 |
+
pei4
|
| 856 |
+
pen1
|
| 857 |
+
pen2
|
| 858 |
+
pen4
|
| 859 |
+
peng1
|
| 860 |
+
peng2
|
| 861 |
+
peng3
|
| 862 |
+
peng4
|
| 863 |
+
pi1
|
| 864 |
+
pi2
|
| 865 |
+
pi3
|
| 866 |
+
pi4
|
| 867 |
+
pian1
|
| 868 |
+
pian2
|
| 869 |
+
pian4
|
| 870 |
+
piao1
|
| 871 |
+
piao2
|
| 872 |
+
piao3
|
| 873 |
+
piao4
|
| 874 |
+
pie1
|
| 875 |
+
pie2
|
| 876 |
+
pie3
|
| 877 |
+
pin1
|
| 878 |
+
pin2
|
| 879 |
+
pin3
|
| 880 |
+
pin4
|
| 881 |
+
ping1
|
| 882 |
+
ping2
|
| 883 |
+
po1
|
| 884 |
+
po2
|
| 885 |
+
po3
|
| 886 |
+
po4
|
| 887 |
+
pou1
|
| 888 |
+
pu1
|
| 889 |
+
pu2
|
| 890 |
+
pu3
|
| 891 |
+
pu4
|
| 892 |
+
q
|
| 893 |
+
qi1
|
| 894 |
+
qi2
|
| 895 |
+
qi3
|
| 896 |
+
qi4
|
| 897 |
+
qia1
|
| 898 |
+
qia3
|
| 899 |
+
qia4
|
| 900 |
+
qian1
|
| 901 |
+
qian2
|
| 902 |
+
qian3
|
| 903 |
+
qian4
|
| 904 |
+
qiang1
|
| 905 |
+
qiang2
|
| 906 |
+
qiang3
|
| 907 |
+
qiang4
|
| 908 |
+
qiao1
|
| 909 |
+
qiao2
|
| 910 |
+
qiao3
|
| 911 |
+
qiao4
|
| 912 |
+
qie1
|
| 913 |
+
qie2
|
| 914 |
+
qie3
|
| 915 |
+
qie4
|
| 916 |
+
qin1
|
| 917 |
+
qin2
|
| 918 |
+
qin3
|
| 919 |
+
qin4
|
| 920 |
+
qing1
|
| 921 |
+
qing2
|
| 922 |
+
qing3
|
| 923 |
+
qing4
|
| 924 |
+
qiong1
|
| 925 |
+
qiong2
|
| 926 |
+
qiu1
|
| 927 |
+
qiu2
|
| 928 |
+
qiu3
|
| 929 |
+
qu1
|
| 930 |
+
qu2
|
| 931 |
+
qu3
|
| 932 |
+
qu4
|
| 933 |
+
quan1
|
| 934 |
+
quan2
|
| 935 |
+
quan3
|
| 936 |
+
quan4
|
| 937 |
+
que1
|
| 938 |
+
que2
|
| 939 |
+
que4
|
| 940 |
+
qun2
|
| 941 |
+
r
|
| 942 |
+
ran2
|
| 943 |
+
ran3
|
| 944 |
+
rang1
|
| 945 |
+
rang2
|
| 946 |
+
rang3
|
| 947 |
+
rang4
|
| 948 |
+
rao2
|
| 949 |
+
rao3
|
| 950 |
+
rao4
|
| 951 |
+
re2
|
| 952 |
+
re3
|
| 953 |
+
re4
|
| 954 |
+
ren2
|
| 955 |
+
ren3
|
| 956 |
+
ren4
|
| 957 |
+
reng1
|
| 958 |
+
reng2
|
| 959 |
+
ri4
|
| 960 |
+
rong1
|
| 961 |
+
rong2
|
| 962 |
+
rong3
|
| 963 |
+
rou2
|
| 964 |
+
rou4
|
| 965 |
+
ru2
|
| 966 |
+
ru3
|
| 967 |
+
ru4
|
| 968 |
+
ruan2
|
| 969 |
+
ruan3
|
| 970 |
+
rui3
|
| 971 |
+
rui4
|
| 972 |
+
run4
|
| 973 |
+
ruo4
|
| 974 |
+
s
|
| 975 |
+
sa1
|
| 976 |
+
sa2
|
| 977 |
+
sa3
|
| 978 |
+
sa4
|
| 979 |
+
sai1
|
| 980 |
+
sai4
|
| 981 |
+
san1
|
| 982 |
+
san2
|
| 983 |
+
san3
|
| 984 |
+
san4
|
| 985 |
+
sang1
|
| 986 |
+
sang3
|
| 987 |
+
sang4
|
| 988 |
+
sao1
|
| 989 |
+
sao2
|
| 990 |
+
sao3
|
| 991 |
+
sao4
|
| 992 |
+
se4
|
| 993 |
+
sen1
|
| 994 |
+
seng1
|
| 995 |
+
sha1
|
| 996 |
+
sha2
|
| 997 |
+
sha3
|
| 998 |
+
sha4
|
| 999 |
+
shai1
|
| 1000 |
+
shai2
|
| 1001 |
+
shai3
|
| 1002 |
+
shai4
|
| 1003 |
+
shan1
|
| 1004 |
+
shan3
|
| 1005 |
+
shan4
|
| 1006 |
+
shang
|
| 1007 |
+
shang1
|
| 1008 |
+
shang3
|
| 1009 |
+
shang4
|
| 1010 |
+
shao1
|
| 1011 |
+
shao2
|
| 1012 |
+
shao3
|
| 1013 |
+
shao4
|
| 1014 |
+
she1
|
| 1015 |
+
she2
|
| 1016 |
+
she3
|
| 1017 |
+
she4
|
| 1018 |
+
shei2
|
| 1019 |
+
shen1
|
| 1020 |
+
shen2
|
| 1021 |
+
shen3
|
| 1022 |
+
shen4
|
| 1023 |
+
sheng1
|
| 1024 |
+
sheng2
|
| 1025 |
+
sheng3
|
| 1026 |
+
sheng4
|
| 1027 |
+
shi
|
| 1028 |
+
shi1
|
| 1029 |
+
shi2
|
| 1030 |
+
shi3
|
| 1031 |
+
shi4
|
| 1032 |
+
shou1
|
| 1033 |
+
shou2
|
| 1034 |
+
shou3
|
| 1035 |
+
shou4
|
| 1036 |
+
shu1
|
| 1037 |
+
shu2
|
| 1038 |
+
shu3
|
| 1039 |
+
shu4
|
| 1040 |
+
shua1
|
| 1041 |
+
shua2
|
| 1042 |
+
shua3
|
| 1043 |
+
shua4
|
| 1044 |
+
shuai1
|
| 1045 |
+
shuai3
|
| 1046 |
+
shuai4
|
| 1047 |
+
shuan1
|
| 1048 |
+
shuan4
|
| 1049 |
+
shuang1
|
| 1050 |
+
shuang3
|
| 1051 |
+
shui2
|
| 1052 |
+
shui3
|
| 1053 |
+
shui4
|
| 1054 |
+
shun3
|
| 1055 |
+
shun4
|
| 1056 |
+
shuo1
|
| 1057 |
+
shuo4
|
| 1058 |
+
si1
|
| 1059 |
+
si2
|
| 1060 |
+
si3
|
| 1061 |
+
si4
|
| 1062 |
+
song1
|
| 1063 |
+
song3
|
| 1064 |
+
song4
|
| 1065 |
+
sou1
|
| 1066 |
+
sou3
|
| 1067 |
+
sou4
|
| 1068 |
+
su1
|
| 1069 |
+
su2
|
| 1070 |
+
su4
|
| 1071 |
+
suan1
|
| 1072 |
+
suan4
|
| 1073 |
+
sui1
|
| 1074 |
+
sui2
|
| 1075 |
+
sui3
|
| 1076 |
+
sui4
|
| 1077 |
+
sun1
|
| 1078 |
+
sun3
|
| 1079 |
+
suo
|
| 1080 |
+
suo1
|
| 1081 |
+
suo2
|
| 1082 |
+
suo3
|
| 1083 |
+
t
|
| 1084 |
+
ta1
|
| 1085 |
+
ta2
|
| 1086 |
+
ta3
|
| 1087 |
+
ta4
|
| 1088 |
+
tai1
|
| 1089 |
+
tai2
|
| 1090 |
+
tai4
|
| 1091 |
+
tan1
|
| 1092 |
+
tan2
|
| 1093 |
+
tan3
|
| 1094 |
+
tan4
|
| 1095 |
+
tang1
|
| 1096 |
+
tang2
|
| 1097 |
+
tang3
|
| 1098 |
+
tang4
|
| 1099 |
+
tao1
|
| 1100 |
+
tao2
|
| 1101 |
+
tao3
|
| 1102 |
+
tao4
|
| 1103 |
+
te4
|
| 1104 |
+
teng2
|
| 1105 |
+
ti1
|
| 1106 |
+
ti2
|
| 1107 |
+
ti3
|
| 1108 |
+
ti4
|
| 1109 |
+
tian1
|
| 1110 |
+
tian2
|
| 1111 |
+
tian3
|
| 1112 |
+
tiao1
|
| 1113 |
+
tiao2
|
| 1114 |
+
tiao3
|
| 1115 |
+
tiao4
|
| 1116 |
+
tie1
|
| 1117 |
+
tie2
|
| 1118 |
+
tie3
|
| 1119 |
+
tie4
|
| 1120 |
+
ting1
|
| 1121 |
+
ting2
|
| 1122 |
+
ting3
|
| 1123 |
+
tong1
|
| 1124 |
+
tong2
|
| 1125 |
+
tong3
|
| 1126 |
+
tong4
|
| 1127 |
+
tou
|
| 1128 |
+
tou1
|
| 1129 |
+
tou2
|
| 1130 |
+
tou4
|
| 1131 |
+
tu1
|
| 1132 |
+
tu2
|
| 1133 |
+
tu3
|
| 1134 |
+
tu4
|
| 1135 |
+
tuan1
|
| 1136 |
+
tuan2
|
| 1137 |
+
tui1
|
| 1138 |
+
tui2
|
| 1139 |
+
tui3
|
| 1140 |
+
tui4
|
| 1141 |
+
tun1
|
| 1142 |
+
tun2
|
| 1143 |
+
tun4
|
| 1144 |
+
tuo1
|
| 1145 |
+
tuo2
|
| 1146 |
+
tuo3
|
| 1147 |
+
tuo4
|
| 1148 |
+
u
|
| 1149 |
+
v
|
| 1150 |
+
w
|
| 1151 |
+
wa
|
| 1152 |
+
wa1
|
| 1153 |
+
wa2
|
| 1154 |
+
wa3
|
| 1155 |
+
wa4
|
| 1156 |
+
wai1
|
| 1157 |
+
wai3
|
| 1158 |
+
wai4
|
| 1159 |
+
wan1
|
| 1160 |
+
wan2
|
| 1161 |
+
wan3
|
| 1162 |
+
wan4
|
| 1163 |
+
wang1
|
| 1164 |
+
wang2
|
| 1165 |
+
wang3
|
| 1166 |
+
wang4
|
| 1167 |
+
wei1
|
| 1168 |
+
wei2
|
| 1169 |
+
wei3
|
| 1170 |
+
wei4
|
| 1171 |
+
wen1
|
| 1172 |
+
wen2
|
| 1173 |
+
wen3
|
| 1174 |
+
wen4
|
| 1175 |
+
weng1
|
| 1176 |
+
weng4
|
| 1177 |
+
wo1
|
| 1178 |
+
wo2
|
| 1179 |
+
wo3
|
| 1180 |
+
wo4
|
| 1181 |
+
wu1
|
| 1182 |
+
wu2
|
| 1183 |
+
wu3
|
| 1184 |
+
wu4
|
| 1185 |
+
x
|
| 1186 |
+
xi1
|
| 1187 |
+
xi2
|
| 1188 |
+
xi3
|
| 1189 |
+
xi4
|
| 1190 |
+
xia1
|
| 1191 |
+
xia2
|
| 1192 |
+
xia4
|
| 1193 |
+
xian1
|
| 1194 |
+
xian2
|
| 1195 |
+
xian3
|
| 1196 |
+
xian4
|
| 1197 |
+
xiang1
|
| 1198 |
+
xiang2
|
| 1199 |
+
xiang3
|
| 1200 |
+
xiang4
|
| 1201 |
+
xiao1
|
| 1202 |
+
xiao2
|
| 1203 |
+
xiao3
|
| 1204 |
+
xiao4
|
| 1205 |
+
xie1
|
| 1206 |
+
xie2
|
| 1207 |
+
xie3
|
| 1208 |
+
xie4
|
| 1209 |
+
xin1
|
| 1210 |
+
xin2
|
| 1211 |
+
xin4
|
| 1212 |
+
xing1
|
| 1213 |
+
xing2
|
| 1214 |
+
xing3
|
| 1215 |
+
xing4
|
| 1216 |
+
xiong1
|
| 1217 |
+
xiong2
|
| 1218 |
+
xiu1
|
| 1219 |
+
xiu3
|
| 1220 |
+
xiu4
|
| 1221 |
+
xu
|
| 1222 |
+
xu1
|
| 1223 |
+
xu2
|
| 1224 |
+
xu3
|
| 1225 |
+
xu4
|
| 1226 |
+
xuan1
|
| 1227 |
+
xuan2
|
| 1228 |
+
xuan3
|
| 1229 |
+
xuan4
|
| 1230 |
+
xue1
|
| 1231 |
+
xue2
|
| 1232 |
+
xue3
|
| 1233 |
+
xue4
|
| 1234 |
+
xun1
|
| 1235 |
+
xun2
|
| 1236 |
+
xun4
|
| 1237 |
+
y
|
| 1238 |
+
ya
|
| 1239 |
+
ya1
|
| 1240 |
+
ya2
|
| 1241 |
+
ya3
|
| 1242 |
+
ya4
|
| 1243 |
+
yan1
|
| 1244 |
+
yan2
|
| 1245 |
+
yan3
|
| 1246 |
+
yan4
|
| 1247 |
+
yang1
|
| 1248 |
+
yang2
|
| 1249 |
+
yang3
|
| 1250 |
+
yang4
|
| 1251 |
+
yao1
|
| 1252 |
+
yao2
|
| 1253 |
+
yao3
|
| 1254 |
+
yao4
|
| 1255 |
+
ye1
|
| 1256 |
+
ye2
|
| 1257 |
+
ye3
|
| 1258 |
+
ye4
|
| 1259 |
+
yi
|
| 1260 |
+
yi1
|
| 1261 |
+
yi2
|
| 1262 |
+
yi3
|
| 1263 |
+
yi4
|
| 1264 |
+
yin1
|
| 1265 |
+
yin2
|
| 1266 |
+
yin3
|
| 1267 |
+
yin4
|
| 1268 |
+
ying1
|
| 1269 |
+
ying2
|
| 1270 |
+
ying3
|
| 1271 |
+
ying4
|
| 1272 |
+
yo1
|
| 1273 |
+
yong1
|
| 1274 |
+
yong2
|
| 1275 |
+
yong3
|
| 1276 |
+
yong4
|
| 1277 |
+
you1
|
| 1278 |
+
you2
|
| 1279 |
+
you3
|
| 1280 |
+
you4
|
| 1281 |
+
yu1
|
| 1282 |
+
yu2
|
| 1283 |
+
yu3
|
| 1284 |
+
yu4
|
| 1285 |
+
yuan1
|
| 1286 |
+
yuan2
|
| 1287 |
+
yuan3
|
| 1288 |
+
yuan4
|
| 1289 |
+
yue1
|
| 1290 |
+
yue4
|
| 1291 |
+
yun1
|
| 1292 |
+
yun2
|
| 1293 |
+
yun3
|
| 1294 |
+
yun4
|
| 1295 |
+
z
|
| 1296 |
+
za1
|
| 1297 |
+
za2
|
| 1298 |
+
za3
|
| 1299 |
+
zai1
|
| 1300 |
+
zai3
|
| 1301 |
+
zai4
|
| 1302 |
+
zan1
|
| 1303 |
+
zan2
|
| 1304 |
+
zan3
|
| 1305 |
+
zan4
|
| 1306 |
+
zang1
|
| 1307 |
+
zang4
|
| 1308 |
+
zao1
|
| 1309 |
+
zao2
|
| 1310 |
+
zao3
|
| 1311 |
+
zao4
|
| 1312 |
+
ze2
|
| 1313 |
+
ze4
|
| 1314 |
+
zei2
|
| 1315 |
+
zen3
|
| 1316 |
+
zeng1
|
| 1317 |
+
zeng4
|
| 1318 |
+
zha1
|
| 1319 |
+
zha2
|
| 1320 |
+
zha3
|
| 1321 |
+
zha4
|
| 1322 |
+
zhai1
|
| 1323 |
+
zhai2
|
| 1324 |
+
zhai3
|
| 1325 |
+
zhai4
|
| 1326 |
+
zhan1
|
| 1327 |
+
zhan2
|
| 1328 |
+
zhan3
|
| 1329 |
+
zhan4
|
| 1330 |
+
zhang1
|
| 1331 |
+
zhang2
|
| 1332 |
+
zhang3
|
| 1333 |
+
zhang4
|
| 1334 |
+
zhao1
|
| 1335 |
+
zhao2
|
| 1336 |
+
zhao3
|
| 1337 |
+
zhao4
|
| 1338 |
+
zhe
|
| 1339 |
+
zhe1
|
| 1340 |
+
zhe2
|
| 1341 |
+
zhe3
|
| 1342 |
+
zhe4
|
| 1343 |
+
zhen1
|
| 1344 |
+
zhen2
|
| 1345 |
+
zhen3
|
| 1346 |
+
zhen4
|
| 1347 |
+
zheng1
|
| 1348 |
+
zheng2
|
| 1349 |
+
zheng3
|
| 1350 |
+
zheng4
|
| 1351 |
+
zhi1
|
| 1352 |
+
zhi2
|
| 1353 |
+
zhi3
|
| 1354 |
+
zhi4
|
| 1355 |
+
zhong1
|
| 1356 |
+
zhong2
|
| 1357 |
+
zhong3
|
| 1358 |
+
zhong4
|
| 1359 |
+
zhou1
|
| 1360 |
+
zhou2
|
| 1361 |
+
zhou3
|
| 1362 |
+
zhou4
|
| 1363 |
+
zhu1
|
| 1364 |
+
zhu2
|
| 1365 |
+
zhu3
|
| 1366 |
+
zhu4
|
| 1367 |
+
zhua1
|
| 1368 |
+
zhua2
|
| 1369 |
+
zhua3
|
| 1370 |
+
zhuai1
|
| 1371 |
+
zhuai3
|
| 1372 |
+
zhuai4
|
| 1373 |
+
zhuan1
|
| 1374 |
+
zhuan2
|
| 1375 |
+
zhuan3
|
| 1376 |
+
zhuan4
|
| 1377 |
+
zhuang1
|
| 1378 |
+
zhuang4
|
| 1379 |
+
zhui1
|
| 1380 |
+
zhui4
|
| 1381 |
+
zhun1
|
| 1382 |
+
zhun2
|
| 1383 |
+
zhun3
|
| 1384 |
+
zhuo1
|
| 1385 |
+
zhuo2
|
| 1386 |
+
zi
|
| 1387 |
+
zi1
|
| 1388 |
+
zi2
|
| 1389 |
+
zi3
|
| 1390 |
+
zi4
|
| 1391 |
+
zong1
|
| 1392 |
+
zong2
|
| 1393 |
+
zong3
|
| 1394 |
+
zong4
|
| 1395 |
+
zou1
|
| 1396 |
+
zou2
|
| 1397 |
+
zou3
|
| 1398 |
+
zou4
|
| 1399 |
+
zu1
|
| 1400 |
+
zu2
|
| 1401 |
+
zu3
|
| 1402 |
+
zuan1
|
| 1403 |
+
zuan3
|
| 1404 |
+
zuan4
|
| 1405 |
+
zui2
|
| 1406 |
+
zui3
|
| 1407 |
+
zui4
|
| 1408 |
+
zun1
|
| 1409 |
+
zuo
|
| 1410 |
+
zuo1
|
| 1411 |
+
zuo2
|
| 1412 |
+
zuo3
|
| 1413 |
+
zuo4
|
| 1414 |
+
{
|
| 1415 |
+
~
|
| 1416 |
+
¡
|
| 1417 |
+
¢
|
| 1418 |
+
£
|
| 1419 |
+
¥
|
| 1420 |
+
§
|
| 1421 |
+
¨
|
| 1422 |
+
©
|
| 1423 |
+
«
|
| 1424 |
+
®
|
| 1425 |
+
¯
|
| 1426 |
+
°
|
| 1427 |
+
±
|
| 1428 |
+
²
|
| 1429 |
+
³
|
| 1430 |
+
´
|
| 1431 |
+
µ
|
| 1432 |
+
·
|
| 1433 |
+
¹
|
| 1434 |
+
º
|
| 1435 |
+
»
|
| 1436 |
+
¼
|
| 1437 |
+
½
|
| 1438 |
+
¾
|
| 1439 |
+
¿
|
| 1440 |
+
À
|
| 1441 |
+
Á
|
| 1442 |
+
Â
|
| 1443 |
+
Ã
|
| 1444 |
+
Ä
|
| 1445 |
+
Å
|
| 1446 |
+
Æ
|
| 1447 |
+
Ç
|
| 1448 |
+
È
|
| 1449 |
+
É
|
| 1450 |
+
Ê
|
| 1451 |
+
Í
|
| 1452 |
+
Î
|
| 1453 |
+
Ñ
|
| 1454 |
+
Ó
|
| 1455 |
+
Ö
|
| 1456 |
+
×
|
| 1457 |
+
Ø
|
| 1458 |
+
Ú
|
| 1459 |
+
Ü
|
| 1460 |
+
Ý
|
| 1461 |
+
Þ
|
| 1462 |
+
ß
|
| 1463 |
+
à
|
| 1464 |
+
á
|
| 1465 |
+
â
|
| 1466 |
+
ã
|
| 1467 |
+
ä
|
| 1468 |
+
å
|
| 1469 |
+
æ
|
| 1470 |
+
ç
|
| 1471 |
+
è
|
| 1472 |
+
é
|
| 1473 |
+
ê
|
| 1474 |
+
ë
|
| 1475 |
+
ì
|
| 1476 |
+
í
|
| 1477 |
+
î
|
| 1478 |
+
ï
|
| 1479 |
+
ð
|
| 1480 |
+
ñ
|
| 1481 |
+
ò
|
| 1482 |
+
ó
|
| 1483 |
+
ô
|
| 1484 |
+
õ
|
| 1485 |
+
ö
|
| 1486 |
+
ø
|
| 1487 |
+
ù
|
| 1488 |
+
ú
|
| 1489 |
+
û
|
| 1490 |
+
ü
|
| 1491 |
+
ý
|
| 1492 |
+
Ā
|
| 1493 |
+
ā
|
| 1494 |
+
ă
|
| 1495 |
+
ą
|
| 1496 |
+
ć
|
| 1497 |
+
Č
|
| 1498 |
+
č
|
| 1499 |
+
Đ
|
| 1500 |
+
đ
|
| 1501 |
+
ē
|
| 1502 |
+
ė
|
| 1503 |
+
ę
|
| 1504 |
+
ě
|
| 1505 |
+
ĝ
|
| 1506 |
+
ğ
|
| 1507 |
+
ħ
|
| 1508 |
+
ī
|
| 1509 |
+
į
|
| 1510 |
+
İ
|
| 1511 |
+
ı
|
| 1512 |
+
Ł
|
| 1513 |
+
ł
|
| 1514 |
+
ń
|
| 1515 |
+
ņ
|
| 1516 |
+
ň
|
| 1517 |
+
ŋ
|
| 1518 |
+
Ō
|
| 1519 |
+
ō
|
| 1520 |
+
ő
|
| 1521 |
+
œ
|
| 1522 |
+
ř
|
| 1523 |
+
Ś
|
| 1524 |
+
ś
|
| 1525 |
+
Ş
|
| 1526 |
+
ş
|
| 1527 |
+
Š
|
| 1528 |
+
š
|
| 1529 |
+
Ť
|
| 1530 |
+
ť
|
| 1531 |
+
ũ
|
| 1532 |
+
ū
|
| 1533 |
+
ź
|
| 1534 |
+
Ż
|
| 1535 |
+
ż
|
| 1536 |
+
Ž
|
| 1537 |
+
ž
|
| 1538 |
+
ơ
|
| 1539 |
+
ư
|
| 1540 |
+
ǎ
|
| 1541 |
+
ǐ
|
| 1542 |
+
ǒ
|
| 1543 |
+
ǔ
|
| 1544 |
+
ǚ
|
| 1545 |
+
ș
|
| 1546 |
+
ț
|
| 1547 |
+
ɑ
|
| 1548 |
+
ɔ
|
| 1549 |
+
ɕ
|
| 1550 |
+
ə
|
| 1551 |
+
ɛ
|
| 1552 |
+
ɜ
|
| 1553 |
+
ɡ
|
| 1554 |
+
ɣ
|
| 1555 |
+
ɪ
|
| 1556 |
+
ɫ
|
| 1557 |
+
ɴ
|
| 1558 |
+
ɹ
|
| 1559 |
+
ɾ
|
| 1560 |
+
ʃ
|
| 1561 |
+
ʊ
|
| 1562 |
+
ʌ
|
| 1563 |
+
ʒ
|
| 1564 |
+
ʔ
|
| 1565 |
+
ʰ
|
| 1566 |
+
ʷ
|
| 1567 |
+
ʻ
|
| 1568 |
+
ʾ
|
| 1569 |
+
ʿ
|
| 1570 |
+
ˈ
|
| 1571 |
+
ː
|
| 1572 |
+
˙
|
| 1573 |
+
˜
|
| 1574 |
+
ˢ
|
| 1575 |
+
́
|
| 1576 |
+
̅
|
| 1577 |
+
Α
|
| 1578 |
+
Β
|
| 1579 |
+
Δ
|
| 1580 |
+
Ε
|
| 1581 |
+
Θ
|
| 1582 |
+
Κ
|
| 1583 |
+
Λ
|
| 1584 |
+
Μ
|
| 1585 |
+
Ξ
|
| 1586 |
+
Π
|
| 1587 |
+
Σ
|
| 1588 |
+
Τ
|
| 1589 |
+
Φ
|
| 1590 |
+
Χ
|
| 1591 |
+
Ψ
|
| 1592 |
+
Ω
|
| 1593 |
+
ά
|
| 1594 |
+
έ
|
| 1595 |
+
ή
|
| 1596 |
+
ί
|
| 1597 |
+
α
|
| 1598 |
+
β
|
| 1599 |
+
γ
|
| 1600 |
+
δ
|
| 1601 |
+
ε
|
| 1602 |
+
ζ
|
| 1603 |
+
η
|
| 1604 |
+
θ
|
| 1605 |
+
ι
|
| 1606 |
+
κ
|
| 1607 |
+
λ
|
| 1608 |
+
μ
|
| 1609 |
+
ν
|
| 1610 |
+
ξ
|
| 1611 |
+
ο
|
| 1612 |
+
π
|
| 1613 |
+
ρ
|
| 1614 |
+
ς
|
| 1615 |
+
σ
|
| 1616 |
+
τ
|
| 1617 |
+
υ
|
| 1618 |
+
φ
|
| 1619 |
+
χ
|
| 1620 |
+
ψ
|
| 1621 |
+
ω
|
| 1622 |
+
ϊ
|
| 1623 |
+
ό
|
| 1624 |
+
ύ
|
| 1625 |
+
ώ
|
| 1626 |
+
ϕ
|
| 1627 |
+
ϵ
|
| 1628 |
+
Ё
|
| 1629 |
+
А
|
| 1630 |
+
Б
|
| 1631 |
+
В
|
| 1632 |
+
Г
|
| 1633 |
+
Д
|
| 1634 |
+
Е
|
| 1635 |
+
Ж
|
| 1636 |
+
З
|
| 1637 |
+
И
|
| 1638 |
+
Й
|
| 1639 |
+
К
|
| 1640 |
+
Л
|
| 1641 |
+
М
|
| 1642 |
+
Н
|
| 1643 |
+
О
|
| 1644 |
+
П
|
| 1645 |
+
Р
|
| 1646 |
+
С
|
| 1647 |
+
Т
|
| 1648 |
+
У
|
| 1649 |
+
Ф
|
| 1650 |
+
Х
|
| 1651 |
+
Ц
|
| 1652 |
+
Ч
|
| 1653 |
+
Ш
|
| 1654 |
+
Щ
|
| 1655 |
+
Ы
|
| 1656 |
+
Ь
|
| 1657 |
+
Э
|
| 1658 |
+
Ю
|
| 1659 |
+
Я
|
| 1660 |
+
а
|
| 1661 |
+
б
|
| 1662 |
+
в
|
| 1663 |
+
г
|
| 1664 |
+
д
|
| 1665 |
+
е
|
| 1666 |
+
ж
|
| 1667 |
+
з
|
| 1668 |
+
и
|
| 1669 |
+
й
|
| 1670 |
+
к
|
| 1671 |
+
л
|
| 1672 |
+
м
|
| 1673 |
+
н
|
| 1674 |
+
о
|
| 1675 |
+
п
|
| 1676 |
+
р
|
| 1677 |
+
с
|
| 1678 |
+
т
|
| 1679 |
+
у
|
| 1680 |
+
ф
|
| 1681 |
+
х
|
| 1682 |
+
ц
|
| 1683 |
+
ч
|
| 1684 |
+
ш
|
| 1685 |
+
щ
|
| 1686 |
+
ъ
|
| 1687 |
+
ы
|
| 1688 |
+
ь
|
| 1689 |
+
э
|
| 1690 |
+
ю
|
| 1691 |
+
я
|
| 1692 |
+
ё
|
| 1693 |
+
і
|
| 1694 |
+
ְ
|
| 1695 |
+
ִ
|
| 1696 |
+
ֵ
|
| 1697 |
+
ֶ
|
| 1698 |
+
ַ
|
| 1699 |
+
ָ
|
| 1700 |
+
ֹ
|
| 1701 |
+
|
| 1702 |
+
|
| 1703 |
+
|
| 1704 |
+
|
| 1705 |
+
|
| 1706 |
+
<
|
| 1707 |
+
^
|
| 1708 |
+
`
|
| 1709 |
+
|
|
| 1710 |
+
}
|
| 1711 |
+
|
| 1712 |
+
ʼ
|
| 1713 |
+
̮
|
| 1714 |
+
ँ
|
| 1715 |
+
ं
|
| 1716 |
+
ः
|
| 1717 |
+
अ
|
| 1718 |
+
आ
|
| 1719 |
+
इ
|
| 1720 |
+
ई
|
| 1721 |
+
उ
|
| 1722 |
+
ऊ
|
| 1723 |
+
ऋ
|
| 1724 |
+
ऍ
|
| 1725 |
+
ऎ
|
| 1726 |
+
ए
|
| 1727 |
+
ऐ
|
| 1728 |
+
ऑ
|
| 1729 |
+
ऒ
|
| 1730 |
+
ओ
|
| 1731 |
+
औ
|
| 1732 |
+
क
|
| 1733 |
+
ख
|
| 1734 |
+
ग
|
| 1735 |
+
घ
|
| 1736 |
+
ङ
|
| 1737 |
+
च
|
| 1738 |
+
छ
|
| 1739 |
+
ज
|
| 1740 |
+
झ
|
| 1741 |
+
ञ
|
| 1742 |
+
ट
|
| 1743 |
+
ठ
|
| 1744 |
+
ड
|
| 1745 |
+
ढ
|
| 1746 |
+
ण
|
| 1747 |
+
त
|
| 1748 |
+
थ
|
| 1749 |
+
द
|
| 1750 |
+
ध
|
| 1751 |
+
न
|
| 1752 |
+
ऩ
|
| 1753 |
+
प
|
| 1754 |
+
फ
|
| 1755 |
+
ब
|
| 1756 |
+
भ
|
| 1757 |
+
म
|
| 1758 |
+
य
|
| 1759 |
+
र
|
| 1760 |
+
ऱ
|
| 1761 |
+
ल
|
| 1762 |
+
ळ
|
| 1763 |
+
ऴ
|
| 1764 |
+
व
|
| 1765 |
+
श
|
| 1766 |
+
ष
|
| 1767 |
+
स
|
| 1768 |
+
ह
|
| 1769 |
+
ऺ
|
| 1770 |
+
ऻ
|
| 1771 |
+
़
|
| 1772 |
+
ऽ
|
| 1773 |
+
ा
|
| 1774 |
+
ि
|
| 1775 |
+
ी
|
| 1776 |
+
ु
|
| 1777 |
+
ू
|
| 1778 |
+
ृ
|
| 1779 |
+
ॄ
|
| 1780 |
+
ॅ
|
| 1781 |
+
ॆ
|
| 1782 |
+
े
|
| 1783 |
+
ै
|
| 1784 |
+
ॉ
|
| 1785 |
+
ॊ
|
| 1786 |
+
ो
|
| 1787 |
+
ौ
|
| 1788 |
+
्
|
| 1789 |
+
ॐ
|
| 1790 |
+
॑
|
| 1791 |
+
॒
|
| 1792 |
+
॔
|
| 1793 |
+
ॕ
|
| 1794 |
+
ॖ
|
| 1795 |
+
क़
|
| 1796 |
+
ख़
|
| 1797 |
+
ग़
|
| 1798 |
+
ज़
|
| 1799 |
+
ड़
|
| 1800 |
+
ढ़
|
| 1801 |
+
फ़
|
| 1802 |
+
य़
|
| 1803 |
+
ॠ
|
| 1804 |
+
।
|
| 1805 |
+
॥
|
| 1806 |
+
०
|
| 1807 |
+
१
|
| 1808 |
+
२
|
| 1809 |
+
३
|
| 1810 |
+
४
|
| 1811 |
+
५
|
| 1812 |
+
६
|
| 1813 |
+
७
|
| 1814 |
+
८
|
| 1815 |
+
९
|
| 1816 |
+
॰
|
| 1817 |
+
ॲ
|
| 1818 |
+
ঁ
|
| 1819 |
+
ং
|
| 1820 |
+
ঃ
|
| 1821 |
+
অ
|
| 1822 |
+
আ
|
| 1823 |
+
ই
|
| 1824 |
+
ঈ
|
| 1825 |
+
উ
|
| 1826 |
+
ঊ
|
| 1827 |
+
ঋ
|
| 1828 |
+
ঌ
|
| 1829 |
+
|
| 1830 |
+
এ
|
| 1831 |
+
ঐ
|
| 1832 |
+
ও
|
| 1833 |
+
ঔ
|
| 1834 |
+
ক
|
| 1835 |
+
খ
|
| 1836 |
+
গ
|
| 1837 |
+
ঘ
|
| 1838 |
+
ঙ
|
| 1839 |
+
চ
|
| 1840 |
+
ছ
|
| 1841 |
+
জ
|
| 1842 |
+
ঝ
|
| 1843 |
+
ঞ
|
| 1844 |
+
ট
|
| 1845 |
+
ঠ
|
| 1846 |
+
ড
|
| 1847 |
+
ঢ
|
| 1848 |
+
ণ
|
| 1849 |
+
ত
|
| 1850 |
+
থ
|
| 1851 |
+
দ
|
| 1852 |
+
ধ
|
| 1853 |
+
ন
|
| 1854 |
+
প
|
| 1855 |
+
ফ
|
| 1856 |
+
ব
|
| 1857 |
+
ভ
|
| 1858 |
+
ম
|
| 1859 |
+
য
|
| 1860 |
+
র
|
| 1861 |
+
ল
|
| 1862 |
+
|
| 1863 |
+
শ
|
| 1864 |
+
ষ
|
| 1865 |
+
স
|
| 1866 |
+
হ
|
| 1867 |
+
়
|
| 1868 |
+
ঽ
|
| 1869 |
+
া
|
| 1870 |
+
ি
|
| 1871 |
+
ী
|
| 1872 |
+
ু
|
| 1873 |
+
ূ
|
| 1874 |
+
ৃ
|
| 1875 |
+
ৄ
|
| 1876 |
+
|
| 1877 |
+
ে
|
| 1878 |
+
ৈ
|
| 1879 |
+
ো
|
| 1880 |
+
ৌ
|
| 1881 |
+
্
|
| 1882 |
+
ৎ
|
| 1883 |
+
ৗ
|
| 1884 |
+
ড়
|
| 1885 |
+
ঢ়
|
| 1886 |
+
য়
|
| 1887 |
+
০
|
| 1888 |
+
১
|
| 1889 |
+
২
|
| 1890 |
+
৩
|
| 1891 |
+
৪
|
| 1892 |
+
৫
|
| 1893 |
+
৬
|
| 1894 |
+
৭
|
| 1895 |
+
৮
|
| 1896 |
+
৯
|
| 1897 |
+
ৰ
|
| 1898 |
+
ৱ
|
| 1899 |
+
৲
|
| 1900 |
+
৷
|
| 1901 |
+
৹
|
| 1902 |
+
৻
|
| 1903 |
+
ਂ
|
| 1904 |
+
ਃ
|
| 1905 |
+
ਅ
|
| 1906 |
+
ਆ
|
| 1907 |
+
ਇ
|
| 1908 |
+
ਈ
|
| 1909 |
+
ਉ
|
| 1910 |
+
ਊ
|
| 1911 |
+
ਏ
|
| 1912 |
+
ਐ
|
| 1913 |
+
ਓ
|
| 1914 |
+
ਔ
|
| 1915 |
+
ਕ
|
| 1916 |
+
ਖ
|
| 1917 |
+
ਗ
|
| 1918 |
+
ਘ
|
| 1919 |
+
ਙ
|
| 1920 |
+
ਚ
|
| 1921 |
+
ਛ
|
| 1922 |
+
ਜ
|
| 1923 |
+
ਝ
|
| 1924 |
+
ਞ
|
| 1925 |
+
ਟ
|
| 1926 |
+
ਠ
|
| 1927 |
+
ਡ
|
| 1928 |
+
ਢ
|
| 1929 |
+
ਣ
|
| 1930 |
+
ਤ
|
| 1931 |
+
ਥ
|
| 1932 |
+
ਦ
|
| 1933 |
+
ਧ
|
| 1934 |
+
ਨ
|
| 1935 |
+
ਪ
|
| 1936 |
+
ਫ
|
| 1937 |
+
ਬ
|
| 1938 |
+
ਭ
|
| 1939 |
+
ਮ
|
| 1940 |
+
ਯ
|
| 1941 |
+
ਰ
|
| 1942 |
+
ਲ
|
| 1943 |
+
ਲ਼
|
| 1944 |
+
ਵ
|
| 1945 |
+
ਸ਼
|
| 1946 |
+
ਸ
|
| 1947 |
+
ਹ
|
| 1948 |
+
਼
|
| 1949 |
+
ਾ
|
| 1950 |
+
ਿ
|
| 1951 |
+
ੀ
|
| 1952 |
+
ੁ
|
| 1953 |
+
ੂ
|
| 1954 |
+
ੇ
|
| 1955 |
+
ੈ
|
| 1956 |
+
ੋ
|
| 1957 |
+
ੌ
|
| 1958 |
+
੍
|
| 1959 |
+
ੑ
|
| 1960 |
+
ਖ਼
|
| 1961 |
+
ਗ਼
|
| 1962 |
+
ਜ਼
|
| 1963 |
+
ੜ
|
| 1964 |
+
ਫ਼
|
| 1965 |
+
ੰ
|
| 1966 |
+
ੱ
|
| 1967 |
+
ੲ
|
| 1968 |
+
ੳ
|
| 1969 |
+
ઁ
|
| 1970 |
+
ં
|
| 1971 |
+
ઃ
|
| 1972 |
+
અ
|
| 1973 |
+
આ
|
| 1974 |
+
ઇ
|
| 1975 |
+
ઈ
|
| 1976 |
+
ઉ
|
| 1977 |
+
ઊ
|
| 1978 |
+
ઋ
|
| 1979 |
+
ઍ
|
| 1980 |
+
એ
|
| 1981 |
+
ઐ
|
| 1982 |
+
ઑ
|
| 1983 |
+
ઓ
|
| 1984 |
+
ઔ
|
| 1985 |
+
ક
|
| 1986 |
+
ખ
|
| 1987 |
+
ગ
|
| 1988 |
+
ઘ
|
| 1989 |
+
ચ
|
| 1990 |
+
છ
|
| 1991 |
+
જ
|
| 1992 |
+
ઝ
|
| 1993 |
+
ઞ
|
| 1994 |
+
ટ
|
| 1995 |
+
ઠ
|
| 1996 |
+
ડ
|
| 1997 |
+
ઢ
|
| 1998 |
+
ણ
|
| 1999 |
+
ત
|
| 2000 |
+
થ
|
| 2001 |
+
દ
|
| 2002 |
+
ધ
|
| 2003 |
+
ન
|
| 2004 |
+
પ
|
| 2005 |
+
ફ
|
| 2006 |
+
બ
|
| 2007 |
+
ભ
|
| 2008 |
+
મ
|
| 2009 |
+
ય
|
| 2010 |
+
ર
|
| 2011 |
+
લ
|
| 2012 |
+
ળ
|
| 2013 |
+
વ
|
| 2014 |
+
શ
|
| 2015 |
+
ષ
|
| 2016 |
+
સ
|
| 2017 |
+
હ
|
| 2018 |
+
઼
|
| 2019 |
+
ા
|
| 2020 |
+
િ
|
| 2021 |
+
ી
|
| 2022 |
+
ુ
|
| 2023 |
+
ૂ
|
| 2024 |
+
ૃ
|
| 2025 |
+
ૄ
|
| 2026 |
+
ૅ
|
| 2027 |
+
ે
|
| 2028 |
+
ૈ
|
| 2029 |
+
ૉ
|
| 2030 |
+
ો
|
| 2031 |
+
ૌ
|
| 2032 |
+
્
|
| 2033 |
+
ૐ
|
| 2034 |
+
ૠ
|
| 2035 |
+
૧
|
| 2036 |
+
૨
|
| 2037 |
+
૪
|
| 2038 |
+
૫
|
| 2039 |
+
ଁ
|
| 2040 |
+
ଂ
|
| 2041 |
+
ଃ
|
| 2042 |
+
ଅ
|
| 2043 |
+
ଆ
|
| 2044 |
+
ଇ
|
| 2045 |
+
ଈ
|
| 2046 |
+
ଉ
|
| 2047 |
+
ଊ
|
| 2048 |
+
ଋ
|
| 2049 |
+
ଏ
|
| 2050 |
+
ଐ
|
| 2051 |
+
ଓ
|
| 2052 |
+
ଔ
|
| 2053 |
+
କ
|
| 2054 |
+
ଖ
|
| 2055 |
+
ଗ
|
| 2056 |
+
ଘ
|
| 2057 |
+
ଙ
|
| 2058 |
+
ଚ
|
| 2059 |
+
ଛ
|
| 2060 |
+
ଜ
|
| 2061 |
+
ଝ
|
| 2062 |
+
ଞ
|
| 2063 |
+
ଟ
|
| 2064 |
+
ଠ
|
| 2065 |
+
ଡ
|
| 2066 |
+
ଢ
|
| 2067 |
+
ଣ
|
| 2068 |
+
ତ
|
| 2069 |
+
ଥ
|
| 2070 |
+
ଦ
|
| 2071 |
+
ଧ
|
| 2072 |
+
ନ
|
| 2073 |
+
ପ
|
| 2074 |
+
ଫ
|
| 2075 |
+
ବ
|
| 2076 |
+
ଭ
|
| 2077 |
+
ମ
|
| 2078 |
+
ଯ
|
| 2079 |
+
ର
|
| 2080 |
+
ଲ
|
| 2081 |
+
ଳ
|
| 2082 |
+
ଵ
|
| 2083 |
+
ଶ
|
| 2084 |
+
ଷ
|
| 2085 |
+
ସ
|
| 2086 |
+
ହ
|
| 2087 |
+
଼
|
| 2088 |
+
ା
|
| 2089 |
+
ି
|
| 2090 |
+
ୀ
|
| 2091 |
+
ୁ
|
| 2092 |
+
ୂ
|
| 2093 |
+
ୃ
|
| 2094 |
+
ୄ
|
| 2095 |
+
େ
|
| 2096 |
+
ୈ
|
| 2097 |
+
ୋ
|
| 2098 |
+
ୌ
|
| 2099 |
+
୍
|
| 2100 |
+
ୖ
|
| 2101 |
+
ୗ
|
| 2102 |
+
ଡ଼
|
| 2103 |
+
ଢ଼
|
| 2104 |
+
ୟ
|
| 2105 |
+
ୠ
|
| 2106 |
+
୦
|
| 2107 |
+
୧
|
| 2108 |
+
୨
|
| 2109 |
+
୪
|
| 2110 |
+
୫
|
| 2111 |
+
୬
|
| 2112 |
+
୮
|
| 2113 |
+
ୱ
|
| 2114 |
+
ஃ
|
| 2115 |
+
அ
|
| 2116 |
+
ஆ
|
| 2117 |
+
இ
|
| 2118 |
+
ஈ
|
| 2119 |
+
உ
|
| 2120 |
+
ஊ
|
| 2121 |
+
எ
|
| 2122 |
+
ஏ
|
| 2123 |
+
ஐ
|
| 2124 |
+
ஒ
|
| 2125 |
+
ஓ
|
| 2126 |
+
ஔ
|
| 2127 |
+
க
|
| 2128 |
+
ங
|
| 2129 |
+
ச
|
| 2130 |
+
ஜ
|
| 2131 |
+
ஞ
|
| 2132 |
+
ட
|
| 2133 |
+
ண
|
| 2134 |
+
த
|
| 2135 |
+
ந
|
| 2136 |
+
ன
|
| 2137 |
+
ப
|
| 2138 |
+
ம
|
| 2139 |
+
ய
|
| 2140 |
+
ர
|
| 2141 |
+
ற
|
| 2142 |
+
ல
|
| 2143 |
+
ள
|
| 2144 |
+
ழ
|
| 2145 |
+
வ
|
| 2146 |
+
ஷ
|
| 2147 |
+
ஸ
|
| 2148 |
+
ஹ
|
| 2149 |
+
ா
|
| 2150 |
+
ி
|
| 2151 |
+
ீ
|
| 2152 |
+
ு
|
| 2153 |
+
ூ
|
| 2154 |
+
ெ
|
| 2155 |
+
ே
|
| 2156 |
+
ை
|
| 2157 |
+
ொ
|
| 2158 |
+
ோ
|
| 2159 |
+
ௌ
|
| 2160 |
+
்
|
| 2161 |
+
ௗ
|
| 2162 |
+
௦
|
| 2163 |
+
ఁ
|
| 2164 |
+
ం
|
| 2165 |
+
ః
|
| 2166 |
+
అ
|
| 2167 |
+
ఆ
|
| 2168 |
+
ఇ
|
| 2169 |
+
ఈ
|
| 2170 |
+
ఉ
|
| 2171 |
+
ఊ
|
| 2172 |
+
ఋ
|
| 2173 |
+
ఎ
|
| 2174 |
+
ఏ
|
| 2175 |
+
ఐ
|
| 2176 |
+
ఒ
|
| 2177 |
+
ఓ
|
| 2178 |
+
ఔ
|
| 2179 |
+
క
|
| 2180 |
+
ఖ
|
| 2181 |
+
గ
|
| 2182 |
+
ఘ
|
| 2183 |
+
ఙ
|
| 2184 |
+
చ
|
| 2185 |
+
ఛ
|
| 2186 |
+
జ
|
| 2187 |
+
ఝ
|
| 2188 |
+
ఞ
|
| 2189 |
+
ట
|
| 2190 |
+
ఠ
|
| 2191 |
+
డ
|
| 2192 |
+
ఢ
|
| 2193 |
+
ణ
|
| 2194 |
+
త
|
| 2195 |
+
థ
|
| 2196 |
+
ద
|
| 2197 |
+
ధ
|
| 2198 |
+
న
|
| 2199 |
+
ప
|
| 2200 |
+
ఫ
|
| 2201 |
+
బ
|
| 2202 |
+
భ
|
| 2203 |
+
మ
|
| 2204 |
+
య
|
| 2205 |
+
ర
|
| 2206 |
+
ఱ
|
| 2207 |
+
ల
|
| 2208 |
+
ళ
|
| 2209 |
+
వ
|
| 2210 |
+
శ
|
| 2211 |
+
ష
|
| 2212 |
+
స
|
| 2213 |
+
హ
|
| 2214 |
+
ఽ
|
| 2215 |
+
ా
|
| 2216 |
+
ి
|
| 2217 |
+
ీ
|
| 2218 |
+
ు
|
| 2219 |
+
ూ
|
| 2220 |
+
ృ
|
| 2221 |
+
ౄ
|
| 2222 |
+
ె
|
| 2223 |
+
ే
|
| 2224 |
+
ై
|
| 2225 |
+
ొ
|
| 2226 |
+
ో
|
| 2227 |
+
ౌ
|
| 2228 |
+
్
|
| 2229 |
+
ౕ
|
| 2230 |
+
ౖ
|
| 2231 |
+
ౙ
|
| 2232 |
+
ౠ
|
| 2233 |
+
౦
|
| 2234 |
+
౩
|
| 2235 |
+
ಂ
|
| 2236 |
+
ಃ
|
| 2237 |
+
ಅ
|
| 2238 |
+
ಆ
|
| 2239 |
+
ಇ
|
| 2240 |
+
ಈ
|
| 2241 |
+
ಉ
|
| 2242 |
+
ಊ
|
| 2243 |
+
ಋ
|
| 2244 |
+
ಎ
|
| 2245 |
+
ಏ
|
| 2246 |
+
ಐ
|
| 2247 |
+
ಒ
|
| 2248 |
+
ಓ
|
| 2249 |
+
ಔ
|
| 2250 |
+
ಕ
|
| 2251 |
+
ಖ
|
| 2252 |
+
ಗ
|
| 2253 |
+
ಘ
|
| 2254 |
+
ಙ
|
| 2255 |
+
ಚ
|
| 2256 |
+
ಛ
|
| 2257 |
+
ಜ
|
| 2258 |
+
ಝ
|
| 2259 |
+
ಞ
|
| 2260 |
+
ಟ
|
| 2261 |
+
ಠ
|
| 2262 |
+
ಡ
|
| 2263 |
+
ಢ
|
| 2264 |
+
ಣ
|
| 2265 |
+
ತ
|
| 2266 |
+
ಥ
|
| 2267 |
+
ದ
|
| 2268 |
+
ಧ
|
| 2269 |
+
ನ
|
| 2270 |
+
ಪ
|
| 2271 |
+
ಫ
|
| 2272 |
+
ಬ
|
| 2273 |
+
ಭ
|
| 2274 |
+
ಮ
|
| 2275 |
+
ಯ
|
| 2276 |
+
ರ
|
| 2277 |
+
ಱ
|
| 2278 |
+
ಲ
|
| 2279 |
+
ಳ
|
| 2280 |
+
ವ
|
| 2281 |
+
ಶ
|
| 2282 |
+
ಷ
|
| 2283 |
+
ಸ
|
| 2284 |
+
ಹ
|
| 2285 |
+
಼
|
| 2286 |
+
ಽ
|
| 2287 |
+
ಾ
|
| 2288 |
+
ಿ
|
| 2289 |
+
ೀ
|
| 2290 |
+
ು
|
| 2291 |
+
ೂ
|
| 2292 |
+
ೃ
|
| 2293 |
+
ೆ
|
| 2294 |
+
ೇ
|
| 2295 |
+
ೈ
|
| 2296 |
+
ೊ
|
| 2297 |
+
ೋ
|
| 2298 |
+
ೌ
|
| 2299 |
+
್
|
| 2300 |
+
ೕ
|
| 2301 |
+
ೖ
|
| 2302 |
+
ೞ
|
| 2303 |
+
ೠ
|
| 2304 |
+
೦
|
| 2305 |
+
೧
|
| 2306 |
+
೨
|
| 2307 |
+
೩
|
| 2308 |
+
೪
|
| 2309 |
+
೫
|
| 2310 |
+
೬
|
| 2311 |
+
೭
|
| 2312 |
+
೮
|
| 2313 |
+
೯
|
| 2314 |
+
ം
|
| 2315 |
+
ഃ
|
| 2316 |
+
അ
|
| 2317 |
+
ആ
|
| 2318 |
+
ഇ
|
| 2319 |
+
ഈ
|
| 2320 |
+
ഉ
|
| 2321 |
+
ഊ
|
| 2322 |
+
ഋ
|
| 2323 |
+
എ
|
| 2324 |
+
ഏ
|
| 2325 |
+
ഐ
|
| 2326 |
+
ഒ
|
| 2327 |
+
ഓ
|
| 2328 |
+
ഔ
|
| 2329 |
+
ക
|
| 2330 |
+
ഖ
|
| 2331 |
+
ഗ
|
| 2332 |
+
ഘ
|
| 2333 |
+
ങ
|
| 2334 |
+
ച
|
| 2335 |
+
ഛ
|
| 2336 |
+
ജ
|
| 2337 |
+
ഝ
|
| 2338 |
+
ഞ
|
| 2339 |
+
ട
|
| 2340 |
+
ഠ
|
| 2341 |
+
ഡ
|
| 2342 |
+
ഢ
|
| 2343 |
+
ണ
|
| 2344 |
+
ത
|
| 2345 |
+
ഥ
|
| 2346 |
+
ദ
|
| 2347 |
+
ധ
|
| 2348 |
+
ന
|
| 2349 |
+
പ
|
| 2350 |
+
ഫ
|
| 2351 |
+
ബ
|
| 2352 |
+
ഭ
|
| 2353 |
+
മ
|
| 2354 |
+
യ
|
| 2355 |
+
ര
|
| 2356 |
+
റ
|
| 2357 |
+
ല
|
| 2358 |
+
ള
|
| 2359 |
+
ഴ
|
| 2360 |
+
വ
|
| 2361 |
+
ശ
|
| 2362 |
+
ഷ
|
| 2363 |
+
സ
|
| 2364 |
+
ഹ
|
| 2365 |
+
ാ
|
| 2366 |
+
ി
|
| 2367 |
+
ീ
|
| 2368 |
+
ു
|
| 2369 |
+
ൂ
|
| 2370 |
+
ൃ
|
| 2371 |
+
െ
|
| 2372 |
+
േ
|
| 2373 |
+
ൈ
|
| 2374 |
+
ൊ
|
| 2375 |
+
ോ
|
| 2376 |
+
ൌ
|
| 2377 |
+
്
|
| 2378 |
+
ൎ
|
| 2379 |
+
ൗ
|
| 2380 |
+
ൟ
|
| 2381 |
+
ൺ
|
| 2382 |
+
ൻ
|
| 2383 |
+
ർ
|
| 2384 |
+
ൽ
|
| 2385 |
+
ൾ
|
| 2386 |
+
ൿ
|
| 2387 |
+
|
| 2388 |
+
|
| 2389 |
+
|
| 2390 |
+
|
| 2391 |
+
|
| 2392 |
+
|
| 2393 |
+
›
|
| 2394 |
+
⇒
|
| 2395 |
+
|
| 2396 |
+
|
| 2397 |
+
|
| 2398 |
+
📯
|
| 2399 |
+
읽
|
| 2400 |
+
임
|
| 2401 |
+
입
|
| 2402 |
+
있
|
| 2403 |
+
자
|
| 2404 |
+
작
|
| 2405 |
+
잔
|
| 2406 |
+
잖
|
| 2407 |
+
잘
|
| 2408 |
+
잡
|
| 2409 |
+
잤
|
| 2410 |
+
장
|
| 2411 |
+
재
|
| 2412 |
+
저
|
| 2413 |
+
전
|
| 2414 |
+
점
|
| 2415 |
+
정
|
| 2416 |
+
제
|
| 2417 |
+
져
|
| 2418 |
+
졌
|
| 2419 |
+
조
|
| 2420 |
+
족
|
| 2421 |
+
좀
|
| 2422 |
+
종
|
| 2423 |
+
좋
|
| 2424 |
+
죠
|
| 2425 |
+
주
|
| 2426 |
+
준
|
| 2427 |
+
줄
|
| 2428 |
+
중
|
| 2429 |
+
줘
|
| 2430 |
+
즈
|
| 2431 |
+
즐
|
| 2432 |
+
즘
|
| 2433 |
+
지
|
| 2434 |
+
진
|
| 2435 |
+
집
|
| 2436 |
+
짜
|
| 2437 |
+
짝
|
| 2438 |
+
쩌
|
| 2439 |
+
쪼
|
| 2440 |
+
쪽
|
| 2441 |
+
쫌
|
| 2442 |
+
쭈
|
| 2443 |
+
쯔
|
| 2444 |
+
찌
|
| 2445 |
+
찍
|
| 2446 |
+
차
|
| 2447 |
+
착
|
| 2448 |
+
찾
|
| 2449 |
+
책
|
| 2450 |
+
처
|
| 2451 |
+
천
|
| 2452 |
+
철
|
| 2453 |
+
체
|
| 2454 |
+
쳐
|
| 2455 |
+
쳤
|
| 2456 |
+
초
|
| 2457 |
+
촌
|
| 2458 |
+
추
|
| 2459 |
+
출
|
| 2460 |
+
춤
|
| 2461 |
+
춥
|
| 2462 |
+
춰
|
| 2463 |
+
치
|
| 2464 |
+
친
|
| 2465 |
+
칠
|
| 2466 |
+
침
|
| 2467 |
+
칩
|
| 2468 |
+
칼
|
| 2469 |
+
커
|
| 2470 |
+
켓
|
| 2471 |
+
코
|
| 2472 |
+
콩
|
| 2473 |
+
쿠
|
| 2474 |
+
퀴
|
| 2475 |
+
크
|
| 2476 |
+
큰
|
| 2477 |
+
큽
|
| 2478 |
+
키
|
| 2479 |
+
킨
|
| 2480 |
+
타
|
| 2481 |
+
태
|
| 2482 |
+
터
|
| 2483 |
+
턴
|
| 2484 |
+
털
|
| 2485 |
+
테
|
| 2486 |
+
토
|
| 2487 |
+
통
|
| 2488 |
+
투
|
| 2489 |
+
트
|
| 2490 |
+
특
|
| 2491 |
+
튼
|
| 2492 |
+
틀
|
| 2493 |
+
티
|
| 2494 |
+
팀
|
| 2495 |
+
파
|
| 2496 |
+
팔
|
| 2497 |
+
패
|
| 2498 |
+
페
|
| 2499 |
+
펜
|
| 2500 |
+
펭
|
| 2501 |
+
평
|
| 2502 |
+
포
|
| 2503 |
+
폭
|
| 2504 |
+
표
|
| 2505 |
+
품
|
| 2506 |
+
풍
|
| 2507 |
+
프
|
| 2508 |
+
플
|
| 2509 |
+
피
|
| 2510 |
+
필
|
| 2511 |
+
하
|
| 2512 |
+
학
|
| 2513 |
+
한
|
| 2514 |
+
할
|
| 2515 |
+
함
|
| 2516 |
+
합
|
| 2517 |
+
항
|
| 2518 |
+
해
|
| 2519 |
+
햇
|
| 2520 |
+
했
|
| 2521 |
+
행
|
| 2522 |
+
허
|
| 2523 |
+
험
|
| 2524 |
+
형
|
| 2525 |
+
혜
|
| 2526 |
+
호
|
| 2527 |
+
혼
|
| 2528 |
+
홀
|
| 2529 |
+
화
|
| 2530 |
+
회
|
| 2531 |
+
획
|
| 2532 |
+
후
|
| 2533 |
+
휴
|
| 2534 |
+
흐
|
| 2535 |
+
흔
|
| 2536 |
+
희
|
| 2537 |
+
히
|
| 2538 |
+
힘
|
| 2539 |
+
ﷺ
|
| 2540 |
+
ﷻ
|
| 2541 |
+
!
|
| 2542 |
+
,
|
| 2543 |
+
?
|
| 2544 |
+
�
|
| 2545 |
+
𠮶
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Runtime dependencies for the Bengali F5-TTS Space.
# hydra-core and omegaconf are required by src/f5_tts/api.py
# (hydra.utils.get_class, OmegaConf.load); matplotlib by
# src/f5_tts/eval/compare_checkpoints.py. Their absence breaks the build.
accelerate>=0.33.0
cached_path
ema_pytorch>=0.5.2
gradio>=5.0.0
hydra-core
librosa
matplotlib
omegaconf
pydub
safetensors
soundfile
torch>=2.0.0
torchaudio>=2.0.0
torchdiffeq
tqdm>=4.65.0
transformers
vocos
x_transformers>=1.31.14
|
src/f5_tts/api.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import sys
|
| 3 |
+
from importlib.resources import files
|
| 4 |
+
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
import tqdm
|
| 7 |
+
from cached_path import cached_path
|
| 8 |
+
from hydra.utils import get_class
|
| 9 |
+
from omegaconf import OmegaConf
|
| 10 |
+
|
| 11 |
+
from f5_tts.infer.utils_infer import (
|
| 12 |
+
infer_process,
|
| 13 |
+
load_model,
|
| 14 |
+
load_vocoder,
|
| 15 |
+
preprocess_ref_audio_text,
|
| 16 |
+
remove_silence_for_generated_wav,
|
| 17 |
+
save_spectrogram,
|
| 18 |
+
transcribe,
|
| 19 |
+
)
|
| 20 |
+
from f5_tts.model.utils import seed_everything
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class F5TTS:
    """High-level inference wrapper around an F5-TTS / E2-TTS checkpoint.

    Resolves the model architecture from the packaged YAML config, loads the
    vocoder and (auto-downloaded, cached) model checkpoint once at
    construction, and exposes ``infer()`` for zero-shot voice-cloned
    synthesis from a reference audio + transcript pair.
    """

    def __init__(
        self,
        model="F5TTS_v1_Base",
        ckpt_file="",
        vocab_file="",
        ode_method="euler",
        use_ema=True,
        vocoder_local_path=None,
        device=None,
        hf_cache_dir=None,
    ):
        """Load config, vocoder, and model weights.

        Args:
            model: config name matching a ``configs/<model>.yaml`` in the package.
            ckpt_file: local checkpoint path; empty string triggers an HF hub download.
            vocab_file: custom vocab path, or empty for the checkpoint default.
            ode_method: ODE solver used at sampling time (e.g. "euler").
            use_ema: load EMA weights instead of raw model weights.
            vocoder_local_path: local vocoder checkpoint dir; None downloads it.
            device: explicit torch device string; None autodetects.
            hf_cache_dir: optional cache directory for hub downloads.
        """
        model_cfg = OmegaConf.load(str(files("f5_tts").joinpath(f"configs/{model}.yaml")))
        model_cls = get_class(f"f5_tts.model.{model_cfg.model.backbone}")
        model_arc = model_cfg.model.arch

        self.mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
        self.target_sample_rate = model_cfg.model.mel_spec.target_sample_rate

        self.ode_method = ode_method
        self.use_ema = use_ema

        if device is not None:
            self.device = device
        else:
            import torch

            # Pick the best available backend. Guard optional backends with
            # hasattr: torch.xpu only exists on recent torch builds, while
            # requirements.txt allows torch>=2.0.0 where the attribute is
            # absent and the unguarded call raised AttributeError.
            if torch.cuda.is_available():
                self.device = "cuda"
            elif hasattr(torch, "xpu") and torch.xpu.is_available():
                self.device = "xpu"
            elif torch.backends.mps.is_available():
                self.device = "mps"
            else:
                self.device = "cpu"

        # Load models
        self.vocoder = load_vocoder(
            self.mel_spec_type, vocoder_local_path is not None, vocoder_local_path, self.device, hf_cache_dir
        )

        repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"

        # override for previous models hosted under different repos/steps/formats
        if model == "F5TTS_Base":
            if self.mel_spec_type == "vocos":
                ckpt_step = 1200000
            elif self.mel_spec_type == "bigvgan":
                model = "F5TTS_Base_bigvgan"
                ckpt_type = "pt"
        elif model == "E2TTS_Base":
            repo_name = "E2-TTS"
            ckpt_step = 1200000

        if not ckpt_file:
            ckpt_file = str(
                cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}", cache_dir=hf_cache_dir)
            )
        self.ema_model = load_model(
            model_cls, model_arc, ckpt_file, self.mel_spec_type, vocab_file, self.ode_method, self.use_ema, self.device
        )

    def transcribe(self, ref_audio, language=None):
        """Transcribe ``ref_audio`` via the bundled ASR helper; ``language`` hints the ASR model."""
        return transcribe(ref_audio, language)

    def export_wav(self, wav, file_wave, remove_silence=False):
        """Write ``wav`` to ``file_wave`` at the model sample rate, optionally trimming silence in place."""
        sf.write(file_wave, wav, self.target_sample_rate)

        if remove_silence:
            remove_silence_for_generated_wav(file_wave)

    def export_spectrogram(self, spec, file_spec):
        """Save a spectrogram image of ``spec`` to ``file_spec``."""
        save_spectrogram(spec, file_spec)

    def infer(
        self,
        ref_file,
        ref_text,
        gen_text,
        show_info=print,
        progress=tqdm,
        target_rms=0.1,
        cross_fade_duration=0.15,
        sway_sampling_coef=-1,
        cfg_strength=2,
        nfe_step=32,
        speed=1.0,
        fix_duration=None,
        remove_silence=False,
        file_wave=None,
        file_spec=None,
        seed=None,
    ):
        """Synthesize ``gen_text`` in the voice of ``ref_file``/``ref_text``.

        Returns ``(wav, sr, spec)``. Optionally writes the waveform and/or
        spectrogram to disk when ``file_wave``/``file_spec`` are given.
        A random seed is drawn when ``seed`` is None; the seed actually used
        is stored on ``self.seed`` for reproducibility.
        """
        if seed is None:
            seed = random.randint(0, sys.maxsize)
        seed_everything(seed)
        self.seed = seed

        # Normalize/clip reference audio and auto-transcribe if ref_text is empty.
        ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text)

        wav, sr, spec = infer_process(
            ref_file,
            ref_text,
            gen_text,
            self.ema_model,
            self.vocoder,
            self.mel_spec_type,
            show_info=show_info,
            progress=progress,
            target_rms=target_rms,
            cross_fade_duration=cross_fade_duration,
            nfe_step=nfe_step,
            cfg_strength=cfg_strength,
            sway_sampling_coef=sway_sampling_coef,
            speed=speed,
            fix_duration=fix_duration,
            device=self.device,
        )

        if file_wave is not None:
            self.export_wav(wav, file_wave, remove_silence)

        if file_spec is not None:
            self.export_spectrogram(spec, file_spec)

        return wav, sr, spec
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
if __name__ == "__main__":
    # Smoke test: load the default model, clone the bundled reference voice,
    # and write the synthesized sample plus spectrogram under tests/.
    f5tts = F5TTS()

    pkg_root = files("f5_tts")
    wav, sr, spec = f5tts.infer(
        ref_file=str(pkg_root.joinpath("infer/examples/basic/basic_ref_en.wav")),
        ref_text="Some call me nature, others call me mother nature.",
        gen_text="I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring.",
        file_wave=str(pkg_root.joinpath("../../tests/api_out.wav")),
        file_spec=str(pkg_root.joinpath("../../tests/api_out.png")),
        seed=None,
    )

    print("seed :", f5tts.seed)
|
src/f5_tts/configs/E2TTS_Base.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
|
| 5 |
+
datasets:
|
| 6 |
+
name: Emilia_ZH_EN # dataset name
|
| 7 |
+
batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
|
| 8 |
+
batch_size_type: frame # frame | sample
|
| 9 |
+
max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
|
| 10 |
+
num_workers: 16
|
| 11 |
+
|
| 12 |
+
optim:
|
| 13 |
+
epochs: 11
|
| 14 |
+
learning_rate: 7.5e-5
|
| 15 |
+
num_warmup_updates: 20000 # warmup updates
|
| 16 |
+
grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
|
| 17 |
+
max_grad_norm: 1.0 # gradient clipping
|
| 18 |
+
bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
|
| 19 |
+
|
| 20 |
+
model:
|
| 21 |
+
name: E2TTS_Base
|
| 22 |
+
tokenizer: pinyin
|
| 23 |
+
tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
|
| 24 |
+
backbone: UNetT
|
| 25 |
+
arch:
|
| 26 |
+
dim: 1024
|
| 27 |
+
depth: 24
|
| 28 |
+
heads: 16
|
| 29 |
+
ff_mult: 4
|
| 30 |
+
text_mask_padding: False
|
| 31 |
+
pe_attn_head: 1
|
| 32 |
+
mel_spec:
|
| 33 |
+
target_sample_rate: 24000
|
| 34 |
+
n_mel_channels: 100
|
| 35 |
+
hop_length: 256
|
| 36 |
+
win_length: 1024
|
| 37 |
+
n_fft: 1024
|
| 38 |
+
mel_spec_type: vocos # vocos | bigvgan
|
| 39 |
+
vocoder:
|
| 40 |
+
is_local: False # use local offline ckpt or not
|
| 41 |
+
local_path: null # local vocoder path
|
| 42 |
+
|
| 43 |
+
ckpts:
|
| 44 |
+
logger: wandb # wandb | tensorboard | null
|
| 45 |
+
log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
|
| 46 |
+
save_per_updates: 50000 # save checkpoint per updates
|
| 47 |
+
keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
|
| 48 |
+
last_per_updates: 5000 # save last checkpoint per updates
|
| 49 |
+
save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
|
src/f5_tts/configs/E2TTS_Small.yaml
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
|
| 5 |
+
datasets:
|
| 6 |
+
name: Emilia_ZH_EN
|
| 7 |
+
batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
|
| 8 |
+
batch_size_type: frame # frame | sample
|
| 9 |
+
max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
|
| 10 |
+
num_workers: 16
|
| 11 |
+
|
| 12 |
+
optim:
|
| 13 |
+
epochs: 11
|
| 14 |
+
learning_rate: 7.5e-5
|
| 15 |
+
num_warmup_updates: 20000 # warmup updates
|
| 16 |
+
grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
|
| 17 |
+
max_grad_norm: 1.0
|
| 18 |
+
bnb_optimizer: False
|
| 19 |
+
|
| 20 |
+
model:
|
| 21 |
+
name: E2TTS_Small
|
| 22 |
+
tokenizer: pinyin
|
| 23 |
+
tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
|
| 24 |
+
backbone: UNetT
|
| 25 |
+
arch:
|
| 26 |
+
dim: 768
|
| 27 |
+
depth: 20
|
| 28 |
+
heads: 12
|
| 29 |
+
ff_mult: 4
|
| 30 |
+
text_mask_padding: False
|
| 31 |
+
pe_attn_head: 1
|
| 32 |
+
mel_spec:
|
| 33 |
+
target_sample_rate: 24000
|
| 34 |
+
n_mel_channels: 100
|
| 35 |
+
hop_length: 256
|
| 36 |
+
win_length: 1024
|
| 37 |
+
n_fft: 1024
|
| 38 |
+
mel_spec_type: vocos # vocos | bigvgan
|
| 39 |
+
vocoder:
|
| 40 |
+
is_local: False # use local offline ckpt or not
|
| 41 |
+
local_path: null # local vocoder path
|
| 42 |
+
|
| 43 |
+
ckpts:
|
| 44 |
+
logger: wandb # wandb | tensorboard | null
|
| 45 |
+
log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
|
| 46 |
+
save_per_updates: 50000 # save checkpoint per updates
|
| 47 |
+
keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
|
| 48 |
+
last_per_updates: 5000 # save last checkpoint per updates
|
| 49 |
+
save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
|
src/f5_tts/configs/F5TTS_Base.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
|
| 5 |
+
datasets:
|
| 6 |
+
name: Emilia_ZH_EN # dataset name
|
| 7 |
+
batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
|
| 8 |
+
batch_size_type: frame # frame | sample
|
| 9 |
+
max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
|
| 10 |
+
num_workers: 16
|
| 11 |
+
|
| 12 |
+
optim:
|
| 13 |
+
epochs: 11
|
| 14 |
+
learning_rate: 7.5e-5
|
| 15 |
+
num_warmup_updates: 20000 # warmup updates
|
| 16 |
+
grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
|
| 17 |
+
max_grad_norm: 1.0 # gradient clipping
|
| 18 |
+
bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
|
| 19 |
+
|
| 20 |
+
model:
|
| 21 |
+
name: F5TTS_Base # model name
|
| 22 |
+
tokenizer: pinyin # tokenizer type
|
| 23 |
+
tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
|
| 24 |
+
backbone: DiT
|
| 25 |
+
arch:
|
| 26 |
+
dim: 1024
|
| 27 |
+
depth: 22
|
| 28 |
+
heads: 16
|
| 29 |
+
ff_mult: 2
|
| 30 |
+
text_dim: 512
|
| 31 |
+
text_mask_padding: False
|
| 32 |
+
conv_layers: 4
|
| 33 |
+
pe_attn_head: 1
|
| 34 |
+
attn_backend: torch # torch | flash_attn
|
| 35 |
+
attn_mask_enabled: False
|
| 36 |
+
checkpoint_activations: False # recompute activations and save memory for extra compute
|
| 37 |
+
mel_spec:
|
| 38 |
+
target_sample_rate: 24000
|
| 39 |
+
n_mel_channels: 100
|
| 40 |
+
hop_length: 256
|
| 41 |
+
win_length: 1024
|
| 42 |
+
n_fft: 1024
|
| 43 |
+
mel_spec_type: vocos # vocos | bigvgan
|
| 44 |
+
vocoder:
|
| 45 |
+
is_local: False # use local offline ckpt or not
|
| 46 |
+
local_path: null # local vocoder path
|
| 47 |
+
|
| 48 |
+
ckpts:
|
| 49 |
+
logger: wandb # wandb | tensorboard | null
|
| 50 |
+
log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
|
| 51 |
+
save_per_updates: 50000 # save checkpoint per updates
|
| 52 |
+
keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
|
| 53 |
+
last_per_updates: 5000 # save last checkpoint per updates
|
| 54 |
+
save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
|
src/f5_tts/configs/F5TTS_Small.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
|
| 5 |
+
datasets:
|
| 6 |
+
name: Emilia_ZH_EN
|
| 7 |
+
batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
|
| 8 |
+
batch_size_type: frame # frame | sample
|
| 9 |
+
max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
|
| 10 |
+
num_workers: 16
|
| 11 |
+
|
| 12 |
+
optim:
|
| 13 |
+
epochs: 11 # only suitable for Emilia, if you want to train it on LibriTTS, set epoch 686
|
| 14 |
+
learning_rate: 7.5e-5
|
| 15 |
+
num_warmup_updates: 20000 # warmup updates
|
| 16 |
+
grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
|
| 17 |
+
max_grad_norm: 1.0 # gradient clipping
|
| 18 |
+
bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
|
| 19 |
+
|
| 20 |
+
model:
|
| 21 |
+
name: F5TTS_Small
|
| 22 |
+
tokenizer: pinyin
|
| 23 |
+
tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
|
| 24 |
+
backbone: DiT
|
| 25 |
+
arch:
|
| 26 |
+
dim: 768
|
| 27 |
+
depth: 18
|
| 28 |
+
heads: 12
|
| 29 |
+
ff_mult: 2
|
| 30 |
+
text_dim: 512
|
| 31 |
+
text_mask_padding: False
|
| 32 |
+
conv_layers: 4
|
| 33 |
+
pe_attn_head: 1
|
| 34 |
+
attn_backend: torch # torch | flash_attn
|
| 35 |
+
attn_mask_enabled: False
|
| 36 |
+
checkpoint_activations: False # recompute activations and save memory for extra compute
|
| 37 |
+
mel_spec:
|
| 38 |
+
target_sample_rate: 24000
|
| 39 |
+
n_mel_channels: 100
|
| 40 |
+
hop_length: 256
|
| 41 |
+
win_length: 1024
|
| 42 |
+
n_fft: 1024
|
| 43 |
+
mel_spec_type: vocos # vocos | bigvgan
|
| 44 |
+
vocoder:
|
| 45 |
+
is_local: False # use local offline ckpt or not
|
| 46 |
+
local_path: null # local vocoder path
|
| 47 |
+
|
| 48 |
+
ckpts:
|
| 49 |
+
logger: wandb # wandb | tensorboard | null
|
| 50 |
+
log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
|
| 51 |
+
save_per_updates: 50000 # save checkpoint per updates
|
| 52 |
+
keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
|
| 53 |
+
last_per_updates: 5000 # save last checkpoint per updates
|
| 54 |
+
save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
|
src/f5_tts/configs/F5TTS_v1_Base.yaml
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
hydra:
|
| 2 |
+
run:
|
| 3 |
+
dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
| 4 |
+
|
| 5 |
+
datasets:
|
| 6 |
+
name: Emilia_ZH_EN # dataset name
|
| 7 |
+
batch_size_per_gpu: 38400 # 8 GPUs, 8 * 38400 = 307200
|
| 8 |
+
batch_size_type: frame # frame | sample
|
| 9 |
+
max_samples: 64 # max sequences per batch if use frame-wise batch_size. we set 32 for small models, 64 for base models
|
| 10 |
+
num_workers: 16
|
| 11 |
+
|
| 12 |
+
optim:
|
| 13 |
+
epochs: 11
|
| 14 |
+
learning_rate: 7.5e-5
|
| 15 |
+
num_warmup_updates: 20000 # warmup updates
|
| 16 |
+
grad_accumulation_steps: 1 # note: updates = steps / grad_accumulation_steps
|
| 17 |
+
max_grad_norm: 1.0 # gradient clipping
|
| 18 |
+
bnb_optimizer: False # use bnb 8bit AdamW optimizer or not
|
| 19 |
+
|
| 20 |
+
model:
|
| 21 |
+
name: F5TTS_v1_Base # model name
|
| 22 |
+
tokenizer: pinyin # tokenizer type
|
| 23 |
+
tokenizer_path: null # if 'custom' tokenizer, define the path want to use (should be vocab.txt)
|
| 24 |
+
backbone: DiT
|
| 25 |
+
arch:
|
| 26 |
+
dim: 1024
|
| 27 |
+
depth: 22
|
| 28 |
+
heads: 16
|
| 29 |
+
ff_mult: 2
|
| 30 |
+
text_dim: 512
|
| 31 |
+
text_mask_padding: True
|
| 32 |
+
qk_norm: null # null | rms_norm
|
| 33 |
+
conv_layers: 4
|
| 34 |
+
pe_attn_head: null
|
| 35 |
+
attn_backend: torch # torch | flash_attn
|
| 36 |
+
attn_mask_enabled: False
|
| 37 |
+
checkpoint_activations: False # recompute activations and save memory for extra compute
|
| 38 |
+
mel_spec:
|
| 39 |
+
target_sample_rate: 24000
|
| 40 |
+
n_mel_channels: 100
|
| 41 |
+
hop_length: 256
|
| 42 |
+
win_length: 1024
|
| 43 |
+
n_fft: 1024
|
| 44 |
+
mel_spec_type: vocos # vocos | bigvgan
|
| 45 |
+
vocoder:
|
| 46 |
+
is_local: False # use local offline ckpt or not
|
| 47 |
+
local_path: null # local vocoder path
|
| 48 |
+
|
| 49 |
+
ckpts:
|
| 50 |
+
logger: wandb # wandb | tensorboard | null
|
| 51 |
+
log_samples: True # infer random sample per save checkpoint. wip, normal to fail with extra long samples
|
| 52 |
+
save_per_updates: 50000 # save checkpoint per updates
|
| 53 |
+
keep_last_n_checkpoints: -1 # -1 to keep all, 0 to not save intermediate, > 0 to keep last N checkpoints
|
| 54 |
+
last_per_updates: 5000 # save last checkpoint per updates
|
| 55 |
+
save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}
|
src/f5_tts/eval/README.md
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Evaluation
|
| 3 |
+
|
| 4 |
+
Install packages for evaluation:
|
| 5 |
+
|
| 6 |
+
```bash
|
| 7 |
+
pip install -e .[eval]
|
| 8 |
+
```
|
| 9 |
+
|
| 10 |
+
## Generating Samples for Evaluation
|
| 11 |
+
|
| 12 |
+
### Prepare Test Datasets
|
| 13 |
+
|
| 14 |
+
1. *Seed-TTS testset*: Download from [seed-tts-eval](https://github.com/BytedanceSpeech/seed-tts-eval).
|
| 15 |
+
2. *LibriSpeech test-clean*: Download from [OpenSLR](http://www.openslr.org/12/).
|
| 16 |
+
3. Unzip the downloaded datasets and place them in the `data/` directory.
|
| 17 |
+
4. Our filtered LibriSpeech-PC 4-10s subset: `data/librispeech_pc_test_clean_cross_sentence.lst`
|
| 18 |
+
|
| 19 |
+
### Batch Inference for Test Set
|
| 20 |
+
|
| 21 |
+
To run batch inference for evaluations, execute the following commands:
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
# if not setup accelerate config yet
|
| 25 |
+
accelerate config
|
| 26 |
+
|
| 27 |
+
# if only perform inference
|
| 28 |
+
bash src/f5_tts/eval/eval_infer_batch.sh --infer-only
|
| 29 |
+
|
| 30 |
+
# if inference and with corresponding evaluation, setup the following tools first
|
| 31 |
+
bash src/f5_tts/eval/eval_infer_batch.sh
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
## Objective Evaluation on Generated Results
|
| 35 |
+
|
| 36 |
+
### Download Evaluation Model Checkpoints
|
| 37 |
+
|
| 38 |
+
1. Chinese ASR Model: [Paraformer-zh](https://huggingface.co/funasr/paraformer-zh)
|
| 39 |
+
2. English ASR Model: [Faster-Whisper](https://huggingface.co/Systran/faster-whisper-large-v3)
|
| 40 |
+
3. WavLM Model: Download from [Google Drive](https://drive.google.com/file/d/1-aE1NfzpRCLxA4GUxX9ITI3F9LlbtEGP/view).
|
| 41 |
+
|
| 42 |
+
> [!NOTE]
|
| 43 |
+
> ASR model will be automatically downloaded if `--local` not set for evaluation scripts.
|
| 44 |
+
> Otherwise, you should update the `asr_ckpt_dir` path values in `eval_librispeech_test_clean.py` or `eval_seedtts_testset.py`.
|
| 45 |
+
>
|
| 46 |
+
> WavLM model must be downloaded and your `wavlm_ckpt_dir` path updated in `eval_librispeech_test_clean.py` and `eval_seedtts_testset.py`.
|
| 47 |
+
|
| 48 |
+
### Objective Evaluation Examples
|
| 49 |
+
|
| 50 |
+
Update the path with your batch-inferenced results, and carry out WER / SIM / UTMOS evaluations:
|
| 51 |
+
```bash
|
| 52 |
+
# Evaluation [WER] for Seed-TTS test [ZH] set
|
| 53 |
+
python src/f5_tts/eval/eval_seedtts_testset.py --eval_task wer --lang zh --gen_wav_dir <GEN_WAV_DIR> --gpu_nums 8
|
| 54 |
+
|
| 55 |
+
# Evaluation [SIM] for LibriSpeech-PC test-clean (cross-sentence)
|
| 56 |
+
python src/f5_tts/eval/eval_librispeech_test_clean.py --eval_task sim --gen_wav_dir <GEN_WAV_DIR> --librispeech_test_clean_path <TEST_CLEAN_PATH>
|
| 57 |
+
|
| 58 |
+
# Evaluation [UTMOS]. --ext: Audio extension
|
| 59 |
+
python src/f5_tts/eval/eval_utmos.py --audio_dir <WAV_DIR> --ext wav
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
> [!NOTE]
|
| 63 |
+
> Evaluation results can also be found in `_*_results.jsonl` files saved in `<GEN_WAV_DIR>`/`<WAV_DIR>`.
|
src/f5_tts/eval/__pycache__/compare_checkpoints.cpython-311.pyc
ADDED
|
Binary file (9.67 kB). View file
|
|
|
src/f5_tts/eval/__pycache__/eval_bengali.cpython-311.pyc
ADDED
|
Binary file (15.1 kB). View file
|
|
|
src/f5_tts/eval/__pycache__/gen_bengali_batch.cpython-311.pyc
ADDED
|
Binary file (6.8 kB). View file
|
|
|
src/f5_tts/eval/__pycache__/gen_elevenlabs_batch.cpython-311.pyc
ADDED
|
Binary file (5.58 kB). View file
|
|
|
src/f5_tts/eval/__pycache__/gen_indicf5_batch.cpython-311.pyc
ADDED
|
Binary file (8.08 kB). View file
|
|
|
src/f5_tts/eval/compare_checkpoints.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Compare checkpoints on stimulai53 and plot results."""
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import csv
|
| 5 |
+
import json
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import matplotlib.pyplot as plt
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def run_gen_eval(ckpt_file, output_dir, testset="stimulai53"):
    """Run batch generation with one checkpoint, then evaluate the outputs.

    The generation script always writes into examples/<testset>, so the wavs
    are moved into `output_dir` afterwards to keep one folder per checkpoint.
    The evaluation result JSON is renamed to carry the output directory name.
    """
    staging_dir = Path(f"examples/{testset}")
    target_dir = Path(output_dir)

    # Remove wavs left over from a previous run so generation starts clean.
    if staging_dir.exists():
        for wav in staging_dir.glob("*.wav"):
            wav.unlink()

    generation_cmd = [
        sys.executable, "-m", "src.f5_tts.eval.gen_bengali_batch",
        "--testset", testset,
        "--ckpt_file", ckpt_file,
        "--use_ema",
    ]
    subprocess.run(generation_cmd, check=True)

    # Collect the freshly generated wavs under the checkpoint-specific folder.
    target_dir.mkdir(parents=True, exist_ok=True)
    for wav in staging_dir.glob("*.wav"):
        wav.rename(target_dir / wav.name)

    evaluation_cmd = [
        sys.executable, "-m", "src.f5_tts.eval.eval_bengali",
        "--testset", testset,
        "--gen_dir", str(target_dir),
    ]
    subprocess.run(evaluation_cmd, check=True)

    # Tag the result file with the output dir name (which encodes the step).
    produced = Path(f"results/bengali_eval_{testset}.json")
    renamed = Path(f"results/bengali_eval_{target_dir.name}.json")
    if produced.exists():
        produced.rename(renamed)
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def save_csv(results, output_path="results/checkpoint_comparison.csv"):
    """Write per-step CER/SIM/UTMOS metrics to a CSV file, sorted by step.

    Args:
        results: Mapping of training step -> dict with "cer", "sim", "utmos".
        output_path: Destination CSV path.
    """
    with open(output_path, 'w', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(['step', 'cer', 'sim', 'utmos'])
        for step in sorted(results):
            metrics = results[step]
            writer.writerow([step, metrics['cer'], metrics['sim'], metrics['utmos']])
    print(f"CSV saved to {output_path}")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def plot_results(results, output_path="results/checkpoint_comparison.png"):
    """Plot CER, SIM, and UTMOS across checkpoints as three stacked subplots.

    Fix: the original duplicated the identical plot/label/annotate sequence
    three times; the metric descriptions are now data and the plotting runs
    in a single loop, so the three panels cannot drift apart.

    Args:
        results: Mapping of training step -> dict with "cer", "sim", "utmos".
        output_path: Destination PNG path.
    """
    steps = sorted(results.keys())
    # (metric key, subplot title, line color) — one subplot per entry.
    metrics = [
        ("cer", "Character Error Rate (lower=better)", "tab:red"),
        ("sim", "Speaker Similarity (higher=better)", "tab:blue"),
        ("utmos", "Audio Quality (higher=better)", "tab:green"),
    ]

    fig, axes = plt.subplots(3, 1, figsize=(8, 10))
    for ax, (key, title, color) in zip(axes, metrics):
        values = [results[s][key] for s in steps]
        ax.plot(steps, values, 'o-', color=color, linewidth=2, markersize=8)
        ax.set_xlabel('Training Steps')
        ax.set_ylabel(key.upper())
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        # Annotate each point with its value, slightly above the marker.
        for x, y in zip(steps, values):
            ax.annotate(f'{y:.4f}', (x, y), textcoords="offset points", xytext=(0, 10), ha='center')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150)
    print(f"Plot saved to {output_path}")
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def main():
    """Generate + evaluate every requested checkpoint, then summarize.

    For each training step, runs generation/evaluation via run_gen_eval,
    loads the produced JSON, prints a summary table, and writes the CSV
    and comparison plot.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ckpt_dir", default="ckpts/bengali_300h")
    parser.add_argument("--testset", default="stimulai53")
    parser.add_argument("--steps", nargs="+", type=int, default=[10000, 20000, 30000, 40000, 50000])
    args = parser.parse_args()

    metrics_by_step = {}

    for step in args.steps:
        checkpoint = f"{args.ckpt_dir}/model_{step}.pt"
        wav_dir = f"examples/{args.testset}_{step}"
        result_path = f"results/bengali_eval_{args.testset}_{step}.json"

        print(f"\n{'='*50}")
        print(f"Processing step {step}")
        print(f"{'='*50}")

        run_gen_eval(checkpoint, wav_dir, args.testset)

        # run_gen_eval renames the eval output to include the step suffix.
        with open(result_path) as handle:
            summary = json.load(handle)
        metrics_by_step[step] = {
            "cer": summary["avg_cer"],
            "sim": summary["avg_sim"],
            "utmos": summary["avg_utmos"],
        }

        print(f"Step {step}: CER={summary['avg_cer']:.4f}, SIM={summary['avg_sim']:.4f}, UTMOS={summary['avg_utmos']:.4f}")

    # Summary table across all evaluated checkpoints.
    print(f"\n{'='*50}")
    print("Summary")
    print(f"{'='*50}")
    print(f"{'Step':>10} {'CER':>10} {'SIM':>10} {'UTMOS':>10}")
    for step in sorted(metrics_by_step):
        row = metrics_by_step[step]
        print(f"{step:>10} {row['cer']:>10.4f} {row['sim']:>10.4f} {row['utmos']:>10.4f}")

    save_csv(metrics_by_step)
    plot_results(metrics_by_step)


if __name__ == "__main__":
    main()
|
src/f5_tts/eval/ecapa_tdnn.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# just for speaker similarity evaluation, third-party code
|
| 2 |
+
|
| 3 |
+
# From https://github.com/microsoft/UniSpeech/blob/main/downstreams/speaker_verification/models/
|
| 4 |
+
# part of the code is borrowed from https://github.com/lawlict/ECAPA-TDNN
|
| 5 |
+
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
import torch.nn as nn
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
""" Res2Conv1d + BatchNorm1d + ReLU
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Res2Conv1dReluBn(nn.Module):
    """Res2Net-style grouped Conv1d -> ReLU -> BatchNorm1d block.

    Expects in_channels == out_channels == channels. The channel dimension is
    split into `scale` equal chunks which are processed hierarchically: each
    branch convolves its chunk plus the previous branch's output.
    """

    def __init__(self, channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True, scale=4):
        super().__init__()
        assert channels % scale == 0, "{} % {} != 0".format(channels, scale)
        self.scale = scale
        self.width = channels // scale
        # With scale > 1 the last chunk is passed through untouched.
        self.nums = scale if scale == 1 else scale - 1

        self.convs = nn.ModuleList(
            nn.Conv1d(self.width, self.width, kernel_size, stride, padding, dilation, bias=bias)
            for _ in range(self.nums)
        )
        self.bns = nn.ModuleList(nn.BatchNorm1d(self.width) for _ in range(self.nums))

    def forward(self, x):
        chunks = torch.split(x, self.width, 1)
        processed = []
        sp = None
        for i, (conv, bn) in enumerate(zip(self.convs, self.bns)):
            # First branch sees its raw chunk; later ones add the previous output.
            sp = chunks[i] if i == 0 else sp + chunks[i]
            # Order: conv -> relu -> bn
            sp = bn(F.relu(conv(sp)))
            processed.append(sp)
        if self.scale != 1:
            processed.append(chunks[self.nums])
        return torch.cat(processed, dim=1)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
""" Conv1d + BatchNorm1d + ReLU
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
class Conv1dReluBn(nn.Module):
    """Conv1d followed by ReLU activation and BatchNorm1d."""

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, dilation=1, bias=True):
        super().__init__()
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)
        self.bn = nn.BatchNorm1d(out_channels)

    def forward(self, x):
        activated = F.relu(self.conv(x))
        return self.bn(activated)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
""" The SE connection of 1D case.
|
| 71 |
+
"""
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class SE_Connect(nn.Module):
    """Squeeze-and-excitation gating for 1D feature maps (B, C, T)."""

    def __init__(self, channels, se_bottleneck_dim=128):
        super().__init__()
        self.linear1 = nn.Linear(channels, se_bottleneck_dim)
        self.linear2 = nn.Linear(se_bottleneck_dim, channels)

    def forward(self, x):
        # Squeeze: global average over the time axis.
        squeezed = x.mean(dim=2)
        # Excite: bottleneck MLP producing per-channel gates in (0, 1).
        gates = torch.sigmoid(self.linear2(F.relu(self.linear1(squeezed))))
        # Rescale the input channel-wise.
        return x * gates.unsqueeze(2)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
""" SE-Res2Block of the ECAPA-TDNN architecture.
|
| 90 |
+
"""
|
| 91 |
+
|
| 92 |
+
# def SE_Res2Block(channels, kernel_size, stride, padding, dilation, scale):
|
| 93 |
+
# return nn.Sequential(
|
| 94 |
+
# Conv1dReluBn(channels, 512, kernel_size=1, stride=1, padding=0),
|
| 95 |
+
# Res2Conv1dReluBn(512, kernel_size, stride, padding, dilation, scale=scale),
|
| 96 |
+
# Conv1dReluBn(512, channels, kernel_size=1, stride=1, padding=0),
|
| 97 |
+
# SE_Connect(channels)
|
| 98 |
+
# )
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class SE_Res2Block(nn.Module):
    """SE-Res2Block of the ECAPA-TDNN architecture.

    1x1 conv -> Res2 conv -> 1x1 conv -> SE gating, with a residual
    connection (projected by a 1x1 conv when the channel count changes).
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride, padding, dilation, scale, se_bottleneck_dim):
        super().__init__()
        self.Conv1dReluBn1 = Conv1dReluBn(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.Res2Conv1dReluBn = Res2Conv1dReluBn(out_channels, kernel_size, stride, padding, dilation, scale=scale)
        self.Conv1dReluBn2 = Conv1dReluBn(out_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.SE_Connect = SE_Connect(out_channels, se_bottleneck_dim)

        # Project the residual path only when channel counts differ.
        self.shortcut = None
        if in_channels != out_channels:
            self.shortcut = nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
            )

    def forward(self, x):
        residual = x if self.shortcut is None else self.shortcut(x)
        out = self.Conv1dReluBn1(x)
        out = self.Res2Conv1dReluBn(out)
        out = self.Conv1dReluBn2(out)
        out = self.SE_Connect(out)
        return out + residual
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
""" Attentive weighted mean and standard deviation pooling.
|
| 131 |
+
"""
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
class AttentiveStatsPool(nn.Module):
    """Attentive weighted mean and standard deviation pooling.

    Produces a 2*in_dim utterance vector: attention-weighted mean and
    attention-weighted std over the time axis.
    """

    def __init__(self, in_dim, attention_channels=128, global_context_att=False):
        super().__init__()
        self.global_context_att = global_context_att

        # Conv1d with kernel_size 1 acts like a Linear layer without needing
        # to transpose the (B, C, T) input.
        if global_context_att:
            self.linear1 = nn.Conv1d(in_dim * 3, attention_channels, kernel_size=1)  # equals W and b in the paper
        else:
            self.linear1 = nn.Conv1d(in_dim, attention_channels, kernel_size=1)  # equals W and b in the paper
        self.linear2 = nn.Conv1d(attention_channels, in_dim, kernel_size=1)  # equals V and k in the paper

    def forward(self, x):
        if self.global_context_att:
            # Append utterance-level mean/std as extra context channels.
            context_mean = torch.mean(x, dim=-1, keepdim=True).expand_as(x)
            context_std = torch.sqrt(torch.var(x, dim=-1, keepdim=True) + 1e-10).expand_as(x)
            attn_input = torch.cat((x, context_mean, context_std), dim=1)
        else:
            attn_input = x

        # tanh (not ReLU) here: the original authors found ReLU hard to converge.
        scores = torch.tanh(self.linear1(attn_input))
        alpha = torch.softmax(self.linear2(scores), dim=2)
        mean = torch.sum(alpha * x, dim=2)
        # Weighted variance via E[x^2] - E[x]^2; clamp guards the sqrt.
        variance = torch.sum(alpha * (x**2), dim=2) - mean**2
        std = torch.sqrt(variance.clamp(min=1e-9))
        return torch.cat([mean, std], dim=1)
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class ECAPA_TDNN(nn.Module):
    """ECAPA-TDNN speaker-embedding network on top of an s3prl feature extractor.

    Loads an upstream feature model (default WavLM-Large) via torch.hub,
    learns a softmax-weighted combination of its hidden layers, and maps the
    features through TDNN/SE-Res2 layers and attentive stats pooling to a
    fixed-size speaker embedding.

    Args:
        feat_dim: Channel dimension of the extracted features fed to layer1.
        channels: Width of the TDNN layers (the final cat layer is fixed at 1536).
        emb_dim: Output embedding dimension.
        global_context_att: Whether the pooling uses global mean/std context.
        feat_type: s3prl upstream name, or "fbank"/"mfcc" for classic features.
        sr: Sample rate assumed for the dummy probe in get_feat_num.
        feature_selection: Key selecting which s3prl output to use.
        update_extract: If False, the upstream extractor is frozen.
        config_path: Optional config forwarded to a local s3prl checkout.
    """

    def __init__(
        self,
        feat_dim=80,
        channels=512,
        emb_dim=192,
        global_context_att=False,
        feat_type="wavlm_large",
        sr=16000,
        feature_selection="hidden_states",
        update_extract=False,
        config_path=None,
    ):
        super().__init__()

        self.feat_type = feat_type
        self.feature_selection = feature_selection
        self.update_extract = update_extract
        self.sr = sr

        # Bypass torch.hub's forked-repo check so a cached local s3prl loads.
        torch.hub._validate_not_a_forked_repo = lambda a, b, c: True
        try:
            # Prefer a local s3prl checkout; fall back to fetching from GitHub.
            local_s3prl_path = os.path.expanduser("~/.cache/torch/hub/s3prl_s3prl_main")
            self.feature_extract = torch.hub.load(local_s3prl_path, feat_type, source="local", config_path=config_path)
        except:  # noqa: E722
            self.feature_extract = torch.hub.load("s3prl/s3prl", feat_type)

        # On 24-layer upstreams, disable fp32 attention where the attribute
        # exists (layers 23 and 11) — presumably for speed/memory; third-party
        # code, exact motivation not documented here.
        if len(self.feature_extract.model.encoder.layers) == 24 and hasattr(
            self.feature_extract.model.encoder.layers[23].self_attn, "fp32_attention"
        ):
            self.feature_extract.model.encoder.layers[23].self_attn.fp32_attention = False
        if len(self.feature_extract.model.encoder.layers) == 24 and hasattr(
            self.feature_extract.model.encoder.layers[11].self_attn, "fp32_attention"
        ):
            self.feature_extract.model.encoder.layers[11].self_attn.fp32_attention = False

        # Learnable per-layer weights over the upstream's hidden states.
        self.feat_num = self.get_feat_num()
        self.feature_weight = nn.Parameter(torch.zeros(self.feat_num))

        if feat_type != "fbank" and feat_type != "mfcc":
            # Always freeze pretraining-specific heads of the upstream model.
            freeze_list = ["final_proj", "label_embs_concat", "mask_emb", "project_q", "quantizer"]
            for name, param in self.feature_extract.named_parameters():
                for freeze_val in freeze_list:
                    if freeze_val in name:
                        param.requires_grad = False
                        break

        if not self.update_extract:
            # Freeze the whole extractor unless fine-tuning is requested.
            for param in self.feature_extract.parameters():
                param.requires_grad = False

        self.instance_norm = nn.InstanceNorm1d(feat_dim)
        # self.channels = [channels] * 4 + [channels * 3]
        self.channels = [channels] * 4 + [1536]

        self.layer1 = Conv1dReluBn(feat_dim, self.channels[0], kernel_size=5, padding=2)
        self.layer2 = SE_Res2Block(
            self.channels[0],
            self.channels[1],
            kernel_size=3,
            stride=1,
            padding=2,
            dilation=2,
            scale=8,
            se_bottleneck_dim=128,
        )
        self.layer3 = SE_Res2Block(
            self.channels[1],
            self.channels[2],
            kernel_size=3,
            stride=1,
            padding=3,
            dilation=3,
            scale=8,
            se_bottleneck_dim=128,
        )
        self.layer4 = SE_Res2Block(
            self.channels[2],
            self.channels[3],
            kernel_size=3,
            stride=1,
            padding=4,
            dilation=4,
            scale=8,
            se_bottleneck_dim=128,
        )

        # self.conv = nn.Conv1d(self.channels[-1], self.channels[-1], kernel_size=1)
        # 1x1 conv fusing the concatenated outputs of layers 2-4.
        cat_channels = channels * 3
        self.conv = nn.Conv1d(cat_channels, self.channels[-1], kernel_size=1)
        self.pooling = AttentiveStatsPool(
            self.channels[-1], attention_channels=128, global_context_att=global_context_att
        )
        self.bn = nn.BatchNorm1d(self.channels[-1] * 2)
        self.linear = nn.Linear(self.channels[-1] * 2, emb_dim)

    def get_feat_num(self):
        """Probe the extractor with 1s of noise to count its hidden layers."""
        self.feature_extract.eval()
        wav = [torch.randn(self.sr).to(next(self.feature_extract.parameters()).device)]
        with torch.no_grad():
            features = self.feature_extract(wav)
        select_feature = features[self.feature_selection]
        if isinstance(select_feature, (list, tuple)):
            return len(select_feature)
        else:
            return 1

    def get_feat(self, x):
        """Extract features for a batch of waveforms and instance-normalize.

        Returns a (B, feat_dim, T) tensor; hidden states are combined with
        softmax-normalized learnable weights when the upstream returns several.
        """
        if self.update_extract:
            x = self.feature_extract([sample for sample in x])
        else:
            with torch.no_grad():
                if self.feat_type == "fbank" or self.feat_type == "mfcc":
                    x = self.feature_extract(x) + 1e-6  # B x feat_dim x time_len
                else:
                    x = self.feature_extract([sample for sample in x])

        if self.feat_type == "fbank":
            x = x.log()

        if self.feat_type != "fbank" and self.feat_type != "mfcc":
            x = x[self.feature_selection]
            if isinstance(x, (list, tuple)):
                x = torch.stack(x, dim=0)
            else:
                x = x.unsqueeze(0)
            # Softmax-weighted sum over layers; +1e-6 avoids exact zeros.
            norm_weights = F.softmax(self.feature_weight, dim=-1).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
            x = (norm_weights * x).sum(dim=0)
            x = torch.transpose(x, 1, 2) + 1e-6

        x = self.instance_norm(x)
        return x

    def forward(self, x):
        """Map raw waveforms to speaker embeddings of shape (B, emb_dim)."""
        x = self.get_feat(x)

        out1 = self.layer1(x)
        out2 = self.layer2(out1)
        out3 = self.layer3(out2)
        out4 = self.layer4(out3)

        # Multi-layer feature aggregation: concat layers 2-4, fuse, pool.
        out = torch.cat([out2, out3, out4], dim=1)
        out = F.relu(self.conv(out))
        out = self.bn(self.pooling(out))
        out = self.linear(out)

        return out
|
| 311 |
+
|
| 312 |
+
|
| 313 |
+
def ECAPA_TDNN_SMALL(
    feat_dim,
    emb_dim=256,
    feat_type="wavlm_large",
    sr=16000,
    feature_selection="hidden_states",
    update_extract=False,
    config_path=None,
):
    """Factory for the standard "small" ECAPA-TDNN (512-channel) configuration."""
    model_kwargs = dict(
        feat_dim=feat_dim,
        channels=512,
        emb_dim=emb_dim,
        feat_type=feat_type,
        sr=sr,
        feature_selection=feature_selection,
        update_extract=update_extract,
        config_path=config_path,
    )
    return ECAPA_TDNN(**model_kwargs)
|
src/f5_tts/eval/eval_bengali.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import librosa
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
import torchaudio
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
|
| 12 |
+
from huggingface_hub import hf_hub_download
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Test-set registry: each entry names the default directory of generated wavs,
# the ground-truth transcript file (one line per sample), and the sample count.
TESTSET_CONFIG = {
    "stimulai53": {
        "gen_dir": "examples/stimulai53",
        "text_file": "examples/BengaliStimulai53.txt",
        "n_samples": 53,
    },
    "ne200": {
        "gen_dir": "examples/ne200",
        "text_file": "examples/BengaliNamedEntity200.txt",
        "n_samples": 200,
    },
    "st200": {
        "gen_dir": "examples/st200",
        "text_file": "examples/ShortText200.txt",
        "n_samples": 200,
    },
}

# Directory of reference speaker wavs (slr_<i>.wav) used for similarity scoring.
REF_DIR = "examples/slr37"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def load_texts(text_file):
    """Read a transcript file and return one stripped line per entry."""
    with open(text_file, "r", encoding="utf-8") as handle:
        return [line.strip() for line in handle]
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def normalize_bengali_text(text):
    """Strip punctuation (incl. the Bengali danda) and collapse whitespace."""
    import re

    without_punct = re.sub(r"[।,\.!?;:\"\'\-\(\)]", "", text)
    return " ".join(without_punct.split())
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def compute_cer(reference, hypothesis):
    """Character error rate between normalized reference and hypothesis texts."""
    from jiwer import cer

    ref_clean = normalize_bengali_text(reference)
    hyp_clean = normalize_bengali_text(hypothesis)
    # An empty reference makes CER undefined; treat it as a perfect match.
    if not ref_clean:
        return 0.0
    return cer(ref_clean, hyp_clean)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def run_asr_bengali(audio_paths, device="cuda", model_id="bengaliAI/tugstugi_bengaliai-asr_whisper-medium"):
    """Transcribe Bengali audio files with a fine-tuned Whisper model.

    Args:
        audio_paths: Iterable of wav paths to transcribe.
        device: Torch device for inference.
        model_id: HuggingFace id of the Bengali Whisper checkpoint.

    Returns:
        List of transcriptions, one per input path, in order.
    """
    from transformers import WhisperProcessor, WhisperForConditionalGeneration, GenerationConfig
    import librosa

    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)

    # The fine-tuned checkpoint ships an outdated generation config; replace
    # it with the upstream whisper-medium one.
    model.generation_config = GenerationConfig.from_pretrained("openai/whisper-medium")

    texts = []
    for wav_path in tqdm(audio_paths, desc="ASR"):
        # Whisper expects 16 kHz mono input.
        waveform, _sr = librosa.load(str(wav_path), sr=16000)
        features = processor(waveform, sampling_rate=16000, return_tensors="pt").input_features.to(device)
        token_ids = model.generate(features, language="bn", task="transcribe")
        texts.append(processor.batch_decode(token_ids, skip_special_tokens=True)[0])
    return texts
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def run_sim_bengali(gen_paths, ref_paths, ckpt_dir=None, device="cuda"):
    """Cosine speaker similarity between generated and reference wavs.

    Uses the ECAPA2 speaker embedder fetched from the HuggingFace hub.
    `ckpt_dir` is accepted for interface compatibility but is unused here.
    """
    # Load the TorchScript ECAPA2 model from HuggingFace.
    model_file = hf_hub_download(repo_id="Jenthe/ECAPA2", filename="ecapa2.pt")
    embedder = torch.jit.load(model_file, map_location=device)

    def _load_16k(path):
        # ECAPA2 expects 16 kHz input; resample when needed.
        wav, sr = torchaudio.load(path)
        if sr != 16000:
            wav = torchaudio.transforms.Resample(orig_freq=sr, new_freq=16000)(wav)
        return wav.to(device)

    scores = []
    for gen_path, ref_path in tqdm(zip(gen_paths, ref_paths), desc="SIM", total=len(gen_paths)):
        gen_wav = _load_16k(gen_path)
        ref_wav = _load_16k(ref_path)

        with torch.jit.optimized_execution(False):
            gen_emb = embedder(gen_wav)
            ref_emb = embedder(ref_wav)

        scores.append(F.cosine_similarity(gen_emb, ref_emb).item())

    return scores
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def run_utmos_bengali(audio_paths, device="cuda"):
    """Predict UTMOS (speech quality MOS) for each audio file via SpeechMOS."""
    scorer = torch.hub.load("tarepan/SpeechMOS:v1.2.0", "utmos22_strong", trust_repo=True)
    scorer = scorer.to(device)

    scores = []
    for wav_path in tqdm(audio_paths, desc="UTMOS"):
        samples, sr = librosa.load(wav_path, sr=None, mono=True)
        batch = torch.from_numpy(samples).to(device).unsqueeze(0)
        scores.append(scorer(batch, sr).item())

    return scores
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def evaluate_testset(testset_name, base_dir, gen_dir=None, device="cuda"):
    """Run CER, speaker-similarity, and UTMOS evaluation for one test set.

    Fix: if every expected generated wav was missing, the averaging below
    raised an opaque ZeroDivisionError; this now fails fast with a clear
    FileNotFoundError instead.

    Args:
        testset_name: Key into TESTSET_CONFIG.
        base_dir: Project root that config paths are relative to.
        gen_dir: Optional override for the generated-audio directory.
        device: Torch device for the metric models.

    Returns:
        Dict with average metrics plus per-sample details.

    Raises:
        FileNotFoundError: If none of the expected generated wavs exist.
    """
    config = TESTSET_CONFIG[testset_name]
    gen_dir = Path(gen_dir) if gen_dir else Path(base_dir) / config["gen_dir"]
    text_file = Path(base_dir) / config["text_file"]
    ref_dir = Path(base_dir) / REF_DIR
    n_samples = config["n_samples"]

    gen_paths = [gen_dir / f"output_{i}.wav" for i in range(n_samples)]
    ref_paths = [ref_dir / f"slr_{i}.wav" for i in range(n_samples)]
    gt_texts = load_texts(text_file)

    # Keep only samples whose generated audio actually exists, so a partially
    # finished generation run can still be evaluated.
    missing = [p for p in gen_paths if not p.exists()]
    if missing:
        print(f"Warning: {len(missing)} generated files missing")
        existing_idx = [i for i, p in enumerate(gen_paths) if p.exists()]
        gen_paths = [gen_paths[i] for i in existing_idx]
        ref_paths = [ref_paths[i] for i in existing_idx]
        gt_texts = [gt_texts[i] for i in existing_idx]

    # Guard: the averages below divide by the sample count.
    if not gen_paths:
        raise FileNotFoundError(
            f"No generated audio found in {gen_dir} for test set '{testset_name}'"
        )

    print(f"\n=== Evaluating {testset_name} ({len(gen_paths)} samples) ===")

    # ASR + CER
    print("Running ASR...")
    transcriptions = run_asr_bengali(gen_paths, device)
    cer_scores = [compute_cer(gt, hyp) for gt, hyp in zip(gt_texts, transcriptions)]

    # Speaker Similarity
    print("Running Speaker Similarity...")
    sim_scores = run_sim_bengali(gen_paths, ref_paths, device=device)

    # UTMOS
    print("Running UTMOS...")
    utmos_scores = run_utmos_bengali(gen_paths, device)

    results = {
        "testset": testset_name,
        "n_samples": len(gen_paths),
        "avg_cer": sum(cer_scores) / len(cer_scores),
        "avg_sim": sum(sim_scores) / len(sim_scores),
        "avg_utmos": sum(utmos_scores) / len(utmos_scores),
        "per_sample": [
            {
                "idx": i,
                "gt_text": gt_texts[i],
                "hyp_text": transcriptions[i],
                "cer": cer_scores[i],
                "sim": sim_scores[i],
                "utmos": utmos_scores[i],
            }
            for i in range(len(gen_paths))
        ],
    }

    return results
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def main():
    """CLI entry point: evaluate one or all Bengali test sets, save JSON results."""
    parser = argparse.ArgumentParser(description="Bengali TTS Evaluation")
    parser.add_argument("--testset", type=str, default="all",
                        choices=["stimulai53", "ne200", "st200", "all"])
    parser.add_argument("--base_dir", type=str, default=".")
    parser.add_argument("--gen_dir", type=str, default=None,
                        help="Override generated audio directory")
    parser.add_argument("--output_dir", type=str, default="results")
    parser.add_argument("--device", type=str, default="cuda")
    args = parser.parse_args()

    output_dir = Path(args.base_dir) / args.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    selected = list(TESTSET_CONFIG.keys()) if args.testset == "all" else [args.testset]

    all_results = []
    for testset in selected:
        results = evaluate_testset(testset, args.base_dir, args.gen_dir, args.device)
        all_results.append(results)

        output_file = output_dir / f"bengali_eval_{testset}.json"
        with open(output_file, "w", encoding="utf-8") as handle:
            json.dump(results, handle, ensure_ascii=False, indent=2)
        print(f"Results saved to {output_file}")

        print(f"\n{testset}: CER={results['avg_cer']:.4f}, SIM={results['avg_sim']:.4f}, UTMOS={results['avg_utmos']:.4f}")

    if len(all_results) > 1:
        # Sample-weighted averages across all evaluated test sets.
        total_samples = sum(r["n_samples"] for r in all_results)
        avg_cer = sum(r["avg_cer"] * r["n_samples"] for r in all_results) / total_samples
        avg_sim = sum(r["avg_sim"] * r["n_samples"] for r in all_results) / total_samples
        avg_utmos = sum(r["avg_utmos"] * r["n_samples"] for r in all_results) / total_samples
        print(f"\n=== Overall ({total_samples} samples) ===")
        print(f"CER={avg_cer:.4f}, SIM={avg_sim:.4f}, UTMOS={avg_utmos:.4f}")


if __name__ == "__main__":
    main()
|
src/f5_tts/eval/eval_gemini.py
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gemini TTS Evaluation Script for Bengali
|
| 3 |
+
|
| 4 |
+
Computes CER (via Whisper ASR) and UTMOS for Gemini-generated audio.
|
| 5 |
+
No SIM computation (Gemini doesn't do voice cloning).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import argparse
|
| 9 |
+
import json
|
| 10 |
+
import re
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
import librosa
|
| 14 |
+
import torch
|
| 15 |
+
from tqdm import tqdm
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Test-set registry: maps a short testset name to where the Gemini-generated
# wavs live, the ground-truth transcript file, and the expected sample count.
# Generated audio is assumed to be named output_{i}.wav, with i aligned to
# the line index of the transcript file.
TESTSET_CONFIG = {
    "stimulai53": {
        "gen_dir": "examples/stimulai53_gemini",        # generated audio directory
        "text_file": "examples/BengaliStimulai53.txt",  # one transcript per line
        "n_samples": 53,
    },
    "ne200": {
        "gen_dir": "examples/ne200_gemini",
        "text_file": "examples/BengaliNamedEntity200.txt",
        "n_samples": 200,
    },
    "st200": {
        "gen_dir": "examples/st200_gemini",
        "text_file": "examples/ShortText200.txt",
        "n_samples": 200,
    },
}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def load_texts(text_file):
    """Read ground-truth transcripts, one per line.

    Returns a list of stripped lines. Order is preserved (including blank
    lines, which become empty strings) so that index ``i`` stays aligned
    with the generated ``output_{i}.wav`` file.
    """
    with open(text_file, "r", encoding="utf-8") as f:
        # Iterate the file object directly instead of materializing
        # f.readlines() first — same result, no intermediate list.
        return [line.strip() for line in f]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def normalize_bengali_text(text):
    """Remove punctuation (including the Bengali danda) and collapse
    whitespace runs to single spaces, for fairer CER comparison."""
    # Punctuation first, then whitespace normalization.
    stripped = re.sub(r"[।,\.!?;:\"\'\-\(\)]", "", text)
    return " ".join(stripped.split())
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def compute_cer(reference, hypothesis):
    """Character error rate between normalized reference and hypothesis.

    Returns 0.0 when the normalized reference is empty (CER would divide
    by zero on an empty reference).
    """
    from jiwer import cer

    ref_norm, hyp_norm = (
        normalize_bengali_text(reference),
        normalize_bengali_text(hypothesis),
    )
    if not ref_norm:
        return 0.0
    return cer(ref_norm, hyp_norm)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def run_asr_bengali(audio_paths, device="cuda", model_id="bengaliAI/tugstugi_bengaliai-asr_whisper-medium"):
    """Transcribe a list of wav paths with a Bengali-finetuned Whisper model.

    Returns one transcription string per input path, in input order.
    """
    from transformers import WhisperProcessor, WhisperForConditionalGeneration, GenerationConfig

    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id).to(device)
    # Replace the checkpoint's generation config with the base
    # openai/whisper-medium one — presumably the finetuned checkpoint's own
    # config is missing or unusable for generation (TODO confirm).
    model.generation_config = GenerationConfig.from_pretrained("openai/whisper-medium")

    transcriptions = []
    for audio_path in tqdm(audio_paths, desc="ASR"):
        # Whisper's feature extractor expects 16 kHz input.
        audio, sr = librosa.load(str(audio_path), sr=16000)
        input_features = processor(audio, sampling_rate=16000, return_tensors="pt").input_features.to(device)
        # Force Bengali transcription (no translation).
        predicted_ids = model.generate(input_features, language="bn", task="transcribe")
        text = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        transcriptions.append(text)
    return transcriptions
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def run_utmos(audio_paths, device="cuda"):
    """Score each wav with the UTMOS22-strong MOS predictor (via torch.hub).

    Returns one float score per input path, in input order.
    """
    predictor = torch.hub.load("tarepan/SpeechMOS:v1.2.0", "utmos22_strong", trust_repo=True)
    predictor = predictor.to(device)

    utmos_scores = []
    for audio_path in tqdm(audio_paths, desc="UTMOS"):
        # Keep the native sample rate; the predictor takes (wav, sr).
        wav, sr = librosa.load(audio_path, sr=None, mono=True)
        wav_tensor = torch.from_numpy(wav).to(device).unsqueeze(0)  # (1, T)
        score = predictor(wav_tensor, sr)
        utmos_scores.append(score.item())

    return utmos_scores
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def evaluate_testset(testset_name, base_dir, gen_dir=None, device="cuda"):
    """Evaluate one Gemini test set: ASR-based CER plus UTMOS.

    Args:
        testset_name: key into TESTSET_CONFIG.
        base_dir: root under which the config's relative paths are resolved.
        gen_dir: optional override for the generated-audio directory.
        device: torch device string for the ASR and UTMOS models.

    Returns a dict with aggregate averages and per-sample details.
    """
    config = TESTSET_CONFIG[testset_name]
    gen_dir = Path(gen_dir) if gen_dir else Path(base_dir) / config["gen_dir"]
    text_file = Path(base_dir) / config["text_file"]
    n_samples = config["n_samples"]

    # output_{i}.wav is index-aligned with line i of the transcript file.
    gen_paths = [gen_dir / f"output_{i}.wav" for i in range(n_samples)]
    gt_texts = load_texts(text_file)

    # Skip missing generations but keep wav/text index alignment.
    missing = [p for p in gen_paths if not p.exists()]
    if missing:
        print(f"Warning: {len(missing)} generated files missing")
        existing_idx = [i for i, p in enumerate(gen_paths) if p.exists()]
        gen_paths = [gen_paths[i] for i in existing_idx]
        gt_texts = [gt_texts[i] for i in existing_idx]

    print(f"\n=== Evaluating {testset_name} ({len(gen_paths)} samples) ===")

    # ASR + CER
    print("Running ASR...")
    transcriptions = run_asr_bengali(gen_paths, device)
    cer_scores = [compute_cer(gt, hyp) for gt, hyp in zip(gt_texts, transcriptions)]

    # UTMOS
    print("Running UTMOS...")
    utmos_scores = run_utmos(gen_paths, device)

    # NOTE(review): if every generated file is missing, the averages below
    # divide by zero — confirm upstream generation always produces >= 1 file.
    results = {
        "testset": testset_name,
        "n_samples": len(gen_paths),
        "avg_cer": sum(cer_scores) / len(cer_scores),
        "avg_utmos": sum(utmos_scores) / len(utmos_scores),
        "per_sample": [
            {
                "idx": i,
                "gt_text": gt_texts[i],
                "hyp_text": transcriptions[i],
                "cer": cer_scores[i],
                "utmos": utmos_scores[i],
            }
            for i in range(len(gen_paths))
        ],
    }

    return results
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def main():
    """CLI entry: evaluate one Gemini test set and write a JSON report."""
    parser = argparse.ArgumentParser(description="Gemini TTS Evaluation (CER + UTMOS)")
    parser.add_argument("--testset", type=str, default="stimulai53",
                        choices=list(TESTSET_CONFIG.keys()))
    parser.add_argument("--base_dir", type=str, default=".")
    parser.add_argument("--gen_dir", type=str, default=None,
                        help="Override generated audio directory")
    parser.add_argument("--output_dir", type=str, default="results")
    parser.add_argument("--device", type=str, default="cuda")
    args = parser.parse_args()

    output_dir = Path(args.base_dir) / args.output_dir
    output_dir.mkdir(parents=True, exist_ok=True)

    results = evaluate_testset(args.testset, args.base_dir, args.gen_dir, args.device)

    # Persist the full per-sample report (non-ASCII kept readable for Bengali).
    output_file = output_dir / f"bengali_eval_{args.testset}_gemini.json"
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=2)
    print(f"Results saved to {output_file}")

    print(f"\n{args.testset}: CER={results['avg_cer']:.4f}, UTMOS={results['avg_utmos']:.4f}")


if __name__ == "__main__":
    main()
|
src/f5_tts/eval/eval_infer_batch.py
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
sys.path.append(os.getcwd())
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import time
|
| 9 |
+
from importlib.resources import files
|
| 10 |
+
|
| 11 |
+
import torch
|
| 12 |
+
import torchaudio
|
| 13 |
+
from accelerate import Accelerator
|
| 14 |
+
from hydra.utils import get_class
|
| 15 |
+
from omegaconf import OmegaConf
|
| 16 |
+
from tqdm import tqdm
|
| 17 |
+
|
| 18 |
+
from f5_tts.eval.utils_eval import (
|
| 19 |
+
get_inference_prompt,
|
| 20 |
+
get_librispeech_test_clean_metainfo,
|
| 21 |
+
get_seedtts_testset_metainfo,
|
| 22 |
+
)
|
| 23 |
+
from f5_tts.infer.utils_infer import load_checkpoint, load_vocoder
|
| 24 |
+
from f5_tts.model import CFM
|
| 25 |
+
from f5_tts.model.utils import get_tokenizer
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# One process per GPU under `accelerate launch`; each process pins itself
# to its own CUDA device by process index.
accelerator = Accelerator()
device = f"cuda:{accelerator.process_index}"


# Inference-wide defaults.
use_ema = True     # load EMA weights from the checkpoint
target_rms = 0.1   # reference loudness used for RMS matching at save time


# Repository root, resolved relative to the installed f5_tts package.
rel_path = str(files("f5_tts").joinpath("../../"))
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def main():
    """Distributed batch inference over a benchmark test set.

    Loads the model/vocoder described by the experiment's YAML config,
    splits the prompt list across accelerate processes, synthesizes each
    utterance, and writes one wav per utterance into a results directory
    whose name encodes all sampling hyperparameters.
    """
    parser = argparse.ArgumentParser(description="batch inference")

    parser.add_argument("-s", "--seed", default=None, type=int)
    parser.add_argument("-n", "--expname", required=True)
    parser.add_argument("-c", "--ckptstep", default=1250000, type=int)

    parser.add_argument("-nfe", "--nfestep", default=32, type=int)
    parser.add_argument("-o", "--odemethod", default="euler")
    parser.add_argument("-ss", "--swaysampling", default=-1, type=float)

    parser.add_argument("-t", "--testset", required=True)
    parser.add_argument(
        "-p", "--librispeech_test_clean_path", default=f"{rel_path}/data/LibriSpeech/test-clean", type=str
    )

    parser.add_argument("--local", action="store_true", help="Use local vocoder checkpoint directory")

    args = parser.parse_args()

    seed = args.seed
    exp_name = args.expname
    ckpt_step = args.ckptstep

    nfe_step = args.nfestep
    ode_method = args.odemethod
    sway_sampling_coef = args.swaysampling

    testset = args.testset

    # Fixed evaluation settings (not exposed on the CLI).
    infer_batch_size = 1  # max frames. 1 for ddp single inference (recommended)
    cfg_strength = 2.0
    speed = 1.0
    use_truth_duration = False
    no_ref_audio = False

    # Model architecture / dataset / mel settings all come from the
    # experiment's YAML config (configs/<expname>.yaml).
    model_cfg = OmegaConf.load(str(files("f5_tts").joinpath(f"configs/{exp_name}.yaml")))
    model_cls = get_class(f"f5_tts.model.{model_cfg.model.backbone}")
    model_arc = model_cfg.model.arch

    dataset_name = model_cfg.datasets.name
    tokenizer = model_cfg.model.tokenizer

    mel_spec_type = model_cfg.model.mel_spec.mel_spec_type
    target_sample_rate = model_cfg.model.mel_spec.target_sample_rate
    n_mel_channels = model_cfg.model.mel_spec.n_mel_channels
    hop_length = model_cfg.model.mel_spec.hop_length
    win_length = model_cfg.model.mel_spec.win_length
    n_fft = model_cfg.model.mel_spec.n_fft

    # NOTE(review): an unrecognized --testset falls through all branches and
    # leaves `metainfo` unbound, surfacing later as a NameError — consider an
    # explicit else/raise.
    if testset == "ls_pc_test_clean":
        metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"
        librispeech_test_clean_path = args.librispeech_test_clean_path
        metainfo = get_librispeech_test_clean_metainfo(metalst, librispeech_test_clean_path)

    elif testset == "seedtts_test_zh":
        metalst = rel_path + "/data/seedtts_testset/zh/meta.lst"
        metainfo = get_seedtts_testset_metainfo(metalst)

    elif testset == "seedtts_test_en":
        metalst = rel_path + "/data/seedtts_testset/en/meta.lst"
        metainfo = get_seedtts_testset_metainfo(metalst)

    # path to save genereted wavs
    # NOTE(review): the `_ss...` suffix is dropped whenever the coefficient is
    # falsy, i.e. exactly 0.0 — confirm that is intended for -ss 0 runs.
    output_dir = (
        f"{rel_path}/"
        f"results/{exp_name}_{ckpt_step}/{testset}/"
        f"seed{seed}_{ode_method}_nfe{nfe_step}_{mel_spec_type}"
        f"{f'_ss{sway_sampling_coef}' if sway_sampling_coef else ''}"
        f"_cfg{cfg_strength}_speed{speed}"
        f"{'_gt-dur' if use_truth_duration else ''}"
        f"{'_no-ref-audio' if no_ref_audio else ''}"
    )

    # -------------------------------------------------#

    # Pre-batch reference mels / texts / target durations for every utterance.
    prompts_all = get_inference_prompt(
        metainfo,
        speed=speed,
        tokenizer=tokenizer,
        target_sample_rate=target_sample_rate,
        n_mel_channels=n_mel_channels,
        hop_length=hop_length,
        mel_spec_type=mel_spec_type,
        target_rms=target_rms,
        use_truth_duration=use_truth_duration,
        infer_batch_size=infer_batch_size,
    )

    # Vocoder model
    local = args.local
    if mel_spec_type == "vocos":
        vocoder_local_path = "../checkpoints/charactr/vocos-mel-24khz"
    elif mel_spec_type == "bigvgan":
        vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
    vocoder = load_vocoder(vocoder_name=mel_spec_type, is_local=local, local_path=vocoder_local_path)

    # Tokenizer
    vocab_char_map, vocab_size = get_tokenizer(dataset_name, tokenizer)

    # Model
    model = CFM(
        transformer=model_cls(**model_arc, text_num_embeds=vocab_size, mel_dim=n_mel_channels),
        mel_spec_kwargs=dict(
            n_fft=n_fft,
            hop_length=hop_length,
            win_length=win_length,
            n_mel_channels=n_mel_channels,
            target_sample_rate=target_sample_rate,
            mel_spec_type=mel_spec_type,
        ),
        odeint_kwargs=dict(
            method=ode_method,
        ),
        vocab_char_map=vocab_char_map,
    ).to(device)

    # Checkpoint lookup: first the released location (ckpts/<exp>/), then the
    # training save_dir from the config; .pt preferred over .safetensors.
    ckpt_prefix = rel_path + f"/ckpts/{exp_name}/model_{ckpt_step}"
    if os.path.exists(ckpt_prefix + ".pt"):
        ckpt_path = ckpt_prefix + ".pt"
    elif os.path.exists(ckpt_prefix + ".safetensors"):
        ckpt_path = ckpt_prefix + ".safetensors"
    else:
        print("Loading from self-organized training checkpoints rather than released pretrained.")
        ckpt_prefix = rel_path + f"/{model_cfg.ckpts.save_dir}/model_{ckpt_step}"
        if os.path.exists(ckpt_prefix + ".pt"):
            ckpt_path = ckpt_prefix + ".pt"
        elif os.path.exists(ckpt_prefix + ".safetensors"):
            ckpt_path = ckpt_prefix + ".safetensors"
        else:
            raise ValueError("The checkpoint does not exist or cannot be found in given location.")

    # bigvgan path keeps float32; otherwise the loader's default dtype is used.
    dtype = torch.float32 if mel_spec_type == "bigvgan" else None
    model = load_checkpoint(model, ckpt_path, device, dtype=dtype, use_ema=use_ema)

    if not os.path.exists(output_dir) and accelerator.is_main_process:
        os.makedirs(output_dir)

    # start batch inference
    accelerator.wait_for_everyone()
    start = time.time()

    # Each process handles its own shard of the prompt list.
    with accelerator.split_between_processes(prompts_all) as prompts:
        for prompt in tqdm(prompts, disable=not accelerator.is_local_main_process):
            utts, ref_rms_list, ref_mels, ref_mel_lens, total_mel_lens, final_text_list = prompt
            ref_mels = ref_mels.to(device)
            ref_mel_lens = torch.tensor(ref_mel_lens, dtype=torch.long).to(device)
            total_mel_lens = torch.tensor(total_mel_lens, dtype=torch.long).to(device)

            # Inference
            with torch.inference_mode():
                generated, _ = model.sample(
                    cond=ref_mels,
                    text=final_text_list,
                    duration=total_mel_lens,
                    lens=ref_mel_lens,
                    steps=nfe_step,
                    cfg_strength=cfg_strength,
                    sway_sampling_coef=sway_sampling_coef,
                    no_ref_audio=no_ref_audio,
                    seed=seed,
                )
            # Final result
            for i, gen in enumerate(generated):
                # Drop the reference-prompt frames; keep only the new speech.
                gen = gen[ref_mel_lens[i] : total_mel_lens[i], :].unsqueeze(0)
                gen_mel_spec = gen.permute(0, 2, 1).to(torch.float32)
                if mel_spec_type == "vocos":
                    generated_wave = vocoder.decode(gen_mel_spec).cpu()
                elif mel_spec_type == "bigvgan":
                    generated_wave = vocoder(gen_mel_spec).squeeze(0).cpu()

                # Undo RMS normalization for quiet references.
                if ref_rms_list[i] < target_rms:
                    generated_wave = generated_wave * ref_rms_list[i] / target_rms
                torchaudio.save(f"{output_dir}/{utts[i]}.wav", generated_wave, target_sample_rate)

    accelerator.wait_for_everyone()
    if accelerator.is_main_process:
        timediff = time.time() - start
        print(f"Done batch inference in {timediff / 60:.2f} minutes.")


if __name__ == "__main__":
    main()
|
src/f5_tts/eval/eval_infer_batch.sh
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Batch inference + evaluation driver for F5-TTS: for each checkpoint step
# and seed, run inference on every task, then launch the matching eval jobs
# (WER, SIM, UTMOS) in the background while the next inference proceeds.
set -e
export PYTHONWARNINGS="ignore::UserWarning,ignore::FutureWarning"

# Configuration parameters
MODEL_NAME="F5TTS_v1_Base"
SEEDS=(0 1 2)
CKPTSTEPS=(1250000)
TASKS=("seedtts_test_zh" "seedtts_test_en" "ls_pc_test_clean")
LS_TEST_CLEAN_PATH="data/LibriSpeech/test-clean"
GPUS="[0,1,2,3,4,5,6,7]"
OFFLINE_MODE=false

# Parse arguments
if [ $OFFLINE_MODE = true ]; then
    LOCAL="--local"
else
    LOCAL=""
fi
INFER_ONLY=false
while [[ $# -gt 0 ]]; do
    case $1 in
        --infer-only)
            INFER_ONLY=true
            shift
            ;;
        *)
            echo "======== Unknown parameter: $1"
            exit 1
            ;;
    esac
done

echo "======== Starting F5-TTS batch evaluation task..."
if [ "$INFER_ONLY" = true ]; then
    echo "======== Mode: Execute infer tasks only"
else
    echo "======== Mode: Execute full pipeline (infer + eval)"
fi

# Function: Execute eval tasks
# $1 = checkpoint step, $2 = seed, $3 = task name.
# NOTE: the directory suffix below must stay in sync with the naming scheme
# in eval_infer_batch.py (seed/ode/nfe/mel/ss/cfg/speed).
execute_eval_tasks() {
    local ckptstep=$1
    local seed=$2
    local task_name=$3

    local gen_wav_dir="results/${MODEL_NAME}_${ckptstep}/${task_name}/seed${seed}_euler_nfe32_vocos_ss-1_cfg2.0_speed1.0"

    echo ">>>>>>>> Starting eval task: ckptstep=${ckptstep}, seed=${seed}, task=${task_name}"

    case $task_name in
        "seedtts_test_zh")
            python src/f5_tts/eval/eval_seedtts_testset.py -e wer -l zh -g "$gen_wav_dir" -n "$GPUS" $LOCAL
            python src/f5_tts/eval/eval_seedtts_testset.py -e sim -l zh -g "$gen_wav_dir" -n "$GPUS" $LOCAL
            python src/f5_tts/eval/eval_utmos.py --audio_dir "$gen_wav_dir"
            ;;
        "seedtts_test_en")
            python src/f5_tts/eval/eval_seedtts_testset.py -e wer -l en -g "$gen_wav_dir" -n "$GPUS" $LOCAL
            python src/f5_tts/eval/eval_seedtts_testset.py -e sim -l en -g "$gen_wav_dir" -n "$GPUS" $LOCAL
            python src/f5_tts/eval/eval_utmos.py --audio_dir "$gen_wav_dir"
            ;;
        "ls_pc_test_clean")
            python src/f5_tts/eval/eval_librispeech_test_clean.py -e wer -g "$gen_wav_dir" -n "$GPUS" -p "$LS_TEST_CLEAN_PATH" $LOCAL
            python src/f5_tts/eval/eval_librispeech_test_clean.py -e sim -g "$gen_wav_dir" -n "$GPUS" -p "$LS_TEST_CLEAN_PATH" $LOCAL
            python src/f5_tts/eval/eval_utmos.py --audio_dir "$gen_wav_dir"
            ;;
    esac

    echo ">>>>>>>> Completed eval task: ckptstep=${ckptstep}, seed=${seed}, task=${task_name}"
}

# Main execution loop
for ckptstep in "${CKPTSTEPS[@]}"; do
    echo "======== Processing ckptstep: ${ckptstep}"

    for seed in "${SEEDS[@]}"; do
        echo "-------- Processing seed: ${seed}"

        # Store eval task PIDs for current seed (if not infer-only mode)
        if [ "$INFER_ONLY" = false ]; then
            declare -a eval_pids
        fi

        # Execute each infer task sequentially
        for task in "${TASKS[@]}"; do
            echo ">>>>>>>> Executing infer task: accelerate launch src/f5_tts/eval/eval_infer_batch.py -s ${seed} -n \"${MODEL_NAME}\" -t \"${task}\" -c ${ckptstep} $LOCAL"

            # Execute infer task (foreground execution, wait for completion)
            accelerate launch src/f5_tts/eval/eval_infer_batch.py -s ${seed} -n "${MODEL_NAME}" -t "${task}" -c ${ckptstep} -p "${LS_TEST_CLEAN_PATH}" $LOCAL

            # If not infer-only mode, launch corresponding eval task
            if [ "$INFER_ONLY" = false ]; then
                # Launch corresponding eval task (background execution, non-blocking for next infer)
                execute_eval_tasks $ckptstep $seed $task &
                eval_pids+=($!)
            fi
        done

        # If not infer-only mode, wait for all eval tasks of current seed to complete
        if [ "$INFER_ONLY" = false ]; then
            echo ">>>>>>>> All infer tasks for seed ${seed} completed, waiting for corresponding eval tasks to finish..."

            for pid in "${eval_pids[@]}"; do
                wait $pid
            done

            unset eval_pids  # Clean up array
        fi
        echo "-------- All eval tasks for seed ${seed} completed"
    done

    echo "======== Completed ckptstep: ${ckptstep}"
    echo
done

echo "======== All tasks completed!"
|
src/f5_tts/eval/eval_infer_batch_example.sh
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Worked examples of the batch-inference / evaluation commands; copy and
# adapt rather than running this file as-is.

# e.g. F5-TTS, 16 NFE
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "seedtts_test_zh" -nfe 16
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "seedtts_test_en" -nfe 16
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "F5TTS_v1_Base" -t "ls_pc_test_clean" -nfe 16 -p data/LibriSpeech/test-clean

# e.g. Vanilla E2 TTS, 32 NFE
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "seedtts_test_zh" -o "midpoint" -ss 0
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "seedtts_test_en" -o "midpoint" -ss 0
accelerate launch src/f5_tts/eval/eval_infer_batch.py -s 0 -n "E2TTS_Base" -c 1200000 -t "ls_pc_test_clean" -o "midpoint" -ss 0 -p data/LibriSpeech/test-clean

# e.g. evaluate F5-TTS 16 NFE result on Seed-TTS test-zh
python src/f5_tts/eval/eval_seedtts_testset.py -e wer -l zh --gen_wav_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe16_vocos_ss-1_cfg2.0_speed1.0 --gpu_nums 8
python src/f5_tts/eval/eval_seedtts_testset.py -e sim -l zh --gen_wav_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe16_vocos_ss-1_cfg2.0_speed1.0 --gpu_nums 8
python src/f5_tts/eval/eval_utmos.py --audio_dir results/F5TTS_v1_Base_1250000/seedtts_test_zh/seed0_euler_nfe16_vocos_ss-1_cfg2.0_speed1.0

# etc.
|
src/f5_tts/eval/eval_librispeech_test_clean.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Evaluate with Librispeech test-clean, ~3s prompt to generate 4-10s audio (the way of valle/voicebox evaluation)
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import ast
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
sys.path.append(os.getcwd())
|
| 11 |
+
|
| 12 |
+
import multiprocessing as mp
|
| 13 |
+
from importlib.resources import files
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from f5_tts.eval.utils_eval import get_librispeech_test, run_asr_wer, run_sim
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
rel_path = str(files("f5_tts").joinpath("../../"))
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_args():
    """Parse CLI options for the LibriSpeech test-clean evaluation."""
    p = argparse.ArgumentParser()
    p.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
    p.add_argument("-l", "--lang", type=str, default="en")
    p.add_argument("-g", "--gen_wav_dir", type=str, required=True)
    p.add_argument("-p", "--librispeech_test_clean_path", type=str, required=True)
    p.add_argument(
        "-n",
        "--gpu_nums",
        type=str,
        default="8",
        help="Number of GPUs to use (e.g., 8) or GPU list (e.g., [0,1,2,3])",
    )
    p.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
    return p.parse_args()
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def parse_gpu_nums(gpu_nums_str):
    """Turn "8" into [0..7] and "[0,1,2,3]" into that literal GPU-id list."""
    try:
        looks_like_list = gpu_nums_str.startswith("[") and gpu_nums_str.endswith("]")
        if looks_like_list:
            parsed = ast.literal_eval(gpu_nums_str)
            if isinstance(parsed, list):
                return parsed
        # Otherwise (or if the literal wasn't a list) treat it as a GPU count.
        return list(range(int(gpu_nums_str)))
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError(
            f"Invalid GPU specification: {gpu_nums_str}. Use a number (e.g., 8) or a list (e.g., [0,1,2,3])"
        )
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def main():
    """Evaluate generated wavs against LibriSpeech test-clean: WER or SIM.

    Shards the test set over the requested GPUs via multiprocessing, then
    writes per-sample results plus the mean metric to a .jsonl next to the
    generated wavs.
    """
    args = get_args()
    eval_task = args.eval_task
    lang = args.lang
    librispeech_test_clean_path = args.librispeech_test_clean_path  # test-clean path
    gen_wav_dir = args.gen_wav_dir
    metalst = rel_path + "/data/librispeech_pc_test_clean_cross_sentence.lst"

    gpus = parse_gpu_nums(args.gpu_nums)
    # test_set is a list of (gpu_rank, sub_test_set) shards.
    test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path)

    ## In LibriSpeech, some speakers utilized varying voice characteristics for different characters in the book,
    ## leading to a low similarity for the ground truth in some cases.
    # test_set = get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth = True) # eval ground truth

    local = args.local
    if local:  # use local custom checkpoint dir
        asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
    else:
        asr_ckpt_dir = ""  # auto download to cache dir
    wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"

    # --------------------------------------------------------------------------

    full_results = []
    metrics = []

    # NOTE(review): `args` (the argparse namespace) is shadowed below by the
    # worker-argument tuples — rename if this function grows.
    if eval_task == "wer":
        with mp.Pool(processes=len(gpus)) as pool:
            args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
            results = pool.map(run_asr_wer, args)
            for r in results:
                full_results.extend(r)
    elif eval_task == "sim":
        with mp.Pool(processes=len(gpus)) as pool:
            args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
            results = pool.map(run_sim, args)
            for r in results:
                full_results.extend(r)
    else:
        raise ValueError(f"Unknown metric type: {eval_task}")

    # Per-sample lines, then the aggregate metric appended at the end.
    result_path = f"{gen_wav_dir}/_{eval_task}_results.jsonl"
    with open(result_path, "w") as f:
        for line in full_results:
            metrics.append(line[eval_task])
            f.write(json.dumps(line, ensure_ascii=False) + "\n")
        metric = round(np.mean(metrics), 5)
        f.write(f"\n{eval_task.upper()}: {metric}\n")

    print(f"\nTotal {len(metrics)} samples")
    print(f"{eval_task.upper()}: {metric}")
    print(f"{eval_task.upper()} results saved to {result_path}")


if __name__ == "__main__":
    main()
|
src/f5_tts/eval/eval_seedtts_testset.py
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Evaluate with Seed-TTS testset
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import ast
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
sys.path.append(os.getcwd())
|
| 11 |
+
|
| 12 |
+
import multiprocessing as mp
|
| 13 |
+
from importlib.resources import files
|
| 14 |
+
|
| 15 |
+
import numpy as np
|
| 16 |
+
|
| 17 |
+
from f5_tts.eval.utils_eval import get_seed_tts_test, run_asr_wer, run_sim
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
rel_path = str(files("f5_tts").joinpath("../../"))
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_args():
    """Parse CLI options for the Seed-TTS testset evaluation run."""
    ap = argparse.ArgumentParser()
    ap.add_argument("-e", "--eval_task", type=str, default="wer", choices=["sim", "wer"])
    ap.add_argument("-l", "--lang", type=str, default="en", choices=["zh", "en"])
    ap.add_argument("-g", "--gen_wav_dir", type=str, required=True)
    ap.add_argument(
        "-n", "--gpu_nums", type=str, default="8", help="Number of GPUs to use (e.g., 8) or GPU list (e.g., [0,1,2,3])"
    )
    ap.add_argument("--local", action="store_true", help="Use local custom checkpoint directory")
    return ap.parse_args()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def parse_gpu_nums(gpu_nums_str):
    """Turn a GPU spec string into a list of GPU indices.

    Accepts either a count ("8" -> [0..7]) or an explicit bracketed
    list ("[0,1,2,3]" -> [0, 1, 2, 3]).
    """
    try:
        looks_like_list = gpu_nums_str.startswith("[") and gpu_nums_str.endswith("]")
        if looks_like_list:
            parsed = ast.literal_eval(gpu_nums_str)
            if isinstance(parsed, list):
                return parsed
        # Fall back to interpreting the spec as a plain GPU count.
        return list(range(int(gpu_nums_str)))
    except (ValueError, SyntaxError):
        raise argparse.ArgumentTypeError(
            f"Invalid GPU specification: {gpu_nums_str}. Use a number (e.g., 8) or a list (e.g., [0,1,2,3])"
        )
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def main():
    """Run WER or SIM evaluation over a directory of generated Seed-TTS testset wavs.

    One worker process per GPU computes the per-utterance metric; results are
    aggregated, written as JSONL next to the generated audio, and the mean
    metric is appended to the file and printed.
    """
    args = get_args()
    eval_task = args.eval_task
    lang = args.lang
    gen_wav_dir = args.gen_wav_dir
    metalst = rel_path + f"/data/seedtts_testset/{lang}/meta.lst"  # seed-tts testset

    # NOTE. paraformer-zh result will be slightly different according to the number of gpus, cuz batchsize is different
    # zh 1.254 seems a result of 4 workers wer_seed_tts
    gpus = parse_gpu_nums(args.gpu_nums)
    test_set = get_seed_tts_test(metalst, gen_wav_dir, gpus)

    local = args.local
    if local:  # use local custom checkpoint dir
        if lang == "zh":
            asr_ckpt_dir = "../checkpoints/funasr"  # paraformer-zh dir under funasr
        elif lang == "en":
            asr_ckpt_dir = "../checkpoints/Systran/faster-whisper-large-v3"
    else:
        asr_ckpt_dir = ""  # auto download to cache dir
    wavlm_ckpt_dir = "../checkpoints/UniSpeech/wavlm_large_finetune.pth"

    # --------------------------------------------------------------------------

    full_results = []  # per-utterance result dicts from all workers
    metrics = []  # per-utterance metric values, for the mean

    if eval_task == "wer":
        with mp.Pool(processes=len(gpus)) as pool:
            # NOTE(review): rebinding `args` here shadows the argparse namespace above.
            args = [(rank, lang, sub_test_set, asr_ckpt_dir) for (rank, sub_test_set) in test_set]
            results = pool.map(run_asr_wer, args)
            for r in results:
                full_results.extend(r)
    elif eval_task == "sim":
        with mp.Pool(processes=len(gpus)) as pool:
            args = [(rank, sub_test_set, wavlm_ckpt_dir) for (rank, sub_test_set) in test_set]
            results = pool.map(run_sim, args)
            for r in results:
                full_results.extend(r)
    else:
        raise ValueError(f"Unknown metric type: {eval_task}")

    # One JSON object per line, then a trailing summary line with the mean.
    result_path = f"{gen_wav_dir}/_{eval_task}_results.jsonl"
    with open(result_path, "w") as f:
        for line in full_results:
            metrics.append(line[eval_task])
            f.write(json.dumps(line, ensure_ascii=False) + "\n")
        metric = round(np.mean(metrics), 5)
        f.write(f"\n{eval_task.upper()}: {metric}\n")

    print(f"\nTotal {len(metrics)} samples")
    print(f"{eval_task.upper()}: {metric}")
    print(f"{eval_task.upper()} results saved to {result_path}")
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
if __name__ == "__main__":
|
| 104 |
+
main()
|
src/f5_tts/eval/eval_utmos.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import librosa
|
| 6 |
+
import torch
|
| 7 |
+
from tqdm import tqdm
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def main():
    """Score every audio file under --audio_dir with the UTMOS22 MOS predictor.

    Writes per-file scores to ``_utmos_results.jsonl`` inside the audio dir,
    appends the average as a summary line, and prints it.
    """
    parser = argparse.ArgumentParser(description="UTMOS Evaluation")
    parser.add_argument("--audio_dir", type=str, required=True, help="Audio file path.")
    parser.add_argument("--ext", type=str, default="wav", help="Audio extension.")
    args = parser.parse_args()

    # Device preference: CUDA, then Intel XPU, then CPU.
    device = "cuda" if torch.cuda.is_available() else "xpu" if torch.xpu.is_available() else "cpu"

    # Pretrained UTMOS strong model via torch.hub (downloads on first run).
    predictor = torch.hub.load("tarepan/SpeechMOS:v1.2.0", "utmos22_strong", trust_repo=True)
    predictor = predictor.to(device)

    audio_paths = list(Path(args.audio_dir).rglob(f"*.{args.ext}"))
    utmos_score = 0  # running sum of per-file scores

    utmos_result_path = Path(args.audio_dir) / "_utmos_results.jsonl"
    with open(utmos_result_path, "w", encoding="utf-8") as f:
        for audio_path in tqdm(audio_paths, desc="Processing"):
            # Load at native sample rate; sr is handed to the predictor with the waveform.
            wav, sr = librosa.load(audio_path, sr=None, mono=True)
            wav_tensor = torch.from_numpy(wav).to(device).unsqueeze(0)
            score = predictor(wav_tensor, sr)
            line = {}
            line["wav"], line["utmos"] = str(audio_path.stem), score.item()
            utmos_score += score.item()
            f.write(json.dumps(line, ensure_ascii=False) + "\n")
        # Guard against an empty directory to avoid ZeroDivisionError.
        avg_score = utmos_score / len(audio_paths) if len(audio_paths) > 0 else 0
        f.write(f"\nUTMOS: {avg_score:.4f}\n")

    print(f"UTMOS: {avg_score:.4f}")
    print(f"UTMOS results saved to {utmos_result_path}")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
if __name__ == "__main__":
|
| 42 |
+
main()
|
src/f5_tts/eval/gen_bengali_batch.py
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Batch inference script for Bengali TTS evaluation.
|
| 3 |
+
Generates all test audios efficiently by loading model once and processing in batches.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import os
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torchaudio
|
| 12 |
+
from tqdm import tqdm
|
| 13 |
+
|
| 14 |
+
from f5_tts.infer.utils_infer import (
|
| 15 |
+
load_model,
|
| 16 |
+
load_vocoder,
|
| 17 |
+
infer_process,
|
| 18 |
+
preprocess_ref_audio_text,
|
| 19 |
+
)
|
| 20 |
+
from f5_tts.model import DiT
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Per-testset generation settings: source text file, output directory,
# number of samples to synthesize, and inference speed factor.
TESTSET_CONFIG = {
    "stimulai53": {
        "text_file": "examples/BengaliStimulai53.txt",
        "output_dir": "examples/stimulai53",
        "n_samples": 53,
        "speed": 1.0,
    },
    "ne200": {
        "text_file": "examples/BengaliNamedEntity200.txt",
        "output_dir": "examples/ne200",
        "n_samples": 200,
        "speed": 0.9,  # slower speech for these testsets
    },
    "st200": {
        "text_file": "examples/ShortText200.txt",
        "output_dir": "examples/st200",
        "n_samples": 200,
        "speed": 0.9,
    },
}

# Reference speaker audio directory and its line-aligned transcripts
# (presumably OpenSLR SLR37 Bengali data — confirm against the repo's examples).
REF_DIR = "examples/slr37"
REF_TEXT_FILE = "examples/slr37/slr37_texts.txt"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def load_texts(text_file):
    """Read *text_file* (UTF-8) and return one whitespace-stripped string per line."""
    with open(text_file, "r", encoding="utf-8") as f:
        return [raw.strip() for raw in f]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def generate_testset(
    testset_name,
    model,
    vocoder,
    vocab_file,
    base_dir,
    device,
    use_ema=True,
):
    """Synthesize every sample of one testset with an already-loaded F5-TTS model.

    Pairs reference audio ``slr_{i}.wav`` (and its transcript line i) with the
    i-th generation text, and writes ``output_{i}.wav`` into the testset's
    output dir. Existing outputs are skipped, so interrupted runs resume.

    NOTE(review): ``vocab_file`` and ``use_ema`` are accepted but unused here;
    the model passed in is already fully loaded by the caller.
    """
    config = TESTSET_CONFIG[testset_name]
    text_file = Path(base_dir) / config["text_file"]
    output_dir = Path(base_dir) / config["output_dir"]
    ref_dir = Path(base_dir) / REF_DIR
    ref_text_file = Path(base_dir) / REF_TEXT_FILE
    n_samples = config["n_samples"]
    speed = config["speed"]

    output_dir.mkdir(parents=True, exist_ok=True)

    gen_texts = load_texts(text_file)
    ref_texts = load_texts(ref_text_file)

    print(f"\n=== Generating {testset_name} ({n_samples} samples) ===")

    for i in tqdm(range(n_samples), desc=testset_name):
        ref_audio_path = ref_dir / f"slr_{i}.wav"
        output_path = output_dir / f"output_{i}.wav"

        # Resume support: never regenerate an existing output.
        if output_path.exists():
            continue

        ref_text = ref_texts[i]
        gen_text = gen_texts[i]

        try:
            # show_info silenced to keep the tqdm bar clean.
            ref_audio, ref_text_processed = preprocess_ref_audio_text(
                str(ref_audio_path), ref_text, show_info=lambda x: None
            )

            audio, sr, _ = infer_process(
                ref_audio,
                ref_text_processed,
                gen_text,
                model,
                vocoder,
                device=device,
                speed=speed,
            )

            audio_tensor = torch.tensor(audio).unsqueeze(0)
            torchaudio.save(str(output_path), audio_tensor, sr)

        except Exception as e:
            # Best-effort batch: log the failure and keep generating the rest.
            print(f"Failed {i}: {e}")
            continue

    print(f"Done! Generated {n_samples} files in {output_dir}")
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def main():
    """Load the Bengali F5-TTS checkpoint and vocoder once, then generate the
    selected testset(s) in one pass."""
    parser = argparse.ArgumentParser(description="Batch Bengali TTS Generation")
    parser.add_argument("--testset", type=str, default="all",
                        choices=["stimulai53", "ne200", "st200", "all"])
    parser.add_argument("--base_dir", type=str, default=".")
    parser.add_argument("--ckpt_file", type=str,
                        default="ckpts/bengali_300h/model_50000.pt")
    parser.add_argument("--vocab_file", type=str,
                        default="data/Bengali/vocab.txt")
    # NOTE(review): store_true with default=True means this flag can never
    # disable EMA from the command line — passing it is a no-op.
    parser.add_argument("--use_ema", action="store_true", default=True)
    parser.add_argument("--device", type=str, default=None)
    args = parser.parse_args()

    device = args.device or ("cuda" if torch.cuda.is_available() else "cpu")

    print("Loading model...")
    # F5-TTS Base DiT hyperparameters.
    F5TTS_model_cfg = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
    model = load_model(
        model_cls=DiT,
        model_cfg=F5TTS_model_cfg,
        ckpt_path=args.ckpt_file,
        mel_spec_type="vocos",
        vocab_file=args.vocab_file,
        device=device,
        use_ema=args.use_ema,
    )

    print("Loading vocoder...")
    vocoder = load_vocoder(vocoder_name="vocos", is_local=False)

    testsets = list(TESTSET_CONFIG.keys()) if args.testset == "all" else [args.testset]

    for testset in testsets:
        generate_testset(
            testset,
            model,
            vocoder,
            args.vocab_file,
            args.base_dir,
            device,
            args.use_ema,
        )

    print("\n=== All generation complete! ===")
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
if __name__ == "__main__":
|
| 159 |
+
main()
|
src/f5_tts/eval/gen_elevenlabs_batch.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ElevenLabs Batch Generation Script for Bengali Evaluation
|
| 3 |
+
|
| 4 |
+
Generates audio for evaluation testsets using ElevenLabs API with voice cloning.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import os
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
from elevenlabs import ElevenLabs
|
| 13 |
+
from tqdm import tqdm
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Per-testset settings: source text file and number of samples to synthesize.
TESTSET_CONFIG = {
    "stimulai53": {
        "text_file": "examples/BengaliStimulai53.txt",
        "n_samples": 53,
    },
    "ne200": {
        "text_file": "examples/BengaliNamedEntity200.txt",
        "n_samples": 200,
    },
    "st200": {
        "text_file": "examples/ShortText200.txt",
        "n_samples": 200,
    },
}

# Reference speaker audio used for per-sample voice cloning.
REF_DIR = "examples/slr37"
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def load_texts(text_file):
    """Read *text_file* (UTF-8) and return one whitespace-stripped string per line."""
    with open(text_file, "r", encoding="utf-8") as f:
        return [raw.strip() for raw in f]
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def main():
    """Generate one testset through the ElevenLabs API with per-sample voice cloning.

    For each sample i: clone a temporary voice from ``slr_{i}.wav``, synthesize
    the i-th text with it, save ``output_{i}.wav``, and delete the clone.
    Existing outputs are skipped so interrupted runs resume.
    """
    load_dotenv()

    parser = argparse.ArgumentParser(description="ElevenLabs Batch Generation")
    parser.add_argument("--testset", type=str, required=True,
                        choices=list(TESTSET_CONFIG.keys()))
    parser.add_argument("--model", type=str, default="eleven_v3",
                        help="ElevenLabs model ID")
    parser.add_argument("--output_dir", type=str, default=None,
                        help="Output directory (default: examples/{testset}_elevenlabs)")
    parser.add_argument("--base_dir", type=str, default=".")
    args = parser.parse_args()

    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not found. Add to .env file.")

    client = ElevenLabs(api_key=api_key)

    config = TESTSET_CONFIG[args.testset]
    n_samples = config["n_samples"]
    text_file = Path(args.base_dir) / config["text_file"]
    ref_dir = Path(args.base_dir) / REF_DIR

    output_dir = Path(args.output_dir) if args.output_dir else Path(args.base_dir) / f"examples/{args.testset}_elevenlabs"
    output_dir.mkdir(parents=True, exist_ok=True)

    gen_texts = load_texts(text_file)

    print(f"Generating {n_samples} samples for {args.testset}")
    print(f"Model: {args.model}")
    print(f"Output: {output_dir}")

    for i in tqdm(range(n_samples), desc="Generating"):
        output_path = output_dir / f"output_{i}.wav"

        # Skip existing files (resume support)
        if output_path.exists():
            continue

        ref_audio = ref_dir / f"slr_{i}.wav"
        gen_text = gen_texts[i]

        # Create voice clone (instant voice cloning from one reference wav)
        with open(ref_audio, "rb") as audio_file:
            voice = client.voices.ivc.create(
                name=f"temp_clone_{i}",
                files=[audio_file],
            )

        try:
            # Generate speech
            audio_generator = client.text_to_speech.convert(
                voice_id=voice.voice_id,
                text=gen_text,
                model_id=args.model,
            )

            # Save output (API streams the audio in chunks)
            with open(output_path, "wb") as f:
                for chunk in audio_generator:
                    f.write(chunk)

        finally:
            # Cleanup voice so temporary clones don't accumulate on the account
            client.voices.delete(voice.voice_id)

    print(f"Done! Generated files in {output_dir}")
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
if __name__ == "__main__":
|
| 110 |
+
main()
|
src/f5_tts/eval/gen_gemini_batch.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gemini TTS Batch Generation Script for Bengali Evaluation
|
| 3 |
+
|
| 4 |
+
Generates audio for evaluation testsets using Gemini 2.5 TTS API.
|
| 5 |
+
No voice cloning - Gemini auto-detects language from Bengali text.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import argparse
|
| 9 |
+
import os
|
| 10 |
+
import time
|
| 11 |
+
import wave
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
from google import genai
|
| 16 |
+
from google.genai import types
|
| 17 |
+
from google.genai.errors import ClientError
|
| 18 |
+
from tqdm import tqdm
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Per-testset settings: source text file and number of samples to synthesize.
TESTSET_CONFIG = {
    "stimulai53": {
        "text_file": "examples/BengaliStimulai53.txt",
        "n_samples": 53,
    },
    "ne200": {
        "text_file": "examples/BengaliNamedEntity200.txt",
        "n_samples": 200,
    },
    "st200": {
        "text_file": "examples/ShortText200.txt",
        "n_samples": 200,
    },
}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def load_texts(text_file):
    """Read *text_file* (UTF-8) and return one whitespace-stripped string per line."""
    with open(text_file, "r", encoding="utf-8") as f:
        return [raw.strip() for raw in f]
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def main():
    """Generate one testset through the Gemini TTS API.

    No voice cloning is involved; each text line is sent as-is. Responses are
    PCM audio written out as 16-bit mono 24 kHz WAV files. Rate-limited calls
    are retried up to 5 times with linear backoff; existing outputs are
    skipped so interrupted runs resume.
    """
    load_dotenv()

    parser = argparse.ArgumentParser(description="Gemini TTS Batch Generation")
    parser.add_argument("--testset", type=str, required=True,
                        choices=list(TESTSET_CONFIG.keys()))
    parser.add_argument("--model", type=str, default="gemini-2.5-flash-preview-tts",
                        help="Gemini TTS model")
    parser.add_argument("--output_dir", type=str, default=None,
                        help="Output directory (default: examples/{testset}_gemini)")
    parser.add_argument("--base_dir", type=str, default=".")
    args = parser.parse_args()

    api_key = os.getenv("GEMINI_API_KEY")
    if not api_key:
        raise ValueError("GEMINI_API_KEY not found. Add to .env file.")

    client = genai.Client(api_key=api_key)

    config = TESTSET_CONFIG[args.testset]
    n_samples = config["n_samples"]
    text_file = Path(args.base_dir) / config["text_file"]

    output_dir = Path(args.output_dir) if args.output_dir else Path(args.base_dir) / f"examples/{args.testset}_gemini"
    output_dir.mkdir(parents=True, exist_ok=True)

    gen_texts = load_texts(text_file)

    print(f"Generating {n_samples} samples for {args.testset}")
    print(f"Model: {args.model}")
    print(f"Output: {output_dir}")

    for i in tqdm(range(n_samples), desc="Generating"):
        output_path = output_dir / f"output_{i}.wav"

        # Skip existing files (resume support)
        if output_path.exists():
            continue

        gen_text = gen_texts[i]

        # Retry with backoff for rate limits
        response = None
        for attempt in range(5):
            try:
                response = client.models.generate_content(
                    model=args.model,
                    contents=gen_text,
                    config=types.GenerateContentConfig(
                        response_modalities=["AUDIO"],
                    ),
                )
                break
            except ClientError as e:
                # Only retry quota errors; anything else is a real failure.
                if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
                    wait = 20 * (attempt + 1)
                    print(f"\nRate limit hit, waiting {wait}s...")
                    time.sleep(wait)
                else:
                    raise

        if response is None:
            print(f"\nSkipping sample {i} after 5 failed attempts")
            continue

        # Extract audio data
        audio_data = response.candidates[0].content.parts[0].inline_data.data

        # Write as WAV file (Gemini returns PCM audio at 24kHz)
        with wave.open(str(output_path), "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(24000)
            wav_file.writeframes(audio_data)

    print(f"Done! Generated files in {output_dir}")
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
if __name__ == "__main__":
|
| 121 |
+
main()
|
src/f5_tts/eval/utils_eval.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import string
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
import torchaudio
|
| 10 |
+
from tqdm import tqdm
|
| 11 |
+
|
| 12 |
+
from f5_tts.eval.ecapa_tdnn import ECAPA_TDNN_SMALL
|
| 13 |
+
from f5_tts.model.modules import MelSpec
|
| 14 |
+
from f5_tts.model.utils import convert_char_to_pinyin
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# seedtts testset metainfo: utt, prompt_text, prompt_wav, gt_text, gt_wav
|
| 18 |
+
def get_seedtts_testset_metainfo(metalst):
    """Parse a Seed-TTS testset meta list.

    Each non-empty line is pipe-separated with either 5 fields
    (utt|prompt_text|prompt_wav|gt_text|gt_wav) or 4 fields, in which case the
    ground-truth wav is assumed at ``<metalst dir>/wavs/<utt>.wav``. Relative
    prompt_wav paths are resolved against the meta list's directory.

    Args:
        metalst: path to the meta list file.

    Returns:
        list of (utt, prompt_text, prompt_wav, gt_text, gt_wav) tuples.

    Raises:
        ValueError: if a non-empty line has neither 4 nor 5 fields
            (the original silently reused stale fields or raised NameError).
    """
    base_dir = os.path.dirname(metalst)
    metainfo = []
    with open(metalst) as f:  # context manager so the handle is closed on error
        for line in f:
            stripped = line.strip()
            if not stripped:  # tolerate blank lines
                continue
            fields = stripped.split("|")
            if len(fields) == 5:
                utt, prompt_text, prompt_wav, gt_text, gt_wav = fields
            elif len(fields) == 4:
                utt, prompt_text, prompt_wav, gt_text = fields
                gt_wav = os.path.join(base_dir, "wavs", utt + ".wav")
            else:
                raise ValueError(f"Malformed meta line (expected 4 or 5 fields): {line!r}")
            if not os.path.isabs(prompt_wav):
                prompt_wav = os.path.join(base_dir, prompt_wav)
            metainfo.append((utt, prompt_text, prompt_wav, gt_text, gt_wav))
    return metainfo
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# librispeech test-clean metainfo: gen_utt, ref_txt, ref_wav, gen_txt, gen_wav
|
| 36 |
+
def get_librispeech_test_clean_metainfo(metalst, librispeech_test_clean_path):
    """Build (gen_utt, ref_txt, ref_wav, " " + gen_txt, gen_wav) tuples from a
    tab-separated LibriSpeech cross-sentence meta list.

    Utterance ids look like ``<speaker>-<chapter>-<idx>`` and map to
    ``<root>/<speaker>/<chapter>/<utt>.flac``.
    """

    def flac_path(utt):
        # LibriSpeech directory layout: <root>/<speaker>/<chapter>/<utt>.flac
        spk_id, chaptr_id, _ = utt.split("-")
        return os.path.join(librispeech_test_clean_path, spk_id, chaptr_id, utt + ".flac")

    with open(metalst) as f:
        raw_lines = f.readlines()

    metainfo = []
    for raw in raw_lines:
        ref_utt, ref_dur, ref_txt, gen_utt, gen_dur, gen_txt = raw.strip().split("\t")

        # ref_txt = ref_txt[0] + ref_txt[1:].lower() + '.'  # if use librispeech test-clean (no-pc)
        ref_wav = flac_path(ref_utt)

        # gen_txt = gen_txt[0] + gen_txt[1:].lower() + '.'  # if use librispeech test-clean (no-pc)
        gen_wav = flac_path(gen_utt)

        # Leading space on gen_txt keeps prompt/gen text separated downstream.
        metainfo.append((gen_utt, ref_txt, ref_wav, " " + gen_txt, gen_wav))

    return metainfo
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# padded to max length mel batch
|
| 58 |
+
def padded_mel_batch(ref_mels):
    """Right-pad each (n_mel, T_i) spectrogram with zeros to the longest T_i,
    stack, and return a (batch, T_max, n_mel) tensor."""
    longest = max(mel.shape[-1] for mel in ref_mels)
    padded = [F.pad(mel, (0, longest - mel.shape[-1]), value=0) for mel in ref_mels]
    # Stack to (batch, n_mel, T_max) then move time to dim 1.
    return torch.stack(padded).permute(0, 2, 1)
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
# get prompts from metainfo containing: utt, prompt_text, prompt_wav, gt_text, gt_wav
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_inference_prompt(
    metainfo,
    speed=1.0,
    tokenizer="pinyin",
    polyphone=True,
    target_sample_rate=24000,
    n_fft=1024,
    win_length=1024,
    n_mel_channels=100,
    hop_length=256,
    mel_spec_type="vocos",
    target_rms=0.1,
    use_truth_duration=False,
    infer_batch_size=1,
    num_buckets=200,
    min_secs=3,
    max_secs=40,
):
    """Convert testset metainfo into length-bucketed inference batches.

    Each metainfo entry (utt, prompt_text, prompt_wav, gt_text, gt_wav) is
    loaded, RMS-normalized up to ``target_rms``, resampled to
    ``target_sample_rate``, and turned into a reference mel spectrogram. The
    total mel length (reference + estimated generated part) assigns the sample
    to one of ``num_buckets`` duration buckets; a bucket is flushed into
    ``prompts_all`` once its accumulated frames reach ``infer_batch_size``.

    Returns a shuffled list of
    (utts, ref_rms_list, padded_ref_mels, ref_mel_lens, total_mel_lens, final_text_list)
    batch tuples.
    """
    prompts_all = []

    # Duration limits expressed in mel frames.
    min_tokens = min_secs * target_sample_rate // hop_length
    max_tokens = max_secs * target_sample_rate // hop_length

    batch_accum = [0] * num_buckets
    utts, ref_rms_list, ref_mels, ref_mel_lens, total_mel_lens, final_text_list = (
        [[] for _ in range(num_buckets)] for _ in range(6)
    )

    mel_spectrogram = MelSpec(
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        n_mel_channels=n_mel_channels,
        target_sample_rate=target_sample_rate,
        mel_spec_type=mel_spec_type,
    )

    for utt, prompt_text, prompt_wav, gt_text, gt_wav in tqdm(metainfo, desc="Processing prompts..."):
        # Audio: boost quiet prompts up to target_rms (never attenuate).
        ref_audio, ref_sr = torchaudio.load(prompt_wav)
        ref_rms = torch.sqrt(torch.mean(torch.square(ref_audio)))
        if ref_rms < target_rms:
            ref_audio = ref_audio * target_rms / ref_rms
        assert ref_audio.shape[-1] > 5000, f"Empty prompt wav: {prompt_wav}, or torchaudio backend issue."
        if ref_sr != target_sample_rate:
            resampler = torchaudio.transforms.Resample(ref_sr, target_sample_rate)
            ref_audio = resampler(ref_audio)

        # Text: ensure a trailing space after a single-byte (ASCII) final char
        # before concatenating prompt and target text.
        if len(prompt_text[-1].encode("utf-8")) == 1:
            prompt_text = prompt_text + " "
        text = [prompt_text + gt_text]
        if tokenizer == "pinyin":
            text_list = convert_char_to_pinyin(text, polyphone=polyphone)
        else:
            text_list = text

        # to mel spectrogram
        ref_mel = mel_spectrogram(ref_audio)
        ref_mel = ref_mel.squeeze(0)

        # Duration, mel frame length
        ref_mel_len = ref_mel.shape[-1]

        if use_truth_duration:
            # Use the ground-truth audio's actual length for the generated part.
            gt_audio, gt_sr = torchaudio.load(gt_wav)
            if gt_sr != target_sample_rate:
                resampler = torchaudio.transforms.Resample(gt_sr, target_sample_rate)
                gt_audio = resampler(gt_audio)
            total_mel_len = ref_mel_len + int(gt_audio.shape[-1] / hop_length / speed)

            # # test vocoder resynthesis
            # ref_audio = gt_audio
        else:
            # Estimate generated duration from the byte-length ratio of texts.
            ref_text_len = len(prompt_text.encode("utf-8"))
            gen_text_len = len(gt_text.encode("utf-8"))
            total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)

        # deal with batch
        assert infer_batch_size > 0, "infer_batch_size should be greater than 0."
        assert min_tokens <= total_mel_len <= max_tokens, (
            f"Audio {utt} has duration {total_mel_len * hop_length // target_sample_rate}s out of range [{min_secs}, {max_secs}]."
        )
        # Linear mapping of total duration onto [0, num_buckets).
        bucket_i = math.floor((total_mel_len - min_tokens) / (max_tokens - min_tokens + 1) * num_buckets)

        utts[bucket_i].append(utt)
        ref_rms_list[bucket_i].append(ref_rms)
        ref_mels[bucket_i].append(ref_mel)
        ref_mel_lens[bucket_i].append(ref_mel_len)
        total_mel_lens[bucket_i].append(total_mel_len)
        final_text_list[bucket_i].extend(text_list)

        batch_accum[bucket_i] += total_mel_len

        # Flush the bucket once enough mel frames have accumulated.
        if batch_accum[bucket_i] >= infer_batch_size:
            # print(f"\n{len(ref_mels[bucket_i][0][0])}\n{ref_mel_lens[bucket_i]}\n{total_mel_lens[bucket_i]}")
            prompts_all.append(
                (
                    utts[bucket_i],
                    ref_rms_list[bucket_i],
                    padded_mel_batch(ref_mels[bucket_i]),
                    ref_mel_lens[bucket_i],
                    total_mel_lens[bucket_i],
                    final_text_list[bucket_i],
                )
            )
            batch_accum[bucket_i] = 0
            (
                utts[bucket_i],
                ref_rms_list[bucket_i],
                ref_mels[bucket_i],
                ref_mel_lens[bucket_i],
                total_mel_lens[bucket_i],
                final_text_list[bucket_i],
            ) = [], [], [], [], [], []

    # add residual
    for bucket_i, bucket_frames in enumerate(batch_accum):
        if bucket_frames > 0:
            prompts_all.append(
                (
                    utts[bucket_i],
                    ref_rms_list[bucket_i],
                    padded_mel_batch(ref_mels[bucket_i]),
                    ref_mel_lens[bucket_i],
                    total_mel_lens[bucket_i],
                    final_text_list[bucket_i],
                )
            )
    # not only leave easy work for last workers
    random.seed(666)
    random.shuffle(prompts_all)

    return prompts_all
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
# get wav_res_ref_text of seed-tts test metalst
|
| 209 |
+
# https://github.com/BytedanceSpeech/seed-tts-eval
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
def get_seed_tts_test(metalst, gen_wav_dir, gpus):
|
| 213 |
+
f = open(metalst)
|
| 214 |
+
lines = f.readlines()
|
| 215 |
+
f.close()
|
| 216 |
+
|
| 217 |
+
test_set_ = []
|
| 218 |
+
for line in tqdm(lines):
|
| 219 |
+
if len(line.strip().split("|")) == 5:
|
| 220 |
+
utt, prompt_text, prompt_wav, gt_text, gt_wav = line.strip().split("|")
|
| 221 |
+
elif len(line.strip().split("|")) == 4:
|
| 222 |
+
utt, prompt_text, prompt_wav, gt_text = line.strip().split("|")
|
| 223 |
+
|
| 224 |
+
if not os.path.exists(os.path.join(gen_wav_dir, utt + ".wav")):
|
| 225 |
+
continue
|
| 226 |
+
gen_wav = os.path.join(gen_wav_dir, utt + ".wav")
|
| 227 |
+
if not os.path.isabs(prompt_wav):
|
| 228 |
+
prompt_wav = os.path.join(os.path.dirname(metalst), prompt_wav)
|
| 229 |
+
|
| 230 |
+
test_set_.append((gen_wav, prompt_wav, gt_text))
|
| 231 |
+
|
| 232 |
+
num_jobs = len(gpus)
|
| 233 |
+
if num_jobs == 1:
|
| 234 |
+
return [(gpus[0], test_set_)]
|
| 235 |
+
|
| 236 |
+
wav_per_job = len(test_set_) // num_jobs + 1
|
| 237 |
+
test_set = []
|
| 238 |
+
for i in range(num_jobs):
|
| 239 |
+
test_set.append((gpus[i], test_set_[i * wav_per_job : (i + 1) * wav_per_job]))
|
| 240 |
+
|
| 241 |
+
return test_set
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
# get librispeech test-clean cross sentence test
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def get_librispeech_test(metalst, gen_wav_dir, gpus, librispeech_test_clean_path, eval_ground_truth=False):
|
| 248 |
+
f = open(metalst)
|
| 249 |
+
lines = f.readlines()
|
| 250 |
+
f.close()
|
| 251 |
+
|
| 252 |
+
test_set_ = []
|
| 253 |
+
for line in tqdm(lines):
|
| 254 |
+
ref_utt, ref_dur, ref_txt, gen_utt, gen_dur, gen_txt = line.strip().split("\t")
|
| 255 |
+
|
| 256 |
+
if eval_ground_truth:
|
| 257 |
+
gen_spk_id, gen_chaptr_id, _ = gen_utt.split("-")
|
| 258 |
+
gen_wav = os.path.join(librispeech_test_clean_path, gen_spk_id, gen_chaptr_id, gen_utt + ".flac")
|
| 259 |
+
else:
|
| 260 |
+
if not os.path.exists(os.path.join(gen_wav_dir, gen_utt + ".wav")):
|
| 261 |
+
raise FileNotFoundError(f"Generated wav not found: {gen_utt}")
|
| 262 |
+
gen_wav = os.path.join(gen_wav_dir, gen_utt + ".wav")
|
| 263 |
+
|
| 264 |
+
ref_spk_id, ref_chaptr_id, _ = ref_utt.split("-")
|
| 265 |
+
ref_wav = os.path.join(librispeech_test_clean_path, ref_spk_id, ref_chaptr_id, ref_utt + ".flac")
|
| 266 |
+
|
| 267 |
+
test_set_.append((gen_wav, ref_wav, gen_txt))
|
| 268 |
+
|
| 269 |
+
num_jobs = len(gpus)
|
| 270 |
+
if num_jobs == 1:
|
| 271 |
+
return [(gpus[0], test_set_)]
|
| 272 |
+
|
| 273 |
+
wav_per_job = len(test_set_) // num_jobs + 1
|
| 274 |
+
test_set = []
|
| 275 |
+
for i in range(num_jobs):
|
| 276 |
+
test_set.append((gpus[i], test_set_[i * wav_per_job : (i + 1) * wav_per_job]))
|
| 277 |
+
|
| 278 |
+
return test_set
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
# load asr model
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def load_asr_model(lang, ckpt_dir=""):
|
| 285 |
+
if lang == "zh":
|
| 286 |
+
from funasr import AutoModel
|
| 287 |
+
|
| 288 |
+
model = AutoModel(
|
| 289 |
+
model=os.path.join(ckpt_dir, "paraformer-zh"),
|
| 290 |
+
# vad_model = os.path.join(ckpt_dir, "fsmn-vad"),
|
| 291 |
+
# punc_model = os.path.join(ckpt_dir, "ct-punc"),
|
| 292 |
+
# spk_model = os.path.join(ckpt_dir, "cam++"),
|
| 293 |
+
disable_update=True,
|
| 294 |
+
) # following seed-tts setting
|
| 295 |
+
elif lang == "en":
|
| 296 |
+
from faster_whisper import WhisperModel
|
| 297 |
+
|
| 298 |
+
model_size = "large-v3" if ckpt_dir == "" else ckpt_dir
|
| 299 |
+
model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
| 300 |
+
elif lang == "bn":
|
| 301 |
+
from faster_whisper import WhisperModel
|
| 302 |
+
|
| 303 |
+
model_size = "large-v3" if ckpt_dir == "" else ckpt_dir
|
| 304 |
+
model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
| 305 |
+
return model
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
# WER Evaluation, the way Seed-TTS does
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
def run_asr_wer(args):
|
| 312 |
+
rank, lang, test_set, ckpt_dir = args
|
| 313 |
+
|
| 314 |
+
if lang == "zh":
|
| 315 |
+
import zhconv
|
| 316 |
+
|
| 317 |
+
torch.cuda.set_device(rank)
|
| 318 |
+
elif lang == "en":
|
| 319 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = str(rank)
|
| 320 |
+
elif lang == "bn":
|
| 321 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = str(rank)
|
| 322 |
+
else:
|
| 323 |
+
raise NotImplementedError(
|
| 324 |
+
"lang support only 'zh' (funasr paraformer-zh), 'en'/'bn' (faster-whisper-large-v3), for now."
|
| 325 |
+
)
|
| 326 |
+
|
| 327 |
+
asr_model = load_asr_model(lang, ckpt_dir=ckpt_dir)
|
| 328 |
+
|
| 329 |
+
from zhon.hanzi import punctuation
|
| 330 |
+
|
| 331 |
+
punctuation_all = punctuation + string.punctuation
|
| 332 |
+
wer_results = []
|
| 333 |
+
|
| 334 |
+
from jiwer import compute_measures
|
| 335 |
+
|
| 336 |
+
for gen_wav, prompt_wav, truth in tqdm(test_set):
|
| 337 |
+
if lang == "zh":
|
| 338 |
+
res = asr_model.generate(input=gen_wav, batch_size_s=300, disable_pbar=True)
|
| 339 |
+
hypo = res[0]["text"]
|
| 340 |
+
hypo = zhconv.convert(hypo, "zh-cn")
|
| 341 |
+
elif lang == "en":
|
| 342 |
+
segments, _ = asr_model.transcribe(gen_wav, beam_size=5, language="en")
|
| 343 |
+
hypo = ""
|
| 344 |
+
for segment in segments:
|
| 345 |
+
hypo = hypo + " " + segment.text
|
| 346 |
+
elif lang == "bn":
|
| 347 |
+
segments, _ = asr_model.transcribe(gen_wav, beam_size=5, language="bn")
|
| 348 |
+
hypo = ""
|
| 349 |
+
for segment in segments:
|
| 350 |
+
hypo = hypo + segment.text
|
| 351 |
+
|
| 352 |
+
raw_truth = truth
|
| 353 |
+
raw_hypo = hypo
|
| 354 |
+
|
| 355 |
+
for x in punctuation_all:
|
| 356 |
+
truth = truth.replace(x, "")
|
| 357 |
+
hypo = hypo.replace(x, "")
|
| 358 |
+
|
| 359 |
+
truth = truth.replace(" ", " ")
|
| 360 |
+
hypo = hypo.replace(" ", " ")
|
| 361 |
+
|
| 362 |
+
if lang == "zh":
|
| 363 |
+
truth = " ".join([x for x in truth])
|
| 364 |
+
hypo = " ".join([x for x in hypo])
|
| 365 |
+
elif lang == "en":
|
| 366 |
+
truth = truth.lower()
|
| 367 |
+
hypo = hypo.lower()
|
| 368 |
+
elif lang == "bn":
|
| 369 |
+
# Bengali: character-level for CER, remove Bangla danda
|
| 370 |
+
truth = truth.replace("।", "")
|
| 371 |
+
hypo = hypo.replace("।", "")
|
| 372 |
+
truth = " ".join([x for x in truth if x.strip()])
|
| 373 |
+
hypo = " ".join([x for x in hypo if x.strip()])
|
| 374 |
+
|
| 375 |
+
measures = compute_measures(truth, hypo)
|
| 376 |
+
wer = measures["wer"]
|
| 377 |
+
|
| 378 |
+
# ref_list = truth.split(" ")
|
| 379 |
+
# subs = measures["substitutions"] / len(ref_list)
|
| 380 |
+
# dele = measures["deletions"] / len(ref_list)
|
| 381 |
+
# inse = measures["insertions"] / len(ref_list)
|
| 382 |
+
|
| 383 |
+
wer_results.append(
|
| 384 |
+
{
|
| 385 |
+
"wav": Path(gen_wav).stem,
|
| 386 |
+
"truth": raw_truth,
|
| 387 |
+
"hypo": raw_hypo,
|
| 388 |
+
"wer": wer,
|
| 389 |
+
}
|
| 390 |
+
)
|
| 391 |
+
|
| 392 |
+
return wer_results
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
# SIM Evaluation
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def run_sim(args):
|
| 399 |
+
rank, test_set, ckpt_dir = args
|
| 400 |
+
device = f"cuda:{rank}"
|
| 401 |
+
|
| 402 |
+
model = ECAPA_TDNN_SMALL(feat_dim=1024, feat_type="wavlm_large", config_path=None)
|
| 403 |
+
state_dict = torch.load(ckpt_dir, weights_only=True, map_location=lambda storage, loc: storage)
|
| 404 |
+
model.load_state_dict(state_dict["model"], strict=False)
|
| 405 |
+
|
| 406 |
+
use_gpu = True if torch.cuda.is_available() else False
|
| 407 |
+
if use_gpu:
|
| 408 |
+
model = model.cuda(device)
|
| 409 |
+
model.eval()
|
| 410 |
+
|
| 411 |
+
sim_results = []
|
| 412 |
+
for gen_wav, prompt_wav, truth in tqdm(test_set):
|
| 413 |
+
wav1, sr1 = torchaudio.load(gen_wav)
|
| 414 |
+
wav2, sr2 = torchaudio.load(prompt_wav)
|
| 415 |
+
|
| 416 |
+
if use_gpu:
|
| 417 |
+
wav1 = wav1.cuda(device)
|
| 418 |
+
wav2 = wav2.cuda(device)
|
| 419 |
+
|
| 420 |
+
if sr1 != 16000:
|
| 421 |
+
resample1 = torchaudio.transforms.Resample(orig_freq=sr1, new_freq=16000)
|
| 422 |
+
if use_gpu:
|
| 423 |
+
resample1 = resample1.cuda(device)
|
| 424 |
+
wav1 = resample1(wav1)
|
| 425 |
+
if sr2 != 16000:
|
| 426 |
+
resample2 = torchaudio.transforms.Resample(orig_freq=sr2, new_freq=16000)
|
| 427 |
+
if use_gpu:
|
| 428 |
+
resample2 = resample2.cuda(device)
|
| 429 |
+
wav2 = resample2(wav2)
|
| 430 |
+
|
| 431 |
+
with torch.no_grad():
|
| 432 |
+
emb1 = model(wav1)
|
| 433 |
+
emb2 = model(wav2)
|
| 434 |
+
|
| 435 |
+
sim = F.cosine_similarity(emb1, emb2)[0].item()
|
| 436 |
+
# print(f"VSim score between two audios: {sim:.4f} (-1.0, 1.0).")
|
| 437 |
+
sim_results.append(
|
| 438 |
+
{
|
| 439 |
+
"wav": Path(gen_wav).stem,
|
| 440 |
+
"sim": sim,
|
| 441 |
+
}
|
| 442 |
+
)
|
| 443 |
+
|
| 444 |
+
return sim_results
|
src/f5_tts/infer/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Inference
|
| 2 |
+
|
| 3 |
+
The pretrained model checkpoints can be reached at [🤗 Hugging Face](https://huggingface.co/SWivid/F5-TTS) and [🤖 Model Scope](https://www.modelscope.cn/models/SWivid/F5-TTS_Emilia-ZH-EN), or will be automatically downloaded when running inference scripts.
|
| 4 |
+
|
| 5 |
+
**More checkpoints with whole community efforts can be found in [SHARED.md](SHARED.md), supporting more languages.**
|
| 6 |
+
|
| 7 |
+
Currently support **30s for a single** generation, which is the **total length** (same logic if `fix_duration`) including both prompt and output audio. However, `infer_cli` and `infer_gradio` will automatically do chunk generation for longer text. Long reference audio will be **clip short to ~12s**.
|
| 8 |
+
|
| 9 |
+
To avoid possible inference failures, make sure you have seen through the following instructions.
|
| 10 |
+
|
| 11 |
+
- Use reference audio <12s and leave proper silence space (e.g. 1s) at the end. Otherwise there is a risk of truncating in the middle of word, leading to suboptimal generation.
|
| 12 |
+
- <ins>Uppercased letters</ins> (best with form like K.F.C.) will be uttered letter by letter, and lowercased letters used for common words.
|
| 13 |
+
- Add some spaces (blank: " ") or punctuations (e.g. "," ".") <ins>to explicitly introduce some pauses</ins>.
|
| 14 |
+
- If English punctuation marks the end of a sentence, make sure there is a space " " after it. Otherwise not regarded as when chunk.
|
| 15 |
+
- <ins>Preprocess numbers</ins> to Chinese letters if you want to have them read in Chinese, otherwise in English.
|
| 16 |
+
- If the generation output is blank (pure silence), <ins>check for FFmpeg installation</ins>.
|
| 17 |
+
- Try <ins>turn off `use_ema` if using an early-stage</ins> finetuned checkpoint (which goes just few updates).
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
## Gradio App
|
| 21 |
+
|
| 22 |
+
Currently supported features:
|
| 23 |
+
|
| 24 |
+
- Basic TTS with Chunk Inference
|
| 25 |
+
- Multi-Style / Multi-Speaker Generation
|
| 26 |
+
- Voice Chat powered by Qwen2.5-3B-Instruct
|
| 27 |
+
- [Custom inference with more language support](SHARED.md)
|
| 28 |
+
|
| 29 |
+
The cli command `f5-tts_infer-gradio` equals to `python src/f5_tts/infer/infer_gradio.py`, which launches a Gradio APP (web interface) for inference.
|
| 30 |
+
|
| 31 |
+
The script will load model checkpoints from Huggingface. You can also manually download files and update the path to `load_model()` in `infer_gradio.py`. Currently only load TTS models first, will load ASR model to do transcription if `ref_text` not provided, will load LLM model if use Voice Chat.
|
| 32 |
+
|
| 33 |
+
More flags options:
|
| 34 |
+
|
| 35 |
+
```bash
|
| 36 |
+
# Automatically launch the interface in the default web browser
|
| 37 |
+
f5-tts_infer-gradio --inbrowser
|
| 38 |
+
|
| 39 |
+
# Set the root path of the application, if it's not served from the root ("/") of the domain
|
| 40 |
+
# For example, if the application is served at "https://example.com/myapp"
|
| 41 |
+
f5-tts_infer-gradio --root_path "/myapp"
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
Could also be used as a component for larger application:
|
| 45 |
+
```python
|
| 46 |
+
import gradio as gr
|
| 47 |
+
from f5_tts.infer.infer_gradio import app
|
| 48 |
+
|
| 49 |
+
with gr.Blocks() as main_app:
|
| 50 |
+
gr.Markdown("# This is an example of using F5-TTS within a bigger Gradio app")
|
| 51 |
+
|
| 52 |
+
# ... other Gradio components
|
| 53 |
+
|
| 54 |
+
app.render()
|
| 55 |
+
|
| 56 |
+
main_app.launch()
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
## CLI Inference
|
| 61 |
+
|
| 62 |
+
The cli command `f5-tts_infer-cli` equals to `python src/f5_tts/infer/infer_cli.py`, which is a command line tool for inference.
|
| 63 |
+
|
| 64 |
+
The script will load model checkpoints from Huggingface. You can also manually download files and use `--ckpt_file` to specify the model you want to load, or directly update in `infer_cli.py`.
|
| 65 |
+
|
| 66 |
+
For change vocab.txt use `--vocab_file` to provide your `vocab.txt` file.
|
| 67 |
+
|
| 68 |
+
Basically you can inference with flags:
|
| 69 |
+
```bash
|
| 70 |
+
# Leave --ref_text "" will have ASR model transcribe (extra GPU memory usage)
|
| 71 |
+
f5-tts_infer-cli \
|
| 72 |
+
--model F5TTS_v1_Base \
|
| 73 |
+
--ref_audio "ref_audio.wav" \
|
| 74 |
+
--ref_text "The content, subtitle or transcription of reference audio." \
|
| 75 |
+
--gen_text "Some text you want TTS model generate for you."
|
| 76 |
+
|
| 77 |
+
# Use BigVGAN as vocoder. Currently only support F5TTS_Base.
|
| 78 |
+
f5-tts_infer-cli --model F5TTS_Base --vocoder_name bigvgan --load_vocoder_from_local
|
| 79 |
+
|
| 80 |
+
# Use custom path checkpoint, e.g.
|
| 81 |
+
f5-tts_infer-cli --ckpt_file ckpts/F5TTS_v1_Base/model_1250000.safetensors
|
| 82 |
+
|
| 83 |
+
# More instructions
|
| 84 |
+
f5-tts_infer-cli --help
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
And a `.toml` file would help with more flexible usage.
|
| 88 |
+
|
| 89 |
+
```bash
|
| 90 |
+
f5-tts_infer-cli -c custom.toml
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
For example, you can use `.toml` to pass in variables, refer to `src/f5_tts/infer/examples/basic/basic.toml`:
|
| 94 |
+
|
| 95 |
+
```toml
|
| 96 |
+
# F5TTS_v1_Base | E2TTS_Base
|
| 97 |
+
model = "F5TTS_v1_Base"
|
| 98 |
+
ref_audio = "infer/examples/basic/basic_ref_en.wav"
|
| 99 |
+
# If an empty "", transcribes the reference audio automatically.
|
| 100 |
+
ref_text = "Some call me nature, others call me mother nature."
|
| 101 |
+
gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
|
| 102 |
+
# File with text to generate. Ignores the text above.
|
| 103 |
+
gen_file = ""
|
| 104 |
+
remove_silence = false
|
| 105 |
+
output_dir = "tests"
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
You can also leverage `.toml` file to do multi-style generation, refer to `src/f5_tts/infer/examples/multi/story.toml`.
|
| 109 |
+
|
| 110 |
+
```toml
|
| 111 |
+
# F5TTS_v1_Base | E2TTS_Base
|
| 112 |
+
model = "F5TTS_v1_Base"
|
| 113 |
+
ref_audio = "infer/examples/multi/main.flac"
|
| 114 |
+
# If an empty "", transcribes the reference audio automatically.
|
| 115 |
+
ref_text = ""
|
| 116 |
+
gen_text = ""
|
| 117 |
+
# File with text to generate. Ignores the text above.
|
| 118 |
+
gen_file = "infer/examples/multi/story.txt"
|
| 119 |
+
remove_silence = true
|
| 120 |
+
output_dir = "tests"
|
| 121 |
+
|
| 122 |
+
[voices.town]
|
| 123 |
+
ref_audio = "infer/examples/multi/town.flac"
|
| 124 |
+
ref_text = ""
|
| 125 |
+
|
| 126 |
+
[voices.country]
|
| 127 |
+
ref_audio = "infer/examples/multi/country.flac"
|
| 128 |
+
ref_text = ""
|
| 129 |
+
```
|
| 130 |
+
You should mark the voice with `[main]` `[town]` `[country]` whenever you want to change voice, refer to `src/f5_tts/infer/examples/multi/story.txt`.
|
| 131 |
+
|
| 132 |
+
## API Usage
|
| 133 |
+
|
| 134 |
+
```python
|
| 135 |
+
from importlib.resources import files
|
| 136 |
+
from f5_tts.api import F5TTS
|
| 137 |
+
|
| 138 |
+
f5tts = F5TTS()
|
| 139 |
+
wav, sr, spec = f5tts.infer(
|
| 140 |
+
ref_file=str(files("f5_tts").joinpath("infer/examples/basic/basic_ref_en.wav")),
|
| 141 |
+
ref_text="some call me nature, others call me mother nature.",
|
| 142 |
+
gen_text="""I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring. Respect me and I'll nurture you; ignore me and you shall face the consequences.""",
|
| 143 |
+
file_wave=str(files("f5_tts").joinpath("../../tests/api_out.wav")),
|
| 144 |
+
file_spec=str(files("f5_tts").joinpath("../../tests/api_out.png")),
|
| 145 |
+
seed=None,
|
| 146 |
+
)
|
| 147 |
+
```
|
| 148 |
+
Check [api.py](../api.py) for more details.
|
| 149 |
+
|
| 150 |
+
## TensorRT-LLM Deployment
|
| 151 |
+
|
| 152 |
+
See [detailed instructions](../runtime/triton_trtllm/README.md) for more information.
|
| 153 |
+
|
| 154 |
+
## Socket Real-time Service
|
| 155 |
+
|
| 156 |
+
Real-time voice output with chunk stream:
|
| 157 |
+
|
| 158 |
+
```bash
|
| 159 |
+
# Start socket server
|
| 160 |
+
python src/f5_tts/socket_server.py
|
| 161 |
+
|
| 162 |
+
# If PyAudio not installed
|
| 163 |
+
sudo apt-get install portaudio19-dev
|
| 164 |
+
pip install pyaudio
|
| 165 |
+
|
| 166 |
+
# Communicate with socket client
|
| 167 |
+
python src/f5_tts/socket_client.py
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## Speech Editing
|
| 171 |
+
|
| 172 |
+
To test speech editing capabilities, use the following command:
|
| 173 |
+
|
| 174 |
+
```bash
|
| 175 |
+
python src/f5_tts/infer/speech_edit.py
|
| 176 |
+
```
|
| 177 |
+
|
src/f5_tts/infer/SHARED.md
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- omit in toc -->
|
| 2 |
+
# Shared Model Cards
|
| 3 |
+
|
| 4 |
+
<!-- omit in toc -->
|
| 5 |
+
### **Prerequisites of using**
|
| 6 |
+
- This document is serving as a quick lookup table for the community training/finetuning result, with various language support.
|
| 7 |
+
- The models in this repository are open source and are based on voluntary contributions from contributors.
|
| 8 |
+
- The use of models must be conditioned on respect for the respective creators. The convenience brought comes from their efforts.
|
| 9 |
+
|
| 10 |
+
<!-- omit in toc -->
|
| 11 |
+
### **Welcome to share here**
|
| 12 |
+
- Have a pretrained/finetuned result: model checkpoint (pruned best to facilitate inference, i.e. leave only `ema_model_state_dict`) and corresponding vocab file (for tokenization).
|
| 13 |
+
- Host a public [huggingface model repository](https://huggingface.co/new) and upload the model related files.
|
| 14 |
+
- Make a pull request adding a model card to the current page, i.e. `src\f5_tts\infer\SHARED.md`.
|
| 15 |
+
|
| 16 |
+
<!-- omit in toc -->
|
| 17 |
+
### Supported Languages
|
| 18 |
+
- [Multilingual](#multilingual)
|
| 19 |
+
- [F5-TTS v1 v0 Base @ zh \& en @ F5-TTS](#f5-tts-v1-v0-base--zh--en--f5-tts)
|
| 20 |
+
- [English](#english)
|
| 21 |
+
- [Finnish](#finnish)
|
| 22 |
+
- [F5-TTS Base @ fi @ AsmoKoskinen](#f5-tts-base--fi--asmokoskinen)
|
| 23 |
+
- [French](#french)
|
| 24 |
+
- [F5-TTS Base @ fr @ RASPIAUDIO](#f5-tts-base--fr--raspiaudio)
|
| 25 |
+
- [German](#german)
|
| 26 |
+
- [F5-TTS Base @ de @ hvoss-techfak](#f5-tts-base--de--hvoss-techfak)
|
| 27 |
+
- [Hindi](#hindi)
|
| 28 |
+
- [F5-TTS Small @ hi @ SPRINGLab](#f5-tts-small--hi--springlab)
|
| 29 |
+
- [Italian](#italian)
|
| 30 |
+
- [F5-TTS Base @ it @ alien79](#f5-tts-base--it--alien79)
|
| 31 |
+
- [Japanese](#japanese)
|
| 32 |
+
- [F5-TTS Base @ ja @ Jmica](#f5-tts-base--ja--jmica)
|
| 33 |
+
- [Mandarin](#mandarin)
|
| 34 |
+
- [Russian](#russian)
|
| 35 |
+
- [F5-TTS Base @ ru @ HotDro4illa](#f5-tts-base--ru--hotdro4illa)
|
| 36 |
+
- [Spanish](#spanish)
|
| 37 |
+
- [F5-TTS Base @ es @ jpgallegoar](#f5-tts-base--es--jpgallegoar)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
## Multilingual
|
| 41 |
+
|
| 42 |
+
#### F5-TTS v1 v0 Base @ zh & en @ F5-TTS
|
| 43 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 44 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 45 |
+
|F5-TTS v1 Base|[ckpt & vocab](https://huggingface.co/SWivid/F5-TTS/tree/main/F5TTS_v1_Base)|[Emilia 95K zh&en](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07)|cc-by-nc-4.0|
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
Model: hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors
|
| 49 |
+
# A Variant Model: hf://SWivid/F5-TTS/F5TTS_v1_Base_no_zero_init/model_1250000.safetensors
|
| 50 |
+
Vocab: hf://SWivid/F5-TTS/F5TTS_v1_Base/vocab.txt
|
| 51 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "conv_layers": 4}
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 55 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 56 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/SWivid/F5-TTS/tree/main/F5TTS_Base)|[Emilia 95K zh&en](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07)|cc-by-nc-4.0|
|
| 57 |
+
|
| 58 |
+
```bash
|
| 59 |
+
Model: hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.safetensors
|
| 60 |
+
Vocab: hf://SWivid/F5-TTS/F5TTS_Base/vocab.txt
|
| 61 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
*Other infos, e.g. Author info, Github repo, Link to some sampled results, Usage instruction, Tutorial (Blog, Video, etc.) ...*
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
## English
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
## Finnish
|
| 71 |
+
|
| 72 |
+
#### F5-TTS Base @ fi @ AsmoKoskinen
|
| 73 |
+
|Model|🤗Hugging Face|Data|Model License|
|
| 74 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 75 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/AsmoKoskinen/F5-TTS_Finnish_Model)|[Common Voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0), [Vox Populi](https://huggingface.co/datasets/facebook/voxpopuli)|cc-by-nc-4.0|
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
Model: hf://AsmoKoskinen/F5-TTS_Finnish_Model/model_common_voice_fi_vox_populi_fi_20241206.safetensors
|
| 79 |
+
Vocab: hf://AsmoKoskinen/F5-TTS_Finnish_Model/vocab.txt
|
| 80 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 81 |
+
```
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
## French
|
| 85 |
+
|
| 86 |
+
#### F5-TTS Base @ fr @ RASPIAUDIO
|
| 87 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 88 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 89 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/RASPIAUDIO/F5-French-MixedSpeakers-reduced)|[LibriVox](https://librivox.org/)|cc-by-nc-4.0|
|
| 90 |
+
|
| 91 |
+
```bash
|
| 92 |
+
Model: hf://RASPIAUDIO/F5-French-MixedSpeakers-reduced/model_last_reduced.pt
|
| 93 |
+
Vocab: hf://RASPIAUDIO/F5-French-MixedSpeakers-reduced/vocab.txt
|
| 94 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 95 |
+
```
|
| 96 |
+
|
| 97 |
+
- [Online Inference with Hugging Face Space](https://huggingface.co/spaces/RASPIAUDIO/f5-tts_french).
|
| 98 |
+
- [Tutorial video to train a new language model](https://www.youtube.com/watch?v=UO4usaOojys).
|
| 99 |
+
- [Discussion about this training can be found here](https://github.com/SWivid/F5-TTS/issues/434).
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
## German
|
| 103 |
+
|
| 104 |
+
#### F5-TTS Base @ de @ hvoss-techfak
|
| 105 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 106 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 107 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/hvoss-techfak/F5-TTS-German)|[Mozilla Common Voice 19.0](https://commonvoice.mozilla.org/en/datasets) & 800 hours Crowdsourced |cc-by-nc-4.0|
|
| 108 |
+
|
| 109 |
+
```bash
|
| 110 |
+
Model: hf://hvoss-techfak/F5-TTS-German/model_f5tts_german.pt
|
| 111 |
+
Vocab: hf://hvoss-techfak/F5-TTS-German/vocab.txt
|
| 112 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 113 |
+
```
|
| 114 |
+
|
| 115 |
+
- Finetuned by [@hvoss-techfak](https://github.com/hvoss-techfak)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
## Hindi
|
| 119 |
+
|
| 120 |
+
#### F5-TTS Small @ hi @ SPRINGLab
|
| 121 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 122 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 123 |
+
|F5-TTS Small|[ckpt & vocab](https://huggingface.co/SPRINGLab/F5-Hindi-24KHz)|[IndicTTS Hi](https://huggingface.co/datasets/SPRINGLab/IndicTTS-Hindi) & [IndicVoices-R Hi](https://huggingface.co/datasets/SPRINGLab/IndicVoices-R_Hindi) |cc-by-4.0|
|
| 124 |
+
|
| 125 |
+
```bash
|
| 126 |
+
Model: hf://SPRINGLab/F5-Hindi-24KHz/model_2500000.safetensors
|
| 127 |
+
Vocab: hf://SPRINGLab/F5-Hindi-24KHz/vocab.txt
|
| 128 |
+
Config: {"dim": 768, "depth": 18, "heads": 12, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
- Authors: SPRING Lab, Indian Institute of Technology, Madras
|
| 132 |
+
- Website: https://asr.iitm.ac.in/
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
## Italian
|
| 136 |
+
|
| 137 |
+
#### F5-TTS Base @ it @ alien79
|
| 138 |
+
|Model|🤗Hugging Face|Data|Model License|
|
| 139 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 140 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/alien79/F5-TTS-italian)|[ylacombe/cml-tts](https://huggingface.co/datasets/ylacombe/cml-tts) |cc-by-nc-4.0|
|
| 141 |
+
|
| 142 |
+
```bash
|
| 143 |
+
Model: hf://alien79/F5-TTS-italian/model_159600.safetensors
|
| 144 |
+
Vocab: hf://alien79/F5-TTS-italian/vocab.txt
|
| 145 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
- Trained by [Mithril Man](https://github.com/MithrilMan)
|
| 149 |
+
- Model details on [hf project home](https://huggingface.co/alien79/F5-TTS-italian)
|
| 150 |
+
- Open to collaborations to further improve the model
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
## Japanese
|
| 154 |
+
|
| 155 |
+
#### F5-TTS Base @ ja @ Jmica
|
| 156 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 157 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 158 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/Jmica/F5TTS/tree/main/JA_21999120)|[Emilia 1.7k JA](https://huggingface.co/datasets/amphion/Emilia-Dataset/tree/fc71e07) & [Galgame Dataset 5.4k](https://huggingface.co/datasets/OOPPEENN/Galgame_Dataset)|cc-by-nc-4.0|
|
| 159 |
+
|
| 160 |
+
```bash
|
| 161 |
+
Model: hf://Jmica/F5TTS/JA_21999120/model_21999120.pt
|
| 162 |
+
Vocab: hf://Jmica/F5TTS/JA_21999120/vocab_japanese.txt
|
| 163 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
## Mandarin
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
## Russian
|
| 171 |
+
|
| 172 |
+
#### F5-TTS Base @ ru @ HotDro4illa
|
| 173 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 174 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 175 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/hotstone228/F5-TTS-Russian)|[Common voice](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)|cc-by-nc-4.0|
|
| 176 |
+
|
| 177 |
+
```bash
|
| 178 |
+
Model: hf://hotstone228/F5-TTS-Russian/model_last.safetensors
|
| 179 |
+
Vocab: hf://hotstone228/F5-TTS-Russian/vocab.txt
|
| 180 |
+
Config: {"dim": 1024, "depth": 22, "heads": 16, "ff_mult": 2, "text_dim": 512, "text_mask_padding": False, "conv_layers": 4, "pe_attn_head": 1}
|
| 181 |
+
```
|
| 182 |
+
- Finetuned by [HotDro4illa](https://github.com/HotDro4illa)
|
| 183 |
+
- Any improvements are welcome
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
## Spanish
|
| 187 |
+
|
| 188 |
+
#### F5-TTS Base @ es @ jpgallegoar
|
| 189 |
+
|Model|🤗Hugging Face|Data (Hours)|Model License|
|
| 190 |
+
|:---:|:------------:|:-----------:|:-------------:|
|
| 191 |
+
|F5-TTS Base|[ckpt & vocab](https://huggingface.co/jpgallegoar/F5-Spanish)|[Voxpopuli](https://huggingface.co/datasets/facebook/voxpopuli) & Crowdsourced & TEDx, 218 hours|cc0-1.0|
|
| 192 |
+
|
| 193 |
+
- @jpgallegoar [GitHub repo](https://github.com/jpgallegoar/Spanish-F5), Jupyter Notebook and Gradio usage for Spanish model.
|
src/f5_tts/infer/__pycache__/infer_cli.cpython-311.pyc
ADDED
|
Binary file (15.2 kB). View file
|
|
|
src/f5_tts/infer/__pycache__/infer_cli_emotion.cpython-311.pyc
ADDED
|
Binary file (9.47 kB). View file
|
|
|
src/f5_tts/infer/__pycache__/infer_elevenlabs.cpython-311.pyc
ADDED
|
Binary file (4.53 kB). View file
|
|
|
src/f5_tts/infer/__pycache__/infer_emotion.cpython-311.pyc
ADDED
|
Binary file (11 kB). View file
|
|
|
src/f5_tts/infer/__pycache__/utils_infer.cpython-311.pyc
ADDED
|
Binary file (24.6 kB). View file
|
|
|
src/f5_tts/infer/examples/basic/basic.toml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# F5TTS_v1_Base | E2TTS_Base
|
| 2 |
+
model = "F5TTS_v1_Base"
|
| 3 |
+
ref_audio = "infer/examples/basic/basic_ref_en.wav"
|
| 4 |
+
# If an empty "", transcribes the reference audio automatically.
|
| 5 |
+
ref_text = "Some call me nature, others call me mother nature."
|
| 6 |
+
gen_text = "I don't really care what you call me. I've been a silent spectator, watching species evolve, empires rise and fall. But always remember, I am mighty and enduring."
|
| 7 |
+
# File with text to generate. Ignores the text above.
|
| 8 |
+
gen_file = ""
|
| 9 |
+
remove_silence = false
|
| 10 |
+
output_dir = "tests"
|
| 11 |
+
output_file = "infer_cli_basic.wav"
|
src/f5_tts/infer/examples/basic/basic_ref_en.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0e22048e72414fcc1e6b6342e47a774d748a195ed34e4a5b3fcf416707f2b71
|
| 3 |
+
size 256018
|
src/f5_tts/infer/examples/basic/basic_ref_zh.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96724a113240d1f82c6ded1334122f0176b96c9226ccd3c919e625bcfd2a3ede
|
| 3 |
+
size 324558
|
src/f5_tts/infer/examples/multi/country.flac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb15708b4b3875e37beec46591a5d89e1a9a63fdad3b8fe4a5c8738f4f554400
|
| 3 |
+
size 180321
|
src/f5_tts/infer/examples/multi/main.flac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4abb1107771ce7e14926fde879b959dde6db6e572476b98684f04e45e978ab19
|
| 3 |
+
size 279219
|
src/f5_tts/infer/examples/multi/story.toml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# F5TTS_v1_Base | E2TTS_Base
|
| 2 |
+
model = "F5TTS_v1_Base"
|
| 3 |
+
ref_audio = "infer/examples/multi/main.flac"
|
| 4 |
+
# If an empty "", transcribes the reference audio automatically.
|
| 5 |
+
ref_text = ""
|
| 6 |
+
gen_text = ""
|
| 7 |
+
# File with text to generate. Ignores the text above.
|
| 8 |
+
gen_file = "infer/examples/multi/story.txt"
|
| 9 |
+
remove_silence = true
|
| 10 |
+
output_dir = "tests"
|
| 11 |
+
output_file = "infer_cli_story.wav"
|
| 12 |
+
|
| 13 |
+
[voices.town]
|
| 14 |
+
ref_audio = "infer/examples/multi/town.flac"
|
| 15 |
+
ref_text = ""
|
| 16 |
+
speed = 0.8 # will ignore global speed
|
| 17 |
+
|
| 18 |
+
[voices.country]
|
| 19 |
+
ref_audio = "infer/examples/multi/country.flac"
|
| 20 |
+
ref_text = ""
|
src/f5_tts/infer/examples/multi/story.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
A Town Mouse and a Country Mouse were acquaintances, and the Country Mouse one day invited his friend to come and see him at his home in the fields. The Town Mouse came, and they sat down to a dinner of barleycorns and roots, the latter of which had a distinctly earthy flavour. The fare was not much to the taste of the guest, and presently he broke out with [town] "My poor dear friend, you live here no better than the ants! Now, you should just see how I fare! My larder is a regular horn of plenty. You must come and stay with me, and I promise you you shall live on the fat of the land." [main] So when he returned to town he took the Country Mouse with him, and showed him into a larder containing flour and oatmeal and figs and honey and dates. The Country Mouse had never seen anything like it, and sat down to enjoy the luxuries his friend provided: but before they had well begun, the door of the larder opened and someone came in. The two Mice scampered off and hid themselves in a narrow and exceedingly uncomfortable hole. Presently, when all was quiet, they ventured out again; but someone else came in, and off they scuttled again. This was too much for the visitor. [country] "Goodbye," [main] said he, [country] "I'm off. You live in the lap of luxury, I can see, but you are surrounded by dangers; whereas at home I can enjoy my simple dinner of roots and corn in peace."
|
src/f5_tts/infer/examples/multi/town.flac
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7d069b8ebd5180c3b30fde5d378f0a1ddac96722d62cf43537efc3c3f3a3ce8
|
| 3 |
+
size 229383
|
src/f5_tts/infer/examples/vocab.txt
ADDED
|
@@ -0,0 +1,2545 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
!
|
| 3 |
+
"
|
| 4 |
+
#
|
| 5 |
+
$
|
| 6 |
+
%
|
| 7 |
+
&
|
| 8 |
+
'
|
| 9 |
+
(
|
| 10 |
+
)
|
| 11 |
+
*
|
| 12 |
+
+
|
| 13 |
+
,
|
| 14 |
+
-
|
| 15 |
+
.
|
| 16 |
+
/
|
| 17 |
+
0
|
| 18 |
+
1
|
| 19 |
+
2
|
| 20 |
+
3
|
| 21 |
+
4
|
| 22 |
+
5
|
| 23 |
+
6
|
| 24 |
+
7
|
| 25 |
+
8
|
| 26 |
+
9
|
| 27 |
+
:
|
| 28 |
+
;
|
| 29 |
+
=
|
| 30 |
+
>
|
| 31 |
+
?
|
| 32 |
+
@
|
| 33 |
+
A
|
| 34 |
+
B
|
| 35 |
+
C
|
| 36 |
+
D
|
| 37 |
+
E
|
| 38 |
+
F
|
| 39 |
+
G
|
| 40 |
+
H
|
| 41 |
+
I
|
| 42 |
+
J
|
| 43 |
+
K
|
| 44 |
+
L
|
| 45 |
+
M
|
| 46 |
+
N
|
| 47 |
+
O
|
| 48 |
+
P
|
| 49 |
+
Q
|
| 50 |
+
R
|
| 51 |
+
S
|
| 52 |
+
T
|
| 53 |
+
U
|
| 54 |
+
V
|
| 55 |
+
W
|
| 56 |
+
X
|
| 57 |
+
Y
|
| 58 |
+
Z
|
| 59 |
+
[
|
| 60 |
+
\
|
| 61 |
+
]
|
| 62 |
+
_
|
| 63 |
+
a
|
| 64 |
+
a1
|
| 65 |
+
ai1
|
| 66 |
+
ai2
|
| 67 |
+
ai3
|
| 68 |
+
ai4
|
| 69 |
+
an1
|
| 70 |
+
an3
|
| 71 |
+
an4
|
| 72 |
+
ang1
|
| 73 |
+
ang2
|
| 74 |
+
ang4
|
| 75 |
+
ao1
|
| 76 |
+
ao2
|
| 77 |
+
ao3
|
| 78 |
+
ao4
|
| 79 |
+
b
|
| 80 |
+
ba
|
| 81 |
+
ba1
|
| 82 |
+
ba2
|
| 83 |
+
ba3
|
| 84 |
+
ba4
|
| 85 |
+
bai1
|
| 86 |
+
bai2
|
| 87 |
+
bai3
|
| 88 |
+
bai4
|
| 89 |
+
ban1
|
| 90 |
+
ban2
|
| 91 |
+
ban3
|
| 92 |
+
ban4
|
| 93 |
+
bang1
|
| 94 |
+
bang2
|
| 95 |
+
bang3
|
| 96 |
+
bang4
|
| 97 |
+
bao1
|
| 98 |
+
bao2
|
| 99 |
+
bao3
|
| 100 |
+
bao4
|
| 101 |
+
bei
|
| 102 |
+
bei1
|
| 103 |
+
bei2
|
| 104 |
+
bei3
|
| 105 |
+
bei4
|
| 106 |
+
ben1
|
| 107 |
+
ben2
|
| 108 |
+
ben3
|
| 109 |
+
ben4
|
| 110 |
+
beng
|
| 111 |
+
beng1
|
| 112 |
+
beng2
|
| 113 |
+
beng3
|
| 114 |
+
beng4
|
| 115 |
+
bi1
|
| 116 |
+
bi2
|
| 117 |
+
bi3
|
| 118 |
+
bi4
|
| 119 |
+
bian1
|
| 120 |
+
bian2
|
| 121 |
+
bian3
|
| 122 |
+
bian4
|
| 123 |
+
biao1
|
| 124 |
+
biao2
|
| 125 |
+
biao3
|
| 126 |
+
bie1
|
| 127 |
+
bie2
|
| 128 |
+
bie3
|
| 129 |
+
bie4
|
| 130 |
+
bin1
|
| 131 |
+
bin4
|
| 132 |
+
bing1
|
| 133 |
+
bing2
|
| 134 |
+
bing3
|
| 135 |
+
bing4
|
| 136 |
+
bo
|
| 137 |
+
bo1
|
| 138 |
+
bo2
|
| 139 |
+
bo3
|
| 140 |
+
bo4
|
| 141 |
+
bu2
|
| 142 |
+
bu3
|
| 143 |
+
bu4
|
| 144 |
+
c
|
| 145 |
+
ca1
|
| 146 |
+
cai1
|
| 147 |
+
cai2
|
| 148 |
+
cai3
|
| 149 |
+
cai4
|
| 150 |
+
can1
|
| 151 |
+
can2
|
| 152 |
+
can3
|
| 153 |
+
can4
|
| 154 |
+
cang1
|
| 155 |
+
cang2
|
| 156 |
+
cao1
|
| 157 |
+
cao2
|
| 158 |
+
cao3
|
| 159 |
+
ce4
|
| 160 |
+
cen1
|
| 161 |
+
cen2
|
| 162 |
+
ceng1
|
| 163 |
+
ceng2
|
| 164 |
+
ceng4
|
| 165 |
+
cha1
|
| 166 |
+
cha2
|
| 167 |
+
cha3
|
| 168 |
+
cha4
|
| 169 |
+
chai1
|
| 170 |
+
chai2
|
| 171 |
+
chan1
|
| 172 |
+
chan2
|
| 173 |
+
chan3
|
| 174 |
+
chan4
|
| 175 |
+
chang1
|
| 176 |
+
chang2
|
| 177 |
+
chang3
|
| 178 |
+
chang4
|
| 179 |
+
chao1
|
| 180 |
+
chao2
|
| 181 |
+
chao3
|
| 182 |
+
che1
|
| 183 |
+
che2
|
| 184 |
+
che3
|
| 185 |
+
che4
|
| 186 |
+
chen1
|
| 187 |
+
chen2
|
| 188 |
+
chen3
|
| 189 |
+
chen4
|
| 190 |
+
cheng1
|
| 191 |
+
cheng2
|
| 192 |
+
cheng3
|
| 193 |
+
cheng4
|
| 194 |
+
chi1
|
| 195 |
+
chi2
|
| 196 |
+
chi3
|
| 197 |
+
chi4
|
| 198 |
+
chong1
|
| 199 |
+
chong2
|
| 200 |
+
chong3
|
| 201 |
+
chong4
|
| 202 |
+
chou1
|
| 203 |
+
chou2
|
| 204 |
+
chou3
|
| 205 |
+
chou4
|
| 206 |
+
chu1
|
| 207 |
+
chu2
|
| 208 |
+
chu3
|
| 209 |
+
chu4
|
| 210 |
+
chua1
|
| 211 |
+
chuai1
|
| 212 |
+
chuai2
|
| 213 |
+
chuai3
|
| 214 |
+
chuai4
|
| 215 |
+
chuan1
|
| 216 |
+
chuan2
|
| 217 |
+
chuan3
|
| 218 |
+
chuan4
|
| 219 |
+
chuang1
|
| 220 |
+
chuang2
|
| 221 |
+
chuang3
|
| 222 |
+
chuang4
|
| 223 |
+
chui1
|
| 224 |
+
chui2
|
| 225 |
+
chun1
|
| 226 |
+
chun2
|
| 227 |
+
chun3
|
| 228 |
+
chuo1
|
| 229 |
+
chuo4
|
| 230 |
+
ci1
|
| 231 |
+
ci2
|
| 232 |
+
ci3
|
| 233 |
+
ci4
|
| 234 |
+
cong1
|
| 235 |
+
cong2
|
| 236 |
+
cou4
|
| 237 |
+
cu1
|
| 238 |
+
cu4
|
| 239 |
+
cuan1
|
| 240 |
+
cuan2
|
| 241 |
+
cuan4
|
| 242 |
+
cui1
|
| 243 |
+
cui3
|
| 244 |
+
cui4
|
| 245 |
+
cun1
|
| 246 |
+
cun2
|
| 247 |
+
cun4
|
| 248 |
+
cuo1
|
| 249 |
+
cuo2
|
| 250 |
+
cuo4
|
| 251 |
+
d
|
| 252 |
+
da
|
| 253 |
+
da1
|
| 254 |
+
da2
|
| 255 |
+
da3
|
| 256 |
+
da4
|
| 257 |
+
dai1
|
| 258 |
+
dai2
|
| 259 |
+
dai3
|
| 260 |
+
dai4
|
| 261 |
+
dan1
|
| 262 |
+
dan2
|
| 263 |
+
dan3
|
| 264 |
+
dan4
|
| 265 |
+
dang1
|
| 266 |
+
dang2
|
| 267 |
+
dang3
|
| 268 |
+
dang4
|
| 269 |
+
dao1
|
| 270 |
+
dao2
|
| 271 |
+
dao3
|
| 272 |
+
dao4
|
| 273 |
+
de
|
| 274 |
+
de1
|
| 275 |
+
de2
|
| 276 |
+
dei3
|
| 277 |
+
den4
|
| 278 |
+
deng1
|
| 279 |
+
deng2
|
| 280 |
+
deng3
|
| 281 |
+
deng4
|
| 282 |
+
di1
|
| 283 |
+
di2
|
| 284 |
+
di3
|
| 285 |
+
di4
|
| 286 |
+
dia3
|
| 287 |
+
dian1
|
| 288 |
+
dian2
|
| 289 |
+
dian3
|
| 290 |
+
dian4
|
| 291 |
+
diao1
|
| 292 |
+
diao3
|
| 293 |
+
diao4
|
| 294 |
+
die1
|
| 295 |
+
die2
|
| 296 |
+
die4
|
| 297 |
+
ding1
|
| 298 |
+
ding2
|
| 299 |
+
ding3
|
| 300 |
+
ding4
|
| 301 |
+
diu1
|
| 302 |
+
dong1
|
| 303 |
+
dong3
|
| 304 |
+
dong4
|
| 305 |
+
dou1
|
| 306 |
+
dou2
|
| 307 |
+
dou3
|
| 308 |
+
dou4
|
| 309 |
+
du1
|
| 310 |
+
du2
|
| 311 |
+
du3
|
| 312 |
+
du4
|
| 313 |
+
duan1
|
| 314 |
+
duan2
|
| 315 |
+
duan3
|
| 316 |
+
duan4
|
| 317 |
+
dui1
|
| 318 |
+
dui4
|
| 319 |
+
dun1
|
| 320 |
+
dun3
|
| 321 |
+
dun4
|
| 322 |
+
duo1
|
| 323 |
+
duo2
|
| 324 |
+
duo3
|
| 325 |
+
duo4
|
| 326 |
+
e
|
| 327 |
+
e1
|
| 328 |
+
e2
|
| 329 |
+
e3
|
| 330 |
+
e4
|
| 331 |
+
ei2
|
| 332 |
+
en1
|
| 333 |
+
en4
|
| 334 |
+
er
|
| 335 |
+
er2
|
| 336 |
+
er3
|
| 337 |
+
er4
|
| 338 |
+
f
|
| 339 |
+
fa1
|
| 340 |
+
fa2
|
| 341 |
+
fa3
|
| 342 |
+
fa4
|
| 343 |
+
fan1
|
| 344 |
+
fan2
|
| 345 |
+
fan3
|
| 346 |
+
fan4
|
| 347 |
+
fang1
|
| 348 |
+
fang2
|
| 349 |
+
fang3
|
| 350 |
+
fang4
|
| 351 |
+
fei1
|
| 352 |
+
fei2
|
| 353 |
+
fei3
|
| 354 |
+
fei4
|
| 355 |
+
fen1
|
| 356 |
+
fen2
|
| 357 |
+
fen3
|
| 358 |
+
fen4
|
| 359 |
+
feng1
|
| 360 |
+
feng2
|
| 361 |
+
feng3
|
| 362 |
+
feng4
|
| 363 |
+
fo2
|
| 364 |
+
fou2
|
| 365 |
+
fou3
|
| 366 |
+
fu1
|
| 367 |
+
fu2
|
| 368 |
+
fu3
|
| 369 |
+
fu4
|
| 370 |
+
g
|
| 371 |
+
ga1
|
| 372 |
+
ga2
|
| 373 |
+
ga3
|
| 374 |
+
ga4
|
| 375 |
+
gai1
|
| 376 |
+
gai2
|
| 377 |
+
gai3
|
| 378 |
+
gai4
|
| 379 |
+
gan1
|
| 380 |
+
gan2
|
| 381 |
+
gan3
|
| 382 |
+
gan4
|
| 383 |
+
gang1
|
| 384 |
+
gang2
|
| 385 |
+
gang3
|
| 386 |
+
gang4
|
| 387 |
+
gao1
|
| 388 |
+
gao2
|
| 389 |
+
gao3
|
| 390 |
+
gao4
|
| 391 |
+
ge1
|
| 392 |
+
ge2
|
| 393 |
+
ge3
|
| 394 |
+
ge4
|
| 395 |
+
gei2
|
| 396 |
+
gei3
|
| 397 |
+
gen1
|
| 398 |
+
gen2
|
| 399 |
+
gen3
|
| 400 |
+
gen4
|
| 401 |
+
geng1
|
| 402 |
+
geng3
|
| 403 |
+
geng4
|
| 404 |
+
gong1
|
| 405 |
+
gong3
|
| 406 |
+
gong4
|
| 407 |
+
gou1
|
| 408 |
+
gou2
|
| 409 |
+
gou3
|
| 410 |
+
gou4
|
| 411 |
+
gu
|
| 412 |
+
gu1
|
| 413 |
+
gu2
|
| 414 |
+
gu3
|
| 415 |
+
gu4
|
| 416 |
+
gua1
|
| 417 |
+
gua2
|
| 418 |
+
gua3
|
| 419 |
+
gua4
|
| 420 |
+
guai1
|
| 421 |
+
guai2
|
| 422 |
+
guai3
|
| 423 |
+
guai4
|
| 424 |
+
guan1
|
| 425 |
+
guan2
|
| 426 |
+
guan3
|
| 427 |
+
guan4
|
| 428 |
+
guang1
|
| 429 |
+
guang2
|
| 430 |
+
guang3
|
| 431 |
+
guang4
|
| 432 |
+
gui1
|
| 433 |
+
gui2
|
| 434 |
+
gui3
|
| 435 |
+
gui4
|
| 436 |
+
gun3
|
| 437 |
+
gun4
|
| 438 |
+
guo1
|
| 439 |
+
guo2
|
| 440 |
+
guo3
|
| 441 |
+
guo4
|
| 442 |
+
h
|
| 443 |
+
ha1
|
| 444 |
+
ha2
|
| 445 |
+
ha3
|
| 446 |
+
hai1
|
| 447 |
+
hai2
|
| 448 |
+
hai3
|
| 449 |
+
hai4
|
| 450 |
+
han1
|
| 451 |
+
han2
|
| 452 |
+
han3
|
| 453 |
+
han4
|
| 454 |
+
hang1
|
| 455 |
+
hang2
|
| 456 |
+
hang4
|
| 457 |
+
hao1
|
| 458 |
+
hao2
|
| 459 |
+
hao3
|
| 460 |
+
hao4
|
| 461 |
+
he1
|
| 462 |
+
he2
|
| 463 |
+
he4
|
| 464 |
+
hei1
|
| 465 |
+
hen2
|
| 466 |
+
hen3
|
| 467 |
+
hen4
|
| 468 |
+
heng1
|
| 469 |
+
heng2
|
| 470 |
+
heng4
|
| 471 |
+
hong1
|
| 472 |
+
hong2
|
| 473 |
+
hong3
|
| 474 |
+
hong4
|
| 475 |
+
hou1
|
| 476 |
+
hou2
|
| 477 |
+
hou3
|
| 478 |
+
hou4
|
| 479 |
+
hu1
|
| 480 |
+
hu2
|
| 481 |
+
hu3
|
| 482 |
+
hu4
|
| 483 |
+
hua1
|
| 484 |
+
hua2
|
| 485 |
+
hua4
|
| 486 |
+
huai2
|
| 487 |
+
huai4
|
| 488 |
+
huan1
|
| 489 |
+
huan2
|
| 490 |
+
huan3
|
| 491 |
+
huan4
|
| 492 |
+
huang1
|
| 493 |
+
huang2
|
| 494 |
+
huang3
|
| 495 |
+
huang4
|
| 496 |
+
hui1
|
| 497 |
+
hui2
|
| 498 |
+
hui3
|
| 499 |
+
hui4
|
| 500 |
+
hun1
|
| 501 |
+
hun2
|
| 502 |
+
hun4
|
| 503 |
+
huo
|
| 504 |
+
huo1
|
| 505 |
+
huo2
|
| 506 |
+
huo3
|
| 507 |
+
huo4
|
| 508 |
+
i
|
| 509 |
+
j
|
| 510 |
+
ji1
|
| 511 |
+
ji2
|
| 512 |
+
ji3
|
| 513 |
+
ji4
|
| 514 |
+
jia
|
| 515 |
+
jia1
|
| 516 |
+
jia2
|
| 517 |
+
jia3
|
| 518 |
+
jia4
|
| 519 |
+
jian1
|
| 520 |
+
jian2
|
| 521 |
+
jian3
|
| 522 |
+
jian4
|
| 523 |
+
jiang1
|
| 524 |
+
jiang2
|
| 525 |
+
jiang3
|
| 526 |
+
jiang4
|
| 527 |
+
jiao1
|
| 528 |
+
jiao2
|
| 529 |
+
jiao3
|
| 530 |
+
jiao4
|
| 531 |
+
jie1
|
| 532 |
+
jie2
|
| 533 |
+
jie3
|
| 534 |
+
jie4
|
| 535 |
+
jin1
|
| 536 |
+
jin2
|
| 537 |
+
jin3
|
| 538 |
+
jin4
|
| 539 |
+
jing1
|
| 540 |
+
jing2
|
| 541 |
+
jing3
|
| 542 |
+
jing4
|
| 543 |
+
jiong3
|
| 544 |
+
jiu1
|
| 545 |
+
jiu2
|
| 546 |
+
jiu3
|
| 547 |
+
jiu4
|
| 548 |
+
ju1
|
| 549 |
+
ju2
|
| 550 |
+
ju3
|
| 551 |
+
ju4
|
| 552 |
+
juan1
|
| 553 |
+
juan2
|
| 554 |
+
juan3
|
| 555 |
+
juan4
|
| 556 |
+
jue1
|
| 557 |
+
jue2
|
| 558 |
+
jue4
|
| 559 |
+
jun1
|
| 560 |
+
jun4
|
| 561 |
+
k
|
| 562 |
+
ka1
|
| 563 |
+
ka2
|
| 564 |
+
ka3
|
| 565 |
+
kai1
|
| 566 |
+
kai2
|
| 567 |
+
kai3
|
| 568 |
+
kai4
|
| 569 |
+
kan1
|
| 570 |
+
kan2
|
| 571 |
+
kan3
|
| 572 |
+
kan4
|
| 573 |
+
kang1
|
| 574 |
+
kang2
|
| 575 |
+
kang4
|
| 576 |
+
kao1
|
| 577 |
+
kao2
|
| 578 |
+
kao3
|
| 579 |
+
kao4
|
| 580 |
+
ke1
|
| 581 |
+
ke2
|
| 582 |
+
ke3
|
| 583 |
+
ke4
|
| 584 |
+
ken3
|
| 585 |
+
keng1
|
| 586 |
+
kong1
|
| 587 |
+
kong3
|
| 588 |
+
kong4
|
| 589 |
+
kou1
|
| 590 |
+
kou2
|
| 591 |
+
kou3
|
| 592 |
+
kou4
|
| 593 |
+
ku1
|
| 594 |
+
ku2
|
| 595 |
+
ku3
|
| 596 |
+
ku4
|
| 597 |
+
kua1
|
| 598 |
+
kua3
|
| 599 |
+
kua4
|
| 600 |
+
kuai3
|
| 601 |
+
kuai4
|
| 602 |
+
kuan1
|
| 603 |
+
kuan2
|
| 604 |
+
kuan3
|
| 605 |
+
kuang1
|
| 606 |
+
kuang2
|
| 607 |
+
kuang4
|
| 608 |
+
kui1
|
| 609 |
+
kui2
|
| 610 |
+
kui3
|
| 611 |
+
kui4
|
| 612 |
+
kun1
|
| 613 |
+
kun3
|
| 614 |
+
kun4
|
| 615 |
+
kuo4
|
| 616 |
+
l
|
| 617 |
+
la
|
| 618 |
+
la1
|
| 619 |
+
la2
|
| 620 |
+
la3
|
| 621 |
+
la4
|
| 622 |
+
lai2
|
| 623 |
+
lai4
|
| 624 |
+
lan2
|
| 625 |
+
lan3
|
| 626 |
+
lan4
|
| 627 |
+
lang1
|
| 628 |
+
lang2
|
| 629 |
+
lang3
|
| 630 |
+
lang4
|
| 631 |
+
lao1
|
| 632 |
+
lao2
|
| 633 |
+
lao3
|
| 634 |
+
lao4
|
| 635 |
+
le
|
| 636 |
+
le1
|
| 637 |
+
le4
|
| 638 |
+
lei
|
| 639 |
+
lei1
|
| 640 |
+
lei2
|
| 641 |
+
lei3
|
| 642 |
+
lei4
|
| 643 |
+
leng1
|
| 644 |
+
leng2
|
| 645 |
+
leng3
|
| 646 |
+
leng4
|
| 647 |
+
li
|
| 648 |
+
li1
|
| 649 |
+
li2
|
| 650 |
+
li3
|
| 651 |
+
li4
|
| 652 |
+
lia3
|
| 653 |
+
lian2
|
| 654 |
+
lian3
|
| 655 |
+
lian4
|
| 656 |
+
liang2
|
| 657 |
+
liang3
|
| 658 |
+
liang4
|
| 659 |
+
liao1
|
| 660 |
+
liao2
|
| 661 |
+
liao3
|
| 662 |
+
liao4
|
| 663 |
+
lie1
|
| 664 |
+
lie2
|
| 665 |
+
lie3
|
| 666 |
+
lie4
|
| 667 |
+
lin1
|
| 668 |
+
lin2
|
| 669 |
+
lin3
|
| 670 |
+
lin4
|
| 671 |
+
ling2
|
| 672 |
+
ling3
|
| 673 |
+
ling4
|
| 674 |
+
liu1
|
| 675 |
+
liu2
|
| 676 |
+
liu3
|
| 677 |
+
liu4
|
| 678 |
+
long1
|
| 679 |
+
long2
|
| 680 |
+
long3
|
| 681 |
+
long4
|
| 682 |
+
lou1
|
| 683 |
+
lou2
|
| 684 |
+
lou3
|
| 685 |
+
lou4
|
| 686 |
+
lu1
|
| 687 |
+
lu2
|
| 688 |
+
lu3
|
| 689 |
+
lu4
|
| 690 |
+
luan2
|
| 691 |
+
luan3
|
| 692 |
+
luan4
|
| 693 |
+
lun1
|
| 694 |
+
lun2
|
| 695 |
+
lun4
|
| 696 |
+
luo1
|
| 697 |
+
luo2
|
| 698 |
+
luo3
|
| 699 |
+
luo4
|
| 700 |
+
lv2
|
| 701 |
+
lv3
|
| 702 |
+
lv4
|
| 703 |
+
lve3
|
| 704 |
+
lve4
|
| 705 |
+
m
|
| 706 |
+
ma
|
| 707 |
+
ma1
|
| 708 |
+
ma2
|
| 709 |
+
ma3
|
| 710 |
+
ma4
|
| 711 |
+
mai2
|
| 712 |
+
mai3
|
| 713 |
+
mai4
|
| 714 |
+
man1
|
| 715 |
+
man2
|
| 716 |
+
man3
|
| 717 |
+
man4
|
| 718 |
+
mang2
|
| 719 |
+
mang3
|
| 720 |
+
mao1
|
| 721 |
+
mao2
|
| 722 |
+
mao3
|
| 723 |
+
mao4
|
| 724 |
+
me
|
| 725 |
+
mei2
|
| 726 |
+
mei3
|
| 727 |
+
mei4
|
| 728 |
+
men
|
| 729 |
+
men1
|
| 730 |
+
men2
|
| 731 |
+
men4
|
| 732 |
+
meng
|
| 733 |
+
meng1
|
| 734 |
+
meng2
|
| 735 |
+
meng3
|
| 736 |
+
meng4
|
| 737 |
+
mi1
|
| 738 |
+
mi2
|
| 739 |
+
mi3
|
| 740 |
+
mi4
|
| 741 |
+
mian2
|
| 742 |
+
mian3
|
| 743 |
+
mian4
|
| 744 |
+
miao1
|
| 745 |
+
miao2
|
| 746 |
+
miao3
|
| 747 |
+
miao4
|
| 748 |
+
mie1
|
| 749 |
+
mie4
|
| 750 |
+
min2
|
| 751 |
+
min3
|
| 752 |
+
ming2
|
| 753 |
+
ming3
|
| 754 |
+
ming4
|
| 755 |
+
miu4
|
| 756 |
+
mo1
|
| 757 |
+
mo2
|
| 758 |
+
mo3
|
| 759 |
+
mo4
|
| 760 |
+
mou1
|
| 761 |
+
mou2
|
| 762 |
+
mou3
|
| 763 |
+
mu2
|
| 764 |
+
mu3
|
| 765 |
+
mu4
|
| 766 |
+
n
|
| 767 |
+
n2
|
| 768 |
+
na1
|
| 769 |
+
na2
|
| 770 |
+
na3
|
| 771 |
+
na4
|
| 772 |
+
nai2
|
| 773 |
+
nai3
|
| 774 |
+
nai4
|
| 775 |
+
nan1
|
| 776 |
+
nan2
|
| 777 |
+
nan3
|
| 778 |
+
nan4
|
| 779 |
+
nang1
|
| 780 |
+
nang2
|
| 781 |
+
nang3
|
| 782 |
+
nao1
|
| 783 |
+
nao2
|
| 784 |
+
nao3
|
| 785 |
+
nao4
|
| 786 |
+
ne
|
| 787 |
+
ne2
|
| 788 |
+
ne4
|
| 789 |
+
nei3
|
| 790 |
+
nei4
|
| 791 |
+
nen4
|
| 792 |
+
neng2
|
| 793 |
+
ni1
|
| 794 |
+
ni2
|
| 795 |
+
ni3
|
| 796 |
+
ni4
|
| 797 |
+
nian1
|
| 798 |
+
nian2
|
| 799 |
+
nian3
|
| 800 |
+
nian4
|
| 801 |
+
niang2
|
| 802 |
+
niang4
|
| 803 |
+
niao2
|
| 804 |
+
niao3
|
| 805 |
+
niao4
|
| 806 |
+
nie1
|
| 807 |
+
nie4
|
| 808 |
+
nin2
|
| 809 |
+
ning2
|
| 810 |
+
ning3
|
| 811 |
+
ning4
|
| 812 |
+
niu1
|
| 813 |
+
niu2
|
| 814 |
+
niu3
|
| 815 |
+
niu4
|
| 816 |
+
nong2
|
| 817 |
+
nong4
|
| 818 |
+
nou4
|
| 819 |
+
nu2
|
| 820 |
+
nu3
|
| 821 |
+
nu4
|
| 822 |
+
nuan3
|
| 823 |
+
nuo2
|
| 824 |
+
nuo4
|
| 825 |
+
nv2
|
| 826 |
+
nv3
|
| 827 |
+
nve4
|
| 828 |
+
o
|
| 829 |
+
o1
|
| 830 |
+
o2
|
| 831 |
+
ou1
|
| 832 |
+
ou2
|
| 833 |
+
ou3
|
| 834 |
+
ou4
|
| 835 |
+
p
|
| 836 |
+
pa1
|
| 837 |
+
pa2
|
| 838 |
+
pa4
|
| 839 |
+
pai1
|
| 840 |
+
pai2
|
| 841 |
+
pai3
|
| 842 |
+
pai4
|
| 843 |
+
pan1
|
| 844 |
+
pan2
|
| 845 |
+
pan4
|
| 846 |
+
pang1
|
| 847 |
+
pang2
|
| 848 |
+
pang4
|
| 849 |
+
pao1
|
| 850 |
+
pao2
|
| 851 |
+
pao3
|
| 852 |
+
pao4
|
| 853 |
+
pei1
|
| 854 |
+
pei2
|
| 855 |
+
pei4
|
| 856 |
+
pen1
|
| 857 |
+
pen2
|
| 858 |
+
pen4
|
| 859 |
+
peng1
|
| 860 |
+
peng2
|
| 861 |
+
peng3
|
| 862 |
+
peng4
|
| 863 |
+
pi1
|
| 864 |
+
pi2
|
| 865 |
+
pi3
|
| 866 |
+
pi4
|
| 867 |
+
pian1
|
| 868 |
+
pian2
|
| 869 |
+
pian4
|
| 870 |
+
piao1
|
| 871 |
+
piao2
|
| 872 |
+
piao3
|
| 873 |
+
piao4
|
| 874 |
+
pie1
|
| 875 |
+
pie2
|
| 876 |
+
pie3
|
| 877 |
+
pin1
|
| 878 |
+
pin2
|
| 879 |
+
pin3
|
| 880 |
+
pin4
|
| 881 |
+
ping1
|
| 882 |
+
ping2
|
| 883 |
+
po1
|
| 884 |
+
po2
|
| 885 |
+
po3
|
| 886 |
+
po4
|
| 887 |
+
pou1
|
| 888 |
+
pu1
|
| 889 |
+
pu2
|
| 890 |
+
pu3
|
| 891 |
+
pu4
|
| 892 |
+
q
|
| 893 |
+
qi1
|
| 894 |
+
qi2
|
| 895 |
+
qi3
|
| 896 |
+
qi4
|
| 897 |
+
qia1
|
| 898 |
+
qia3
|
| 899 |
+
qia4
|
| 900 |
+
qian1
|
| 901 |
+
qian2
|
| 902 |
+
qian3
|
| 903 |
+
qian4
|
| 904 |
+
qiang1
|
| 905 |
+
qiang2
|
| 906 |
+
qiang3
|
| 907 |
+
qiang4
|
| 908 |
+
qiao1
|
| 909 |
+
qiao2
|
| 910 |
+
qiao3
|
| 911 |
+
qiao4
|
| 912 |
+
qie1
|
| 913 |
+
qie2
|
| 914 |
+
qie3
|
| 915 |
+
qie4
|
| 916 |
+
qin1
|
| 917 |
+
qin2
|
| 918 |
+
qin3
|
| 919 |
+
qin4
|
| 920 |
+
qing1
|
| 921 |
+
qing2
|
| 922 |
+
qing3
|
| 923 |
+
qing4
|
| 924 |
+
qiong1
|
| 925 |
+
qiong2
|
| 926 |
+
qiu1
|
| 927 |
+
qiu2
|
| 928 |
+
qiu3
|
| 929 |
+
qu1
|
| 930 |
+
qu2
|
| 931 |
+
qu3
|
| 932 |
+
qu4
|
| 933 |
+
quan1
|
| 934 |
+
quan2
|
| 935 |
+
quan3
|
| 936 |
+
quan4
|
| 937 |
+
que1
|
| 938 |
+
que2
|
| 939 |
+
que4
|
| 940 |
+
qun2
|
| 941 |
+
r
|
| 942 |
+
ran2
|
| 943 |
+
ran3
|
| 944 |
+
rang1
|
| 945 |
+
rang2
|
| 946 |
+
rang3
|
| 947 |
+
rang4
|
| 948 |
+
rao2
|
| 949 |
+
rao3
|
| 950 |
+
rao4
|
| 951 |
+
re2
|
| 952 |
+
re3
|
| 953 |
+
re4
|
| 954 |
+
ren2
|
| 955 |
+
ren3
|
| 956 |
+
ren4
|
| 957 |
+
reng1
|
| 958 |
+
reng2
|
| 959 |
+
ri4
|
| 960 |
+
rong1
|
| 961 |
+
rong2
|
| 962 |
+
rong3
|
| 963 |
+
rou2
|
| 964 |
+
rou4
|
| 965 |
+
ru2
|
| 966 |
+
ru3
|
| 967 |
+
ru4
|
| 968 |
+
ruan2
|
| 969 |
+
ruan3
|
| 970 |
+
rui3
|
| 971 |
+
rui4
|
| 972 |
+
run4
|
| 973 |
+
ruo4
|
| 974 |
+
s
|
| 975 |
+
sa1
|
| 976 |
+
sa2
|
| 977 |
+
sa3
|
| 978 |
+
sa4
|
| 979 |
+
sai1
|
| 980 |
+
sai4
|
| 981 |
+
san1
|
| 982 |
+
san2
|
| 983 |
+
san3
|
| 984 |
+
san4
|
| 985 |
+
sang1
|
| 986 |
+
sang3
|
| 987 |
+
sang4
|
| 988 |
+
sao1
|
| 989 |
+
sao2
|
| 990 |
+
sao3
|
| 991 |
+
sao4
|
| 992 |
+
se4
|
| 993 |
+
sen1
|
| 994 |
+
seng1
|
| 995 |
+
sha1
|
| 996 |
+
sha2
|
| 997 |
+
sha3
|
| 998 |
+
sha4
|
| 999 |
+
shai1
|
| 1000 |
+
shai2
|
| 1001 |
+
shai3
|
| 1002 |
+
shai4
|
| 1003 |
+
shan1
|
| 1004 |
+
shan3
|
| 1005 |
+
shan4
|
| 1006 |
+
shang
|
| 1007 |
+
shang1
|
| 1008 |
+
shang3
|
| 1009 |
+
shang4
|
| 1010 |
+
shao1
|
| 1011 |
+
shao2
|
| 1012 |
+
shao3
|
| 1013 |
+
shao4
|
| 1014 |
+
she1
|
| 1015 |
+
she2
|
| 1016 |
+
she3
|
| 1017 |
+
she4
|
| 1018 |
+
shei2
|
| 1019 |
+
shen1
|
| 1020 |
+
shen2
|
| 1021 |
+
shen3
|
| 1022 |
+
shen4
|
| 1023 |
+
sheng1
|
| 1024 |
+
sheng2
|
| 1025 |
+
sheng3
|
| 1026 |
+
sheng4
|
| 1027 |
+
shi
|
| 1028 |
+
shi1
|
| 1029 |
+
shi2
|
| 1030 |
+
shi3
|
| 1031 |
+
shi4
|
| 1032 |
+
shou1
|
| 1033 |
+
shou2
|
| 1034 |
+
shou3
|
| 1035 |
+
shou4
|
| 1036 |
+
shu1
|
| 1037 |
+
shu2
|
| 1038 |
+
shu3
|
| 1039 |
+
shu4
|
| 1040 |
+
shua1
|
| 1041 |
+
shua2
|
| 1042 |
+
shua3
|
| 1043 |
+
shua4
|
| 1044 |
+
shuai1
|
| 1045 |
+
shuai3
|
| 1046 |
+
shuai4
|
| 1047 |
+
shuan1
|
| 1048 |
+
shuan4
|
| 1049 |
+
shuang1
|
| 1050 |
+
shuang3
|
| 1051 |
+
shui2
|
| 1052 |
+
shui3
|
| 1053 |
+
shui4
|
| 1054 |
+
shun3
|
| 1055 |
+
shun4
|
| 1056 |
+
shuo1
|
| 1057 |
+
shuo4
|
| 1058 |
+
si1
|
| 1059 |
+
si2
|
| 1060 |
+
si3
|
| 1061 |
+
si4
|
| 1062 |
+
song1
|
| 1063 |
+
song3
|
| 1064 |
+
song4
|
| 1065 |
+
sou1
|
| 1066 |
+
sou3
|
| 1067 |
+
sou4
|
| 1068 |
+
su1
|
| 1069 |
+
su2
|
| 1070 |
+
su4
|
| 1071 |
+
suan1
|
| 1072 |
+
suan4
|
| 1073 |
+
sui1
|
| 1074 |
+
sui2
|
| 1075 |
+
sui3
|
| 1076 |
+
sui4
|
| 1077 |
+
sun1
|
| 1078 |
+
sun3
|
| 1079 |
+
suo
|
| 1080 |
+
suo1
|
| 1081 |
+
suo2
|
| 1082 |
+
suo3
|
| 1083 |
+
t
|
| 1084 |
+
ta1
|
| 1085 |
+
ta2
|
| 1086 |
+
ta3
|
| 1087 |
+
ta4
|
| 1088 |
+
tai1
|
| 1089 |
+
tai2
|
| 1090 |
+
tai4
|
| 1091 |
+
tan1
|
| 1092 |
+
tan2
|
| 1093 |
+
tan3
|
| 1094 |
+
tan4
|
| 1095 |
+
tang1
|
| 1096 |
+
tang2
|
| 1097 |
+
tang3
|
| 1098 |
+
tang4
|
| 1099 |
+
tao1
|
| 1100 |
+
tao2
|
| 1101 |
+
tao3
|
| 1102 |
+
tao4
|
| 1103 |
+
te4
|
| 1104 |
+
teng2
|
| 1105 |
+
ti1
|
| 1106 |
+
ti2
|
| 1107 |
+
ti3
|
| 1108 |
+
ti4
|
| 1109 |
+
tian1
|
| 1110 |
+
tian2
|
| 1111 |
+
tian3
|
| 1112 |
+
tiao1
|
| 1113 |
+
tiao2
|
| 1114 |
+
tiao3
|
| 1115 |
+
tiao4
|
| 1116 |
+
tie1
|
| 1117 |
+
tie2
|
| 1118 |
+
tie3
|
| 1119 |
+
tie4
|
| 1120 |
+
ting1
|
| 1121 |
+
ting2
|
| 1122 |
+
ting3
|
| 1123 |
+
tong1
|
| 1124 |
+
tong2
|
| 1125 |
+
tong3
|
| 1126 |
+
tong4
|
| 1127 |
+
tou
|
| 1128 |
+
tou1
|
| 1129 |
+
tou2
|
| 1130 |
+
tou4
|
| 1131 |
+
tu1
|
| 1132 |
+
tu2
|
| 1133 |
+
tu3
|
| 1134 |
+
tu4
|
| 1135 |
+
tuan1
|
| 1136 |
+
tuan2
|
| 1137 |
+
tui1
|
| 1138 |
+
tui2
|
| 1139 |
+
tui3
|
| 1140 |
+
tui4
|
| 1141 |
+
tun1
|
| 1142 |
+
tun2
|
| 1143 |
+
tun4
|
| 1144 |
+
tuo1
|
| 1145 |
+
tuo2
|
| 1146 |
+
tuo3
|
| 1147 |
+
tuo4
|
| 1148 |
+
u
|
| 1149 |
+
v
|
| 1150 |
+
w
|
| 1151 |
+
wa
|
| 1152 |
+
wa1
|
| 1153 |
+
wa2
|
| 1154 |
+
wa3
|
| 1155 |
+
wa4
|
| 1156 |
+
wai1
|
| 1157 |
+
wai3
|
| 1158 |
+
wai4
|
| 1159 |
+
wan1
|
| 1160 |
+
wan2
|
| 1161 |
+
wan3
|
| 1162 |
+
wan4
|
| 1163 |
+
wang1
|
| 1164 |
+
wang2
|
| 1165 |
+
wang3
|
| 1166 |
+
wang4
|
| 1167 |
+
wei1
|
| 1168 |
+
wei2
|
| 1169 |
+
wei3
|
| 1170 |
+
wei4
|
| 1171 |
+
wen1
|
| 1172 |
+
wen2
|
| 1173 |
+
wen3
|
| 1174 |
+
wen4
|
| 1175 |
+
weng1
|
| 1176 |
+
weng4
|
| 1177 |
+
wo1
|
| 1178 |
+
wo2
|
| 1179 |
+
wo3
|
| 1180 |
+
wo4
|
| 1181 |
+
wu1
|
| 1182 |
+
wu2
|
| 1183 |
+
wu3
|
| 1184 |
+
wu4
|
| 1185 |
+
x
|
| 1186 |
+
xi1
|
| 1187 |
+
xi2
|
| 1188 |
+
xi3
|
| 1189 |
+
xi4
|
| 1190 |
+
xia1
|
| 1191 |
+
xia2
|
| 1192 |
+
xia4
|
| 1193 |
+
xian1
|
| 1194 |
+
xian2
|
| 1195 |
+
xian3
|
| 1196 |
+
xian4
|
| 1197 |
+
xiang1
|
| 1198 |
+
xiang2
|
| 1199 |
+
xiang3
|
| 1200 |
+
xiang4
|
| 1201 |
+
xiao1
|
| 1202 |
+
xiao2
|
| 1203 |
+
xiao3
|
| 1204 |
+
xiao4
|
| 1205 |
+
xie1
|
| 1206 |
+
xie2
|
| 1207 |
+
xie3
|
| 1208 |
+
xie4
|
| 1209 |
+
xin1
|
| 1210 |
+
xin2
|
| 1211 |
+
xin4
|
| 1212 |
+
xing1
|
| 1213 |
+
xing2
|
| 1214 |
+
xing3
|
| 1215 |
+
xing4
|
| 1216 |
+
xiong1
|
| 1217 |
+
xiong2
|
| 1218 |
+
xiu1
|
| 1219 |
+
xiu3
|
| 1220 |
+
xiu4
|
| 1221 |
+
xu
|
| 1222 |
+
xu1
|
| 1223 |
+
xu2
|
| 1224 |
+
xu3
|
| 1225 |
+
xu4
|
| 1226 |
+
xuan1
|
| 1227 |
+
xuan2
|
| 1228 |
+
xuan3
|
| 1229 |
+
xuan4
|
| 1230 |
+
xue1
|
| 1231 |
+
xue2
|
| 1232 |
+
xue3
|
| 1233 |
+
xue4
|
| 1234 |
+
xun1
|
| 1235 |
+
xun2
|
| 1236 |
+
xun4
|
| 1237 |
+
y
|
| 1238 |
+
ya
|
| 1239 |
+
ya1
|
| 1240 |
+
ya2
|
| 1241 |
+
ya3
|
| 1242 |
+
ya4
|
| 1243 |
+
yan1
|
| 1244 |
+
yan2
|
| 1245 |
+
yan3
|
| 1246 |
+
yan4
|
| 1247 |
+
yang1
|
| 1248 |
+
yang2
|
| 1249 |
+
yang3
|
| 1250 |
+
yang4
|
| 1251 |
+
yao1
|
| 1252 |
+
yao2
|
| 1253 |
+
yao3
|
| 1254 |
+
yao4
|
| 1255 |
+
ye1
|
| 1256 |
+
ye2
|
| 1257 |
+
ye3
|
| 1258 |
+
ye4
|
| 1259 |
+
yi
|
| 1260 |
+
yi1
|
| 1261 |
+
yi2
|
| 1262 |
+
yi3
|
| 1263 |
+
yi4
|
| 1264 |
+
yin1
|
| 1265 |
+
yin2
|
| 1266 |
+
yin3
|
| 1267 |
+
yin4
|
| 1268 |
+
ying1
|
| 1269 |
+
ying2
|
| 1270 |
+
ying3
|
| 1271 |
+
ying4
|
| 1272 |
+
yo1
|
| 1273 |
+
yong1
|
| 1274 |
+
yong2
|
| 1275 |
+
yong3
|
| 1276 |
+
yong4
|
| 1277 |
+
you1
|
| 1278 |
+
you2
|
| 1279 |
+
you3
|
| 1280 |
+
you4
|
| 1281 |
+
yu1
|
| 1282 |
+
yu2
|
| 1283 |
+
yu3
|
| 1284 |
+
yu4
|
| 1285 |
+
yuan1
|
| 1286 |
+
yuan2
|
| 1287 |
+
yuan3
|
| 1288 |
+
yuan4
|
| 1289 |
+
yue1
|
| 1290 |
+
yue4
|
| 1291 |
+
yun1
|
| 1292 |
+
yun2
|
| 1293 |
+
yun3
|
| 1294 |
+
yun4
|
| 1295 |
+
z
|
| 1296 |
+
za1
|
| 1297 |
+
za2
|
| 1298 |
+
za3
|
| 1299 |
+
zai1
|
| 1300 |
+
zai3
|
| 1301 |
+
zai4
|
| 1302 |
+
zan1
|
| 1303 |
+
zan2
|
| 1304 |
+
zan3
|
| 1305 |
+
zan4
|
| 1306 |
+
zang1
|
| 1307 |
+
zang4
|
| 1308 |
+
zao1
|
| 1309 |
+
zao2
|
| 1310 |
+
zao3
|
| 1311 |
+
zao4
|
| 1312 |
+
ze2
|
| 1313 |
+
ze4
|
| 1314 |
+
zei2
|
| 1315 |
+
zen3
|
| 1316 |
+
zeng1
|
| 1317 |
+
zeng4
|
| 1318 |
+
zha1
|
| 1319 |
+
zha2
|
| 1320 |
+
zha3
|
| 1321 |
+
zha4
|
| 1322 |
+
zhai1
|
| 1323 |
+
zhai2
|
| 1324 |
+
zhai3
|
| 1325 |
+
zhai4
|
| 1326 |
+
zhan1
|
| 1327 |
+
zhan2
|
| 1328 |
+
zhan3
|
| 1329 |
+
zhan4
|
| 1330 |
+
zhang1
|
| 1331 |
+
zhang2
|
| 1332 |
+
zhang3
|
| 1333 |
+
zhang4
|
| 1334 |
+
zhao1
|
| 1335 |
+
zhao2
|
| 1336 |
+
zhao3
|
| 1337 |
+
zhao4
|
| 1338 |
+
zhe
|
| 1339 |
+
zhe1
|
| 1340 |
+
zhe2
|
| 1341 |
+
zhe3
|
| 1342 |
+
zhe4
|
| 1343 |
+
zhen1
|
| 1344 |
+
zhen2
|
| 1345 |
+
zhen3
|
| 1346 |
+
zhen4
|
| 1347 |
+
zheng1
|
| 1348 |
+
zheng2
|
| 1349 |
+
zheng3
|
| 1350 |
+
zheng4
|
| 1351 |
+
zhi1
|
| 1352 |
+
zhi2
|
| 1353 |
+
zhi3
|
| 1354 |
+
zhi4
|
| 1355 |
+
zhong1
|
| 1356 |
+
zhong2
|
| 1357 |
+
zhong3
|
| 1358 |
+
zhong4
|
| 1359 |
+
zhou1
|
| 1360 |
+
zhou2
|
| 1361 |
+
zhou3
|
| 1362 |
+
zhou4
|
| 1363 |
+
zhu1
|
| 1364 |
+
zhu2
|
| 1365 |
+
zhu3
|
| 1366 |
+
zhu4
|
| 1367 |
+
zhua1
|
| 1368 |
+
zhua2
|
| 1369 |
+
zhua3
|
| 1370 |
+
zhuai1
|
| 1371 |
+
zhuai3
|
| 1372 |
+
zhuai4
|
| 1373 |
+
zhuan1
|
| 1374 |
+
zhuan2
|
| 1375 |
+
zhuan3
|
| 1376 |
+
zhuan4
|
| 1377 |
+
zhuang1
|
| 1378 |
+
zhuang4
|
| 1379 |
+
zhui1
|
| 1380 |
+
zhui4
|
| 1381 |
+
zhun1
|
| 1382 |
+
zhun2
|
| 1383 |
+
zhun3
|
| 1384 |
+
zhuo1
|
| 1385 |
+
zhuo2
|
| 1386 |
+
zi
|
| 1387 |
+
zi1
|
| 1388 |
+
zi2
|
| 1389 |
+
zi3
|
| 1390 |
+
zi4
|
| 1391 |
+
zong1
|
| 1392 |
+
zong2
|
| 1393 |
+
zong3
|
| 1394 |
+
zong4
|
| 1395 |
+
zou1
|
| 1396 |
+
zou2
|
| 1397 |
+
zou3
|
| 1398 |
+
zou4
|
| 1399 |
+
zu1
|
| 1400 |
+
zu2
|
| 1401 |
+
zu3
|
| 1402 |
+
zuan1
|
| 1403 |
+
zuan3
|
| 1404 |
+
zuan4
|
| 1405 |
+
zui2
|
| 1406 |
+
zui3
|
| 1407 |
+
zui4
|
| 1408 |
+
zun1
|
| 1409 |
+
zuo
|
| 1410 |
+
zuo1
|
| 1411 |
+
zuo2
|
| 1412 |
+
zuo3
|
| 1413 |
+
zuo4
|
| 1414 |
+
{
|
| 1415 |
+
~
|
| 1416 |
+
¡
|
| 1417 |
+
¢
|
| 1418 |
+
£
|
| 1419 |
+
¥
|
| 1420 |
+
§
|
| 1421 |
+
¨
|
| 1422 |
+
©
|
| 1423 |
+
«
|
| 1424 |
+
®
|
| 1425 |
+
¯
|
| 1426 |
+
°
|
| 1427 |
+
±
|
| 1428 |
+
²
|
| 1429 |
+
³
|
| 1430 |
+
´
|
| 1431 |
+
µ
|
| 1432 |
+
·
|
| 1433 |
+
¹
|
| 1434 |
+
º
|
| 1435 |
+
»
|
| 1436 |
+
¼
|
| 1437 |
+
½
|
| 1438 |
+
¾
|
| 1439 |
+
¿
|
| 1440 |
+
À
|
| 1441 |
+
Á
|
| 1442 |
+
Â
|
| 1443 |
+
Ã
|
| 1444 |
+
Ä
|
| 1445 |
+
Å
|
| 1446 |
+
Æ
|
| 1447 |
+
Ç
|
| 1448 |
+
È
|
| 1449 |
+
É
|
| 1450 |
+
Ê
|
| 1451 |
+
Í
|
| 1452 |
+
Î
|
| 1453 |
+
Ñ
|
| 1454 |
+
Ó
|
| 1455 |
+
Ö
|
| 1456 |
+
×
|
| 1457 |
+
Ø
|
| 1458 |
+
Ú
|
| 1459 |
+
Ü
|
| 1460 |
+
Ý
|
| 1461 |
+
Þ
|
| 1462 |
+
ß
|
| 1463 |
+
à
|
| 1464 |
+
á
|
| 1465 |
+
â
|
| 1466 |
+
ã
|
| 1467 |
+
ä
|
| 1468 |
+
å
|
| 1469 |
+
æ
|
| 1470 |
+
ç
|
| 1471 |
+
è
|
| 1472 |
+
é
|
| 1473 |
+
ê
|
| 1474 |
+
ë
|
| 1475 |
+
ì
|
| 1476 |
+
í
|
| 1477 |
+
î
|
| 1478 |
+
ï
|
| 1479 |
+
ð
|
| 1480 |
+
ñ
|
| 1481 |
+
ò
|
| 1482 |
+
ó
|
| 1483 |
+
ô
|
| 1484 |
+
õ
|
| 1485 |
+
ö
|
| 1486 |
+
ø
|
| 1487 |
+
ù
|
| 1488 |
+
ú
|
| 1489 |
+
û
|
| 1490 |
+
ü
|
| 1491 |
+
ý
|
| 1492 |
+
Ā
|
| 1493 |
+
ā
|
| 1494 |
+
ă
|
| 1495 |
+
ą
|
| 1496 |
+
ć
|
| 1497 |
+
Č
|
| 1498 |
+
č
|
| 1499 |
+
Đ
|
| 1500 |
+
đ
|
| 1501 |
+
ē
|
| 1502 |
+
ė
|
| 1503 |
+
ę
|
| 1504 |
+
ě
|
| 1505 |
+
ĝ
|
| 1506 |
+
ğ
|
| 1507 |
+
ħ
|
| 1508 |
+
ī
|
| 1509 |
+
į
|
| 1510 |
+
İ
|
| 1511 |
+
ı
|
| 1512 |
+
Ł
|
| 1513 |
+
ł
|
| 1514 |
+
ń
|
| 1515 |
+
ņ
|
| 1516 |
+
ň
|
| 1517 |
+
ŋ
|
| 1518 |
+
Ō
|
| 1519 |
+
ō
|
| 1520 |
+
ő
|
| 1521 |
+
œ
|
| 1522 |
+
ř
|
| 1523 |
+
Ś
|
| 1524 |
+
ś
|
| 1525 |
+
Ş
|
| 1526 |
+
ş
|
| 1527 |
+
Š
|
| 1528 |
+
š
|
| 1529 |
+
Ť
|
| 1530 |
+
ť
|
| 1531 |
+
ũ
|
| 1532 |
+
ū
|
| 1533 |
+
ź
|
| 1534 |
+
Ż
|
| 1535 |
+
ż
|
| 1536 |
+
Ž
|
| 1537 |
+
ž
|
| 1538 |
+
ơ
|
| 1539 |
+
ư
|
| 1540 |
+
ǎ
|
| 1541 |
+
ǐ
|
| 1542 |
+
ǒ
|
| 1543 |
+
ǔ
|
| 1544 |
+
ǚ
|
| 1545 |
+
ș
|
| 1546 |
+
ț
|
| 1547 |
+
ɑ
|
| 1548 |
+
ɔ
|
| 1549 |
+
ɕ
|
| 1550 |
+
ə
|
| 1551 |
+
ɛ
|
| 1552 |
+
ɜ
|
| 1553 |
+
ɡ
|
| 1554 |
+
ɣ
|
| 1555 |
+
ɪ
|
| 1556 |
+
ɫ
|
| 1557 |
+
ɴ
|
| 1558 |
+
ɹ
|
| 1559 |
+
ɾ
|
| 1560 |
+
ʃ
|
| 1561 |
+
ʊ
|
| 1562 |
+
ʌ
|
| 1563 |
+
ʒ
|
| 1564 |
+
ʔ
|
| 1565 |
+
ʰ
|
| 1566 |
+
ʷ
|
| 1567 |
+
ʻ
|
| 1568 |
+
ʾ
|
| 1569 |
+
ʿ
|
| 1570 |
+
ˈ
|
| 1571 |
+
ː
|
| 1572 |
+
˙
|
| 1573 |
+
˜
|
| 1574 |
+
ˢ
|
| 1575 |
+
́
|
| 1576 |
+
̅
|
| 1577 |
+
Α
|
| 1578 |
+
Β
|
| 1579 |
+
Δ
|
| 1580 |
+
Ε
|
| 1581 |
+
Θ
|
| 1582 |
+
Κ
|
| 1583 |
+
Λ
|
| 1584 |
+
Μ
|
| 1585 |
+
Ξ
|
| 1586 |
+
Π
|
| 1587 |
+
Σ
|
| 1588 |
+
Τ
|
| 1589 |
+
Φ
|
| 1590 |
+
Χ
|
| 1591 |
+
Ψ
|
| 1592 |
+
Ω
|
| 1593 |
+
ά
|
| 1594 |
+
έ
|
| 1595 |
+
ή
|
| 1596 |
+
ί
|
| 1597 |
+
α
|
| 1598 |
+
β
|
| 1599 |
+
γ
|
| 1600 |
+
δ
|
| 1601 |
+
ε
|
| 1602 |
+
ζ
|
| 1603 |
+
η
|
| 1604 |
+
θ
|
| 1605 |
+
ι
|
| 1606 |
+
κ
|
| 1607 |
+
λ
|
| 1608 |
+
μ
|
| 1609 |
+
ν
|
| 1610 |
+
ξ
|
| 1611 |
+
ο
|
| 1612 |
+
π
|
| 1613 |
+
ρ
|
| 1614 |
+
ς
|
| 1615 |
+
σ
|
| 1616 |
+
τ
|
| 1617 |
+
υ
|
| 1618 |
+
φ
|
| 1619 |
+
χ
|
| 1620 |
+
ψ
|
| 1621 |
+
ω
|
| 1622 |
+
ϊ
|
| 1623 |
+
ό
|
| 1624 |
+
ύ
|
| 1625 |
+
ώ
|
| 1626 |
+
ϕ
|
| 1627 |
+
ϵ
|
| 1628 |
+
Ё
|
| 1629 |
+
А
|
| 1630 |
+
Б
|
| 1631 |
+
В
|
| 1632 |
+
Г
|
| 1633 |
+
Д
|
| 1634 |
+
Е
|
| 1635 |
+
Ж
|
| 1636 |
+
З
|
| 1637 |
+
И
|
| 1638 |
+
Й
|
| 1639 |
+
К
|
| 1640 |
+
Л
|
| 1641 |
+
М
|
| 1642 |
+
Н
|
| 1643 |
+
О
|
| 1644 |
+
П
|
| 1645 |
+
Р
|
| 1646 |
+
С
|
| 1647 |
+
Т
|
| 1648 |
+
У
|
| 1649 |
+
Ф
|
| 1650 |
+
Х
|
| 1651 |
+
Ц
|
| 1652 |
+
Ч
|
| 1653 |
+
Ш
|
| 1654 |
+
Щ
|
| 1655 |
+
Ы
|
| 1656 |
+
Ь
|
| 1657 |
+
Э
|
| 1658 |
+
Ю
|
| 1659 |
+
Я
|
| 1660 |
+
а
|
| 1661 |
+
б
|
| 1662 |
+
в
|
| 1663 |
+
г
|
| 1664 |
+
д
|
| 1665 |
+
е
|
| 1666 |
+
ж
|
| 1667 |
+
з
|
| 1668 |
+
и
|
| 1669 |
+
й
|
| 1670 |
+
к
|
| 1671 |
+
л
|
| 1672 |
+
м
|
| 1673 |
+
н
|
| 1674 |
+
о
|
| 1675 |
+
п
|
| 1676 |
+
р
|
| 1677 |
+
с
|
| 1678 |
+
т
|
| 1679 |
+
у
|
| 1680 |
+
ф
|
| 1681 |
+
х
|
| 1682 |
+
ц
|
| 1683 |
+
ч
|
| 1684 |
+
ш
|
| 1685 |
+
щ
|
| 1686 |
+
ъ
|
| 1687 |
+
ы
|
| 1688 |
+
ь
|
| 1689 |
+
э
|
| 1690 |
+
ю
|
| 1691 |
+
я
|
| 1692 |
+
ё
|
| 1693 |
+
і
|
| 1694 |
+
ְ
|
| 1695 |
+
ִ
|
| 1696 |
+
ֵ
|
| 1697 |
+
ֶ
|
| 1698 |
+
ַ
|
| 1699 |
+
ָ
|
| 1700 |
+
ֹ
|
| 1701 |
+
ּ
|
| 1702 |
+
־
|
| 1703 |
+
ׁ
|
| 1704 |
+
א
|
| 1705 |
+
ב
|
| 1706 |
+
ג
|
| 1707 |
+
ד
|
| 1708 |
+
ה
|
| 1709 |
+
ו
|
| 1710 |
+
ז
|
| 1711 |
+
ח
|
| 1712 |
+
ט
|
| 1713 |
+
י
|
| 1714 |
+
כ
|
| 1715 |
+
ל
|
| 1716 |
+
ם
|
| 1717 |
+
מ
|
| 1718 |
+
ן
|
| 1719 |
+
נ
|
| 1720 |
+
ס
|
| 1721 |
+
ע
|
| 1722 |
+
פ
|
| 1723 |
+
ק
|
| 1724 |
+
ר
|
| 1725 |
+
ש
|
| 1726 |
+
ת
|
| 1727 |
+
أ
|
| 1728 |
+
ب
|
| 1729 |
+
ة
|
| 1730 |
+
ت
|
| 1731 |
+
ج
|
| 1732 |
+
ح
|
| 1733 |
+
د
|
| 1734 |
+
ر
|
| 1735 |
+
ز
|
| 1736 |
+
س
|
| 1737 |
+
ص
|
| 1738 |
+
ط
|
| 1739 |
+
ع
|
| 1740 |
+
ق
|
| 1741 |
+
ك
|
| 1742 |
+
ل
|
| 1743 |
+
م
|
| 1744 |
+
ن
|
| 1745 |
+
ه
|
| 1746 |
+
و
|
| 1747 |
+
ي
|
| 1748 |
+
َ
|
| 1749 |
+
ُ
|
| 1750 |
+
ِ
|
| 1751 |
+
ْ
|
| 1752 |
+
ก
|
| 1753 |
+
ข
|
| 1754 |
+
ง
|
| 1755 |
+
จ
|
| 1756 |
+
ต
|
| 1757 |
+
ท
|
| 1758 |
+
น
|
| 1759 |
+
ป
|
| 1760 |
+
ย
|
| 1761 |
+
ร
|
| 1762 |
+
ว
|
| 1763 |
+
ส
|
| 1764 |
+
ห
|
| 1765 |
+
อ
|
| 1766 |
+
ฮ
|
| 1767 |
+
ั
|
| 1768 |
+
า
|
| 1769 |
+
ี
|
| 1770 |
+
ึ
|
| 1771 |
+
โ
|
| 1772 |
+
ใ
|
| 1773 |
+
ไ
|
| 1774 |
+
่
|
| 1775 |
+
้
|
| 1776 |
+
์
|
| 1777 |
+
ḍ
|
| 1778 |
+
Ḥ
|
| 1779 |
+
ḥ
|
| 1780 |
+
ṁ
|
| 1781 |
+
ṃ
|
| 1782 |
+
ṅ
|
| 1783 |
+
ṇ
|
| 1784 |
+
Ṛ
|
| 1785 |
+
ṛ
|
| 1786 |
+
Ṣ
|
| 1787 |
+
ṣ
|
| 1788 |
+
Ṭ
|
| 1789 |
+
ṭ
|
| 1790 |
+
ạ
|
| 1791 |
+
ả
|
| 1792 |
+
Ấ
|
| 1793 |
+
ấ
|
| 1794 |
+
ầ
|
| 1795 |
+
ậ
|
| 1796 |
+
ắ
|
| 1797 |
+
ằ
|
| 1798 |
+
ẻ
|
| 1799 |
+
ẽ
|
| 1800 |
+
ế
|
| 1801 |
+
ề
|
| 1802 |
+
ể
|
| 1803 |
+
ễ
|
| 1804 |
+
ệ
|
| 1805 |
+
ị
|
| 1806 |
+
ọ
|
| 1807 |
+
ỏ
|
| 1808 |
+
ố
|
| 1809 |
+
ồ
|
| 1810 |
+
ộ
|
| 1811 |
+
ớ
|
| 1812 |
+
ờ
|
| 1813 |
+
ở
|
| 1814 |
+
ụ
|
| 1815 |
+
ủ
|
| 1816 |
+
ứ
|
| 1817 |
+
ữ
|
| 1818 |
+
ἀ
|
| 1819 |
+
ἁ
|
| 1820 |
+
Ἀ
|
| 1821 |
+
ἐ
|
| 1822 |
+
ἔ
|
| 1823 |
+
ἰ
|
| 1824 |
+
ἱ
|
| 1825 |
+
ὀ
|
| 1826 |
+
ὁ
|
| 1827 |
+
ὐ
|
| 1828 |
+
ὲ
|
| 1829 |
+
ὸ
|
| 1830 |
+
���
|
| 1831 |
+
᾽
|
| 1832 |
+
ῆ
|
| 1833 |
+
ῇ
|
| 1834 |
+
ῶ
|
| 1835 |
+
|
| 1836 |
+
‑
|
| 1837 |
+
‒
|
| 1838 |
+
–
|
| 1839 |
+
—
|
| 1840 |
+
―
|
| 1841 |
+
‖
|
| 1842 |
+
†
|
| 1843 |
+
‡
|
| 1844 |
+
•
|
| 1845 |
+
…
|
| 1846 |
+
‧
|
| 1847 |
+
|
| 1848 |
+
′
|
| 1849 |
+
″
|
| 1850 |
+
⁄
|
| 1851 |
+
|
| 1852 |
+
⁰
|
| 1853 |
+
⁴
|
| 1854 |
+
⁵
|
| 1855 |
+
⁶
|
| 1856 |
+
⁷
|
| 1857 |
+
⁸
|
| 1858 |
+
⁹
|
| 1859 |
+
₁
|
| 1860 |
+
₂
|
| 1861 |
+
₃
|
| 1862 |
+
€
|
| 1863 |
+
₱
|
| 1864 |
+
₹
|
| 1865 |
+
₽
|
| 1866 |
+
℃
|
| 1867 |
+
ℏ
|
| 1868 |
+
ℓ
|
| 1869 |
+
№
|
| 1870 |
+
ℝ
|
| 1871 |
+
™
|
| 1872 |
+
⅓
|
| 1873 |
+
⅔
|
| 1874 |
+
⅛
|
| 1875 |
+
→
|
| 1876 |
+
∂
|
| 1877 |
+
∈
|
| 1878 |
+
∑
|
| 1879 |
+
−
|
| 1880 |
+
∗
|
| 1881 |
+
√
|
| 1882 |
+
∞
|
| 1883 |
+
∫
|
| 1884 |
+
≈
|
| 1885 |
+
≠
|
| 1886 |
+
≡
|
| 1887 |
+
≤
|
| 1888 |
+
≥
|
| 1889 |
+
⋅
|
| 1890 |
+
⋯
|
| 1891 |
+
█
|
| 1892 |
+
♪
|
| 1893 |
+
⟨
|
| 1894 |
+
⟩
|
| 1895 |
+
、
|
| 1896 |
+
。
|
| 1897 |
+
《
|
| 1898 |
+
》
|
| 1899 |
+
「
|
| 1900 |
+
」
|
| 1901 |
+
【
|
| 1902 |
+
】
|
| 1903 |
+
あ
|
| 1904 |
+
う
|
| 1905 |
+
え
|
| 1906 |
+
お
|
| 1907 |
+
か
|
| 1908 |
+
が
|
| 1909 |
+
き
|
| 1910 |
+
ぎ
|
| 1911 |
+
く
|
| 1912 |
+
ぐ
|
| 1913 |
+
け
|
| 1914 |
+
げ
|
| 1915 |
+
こ
|
| 1916 |
+
ご
|
| 1917 |
+
さ
|
| 1918 |
+
し
|
| 1919 |
+
じ
|
| 1920 |
+
す
|
| 1921 |
+
ず
|
| 1922 |
+
せ
|
| 1923 |
+
ぜ
|
| 1924 |
+
そ
|
| 1925 |
+
ぞ
|
| 1926 |
+
た
|
| 1927 |
+
だ
|
| 1928 |
+
ち
|
| 1929 |
+
っ
|
| 1930 |
+
つ
|
| 1931 |
+
で
|
| 1932 |
+
と
|
| 1933 |
+
ど
|
| 1934 |
+
な
|
| 1935 |
+
に
|
| 1936 |
+
ね
|
| 1937 |
+
の
|
| 1938 |
+
は
|
| 1939 |
+
ば
|
| 1940 |
+
ひ
|
| 1941 |
+
ぶ
|
| 1942 |
+
へ
|
| 1943 |
+
べ
|
| 1944 |
+
ま
|
| 1945 |
+
み
|
| 1946 |
+
む
|
| 1947 |
+
め
|
| 1948 |
+
も
|
| 1949 |
+
ゃ
|
| 1950 |
+
や
|
| 1951 |
+
ゆ
|
| 1952 |
+
ょ
|
| 1953 |
+
よ
|
| 1954 |
+
ら
|
| 1955 |
+
り
|
| 1956 |
+
る
|
| 1957 |
+
れ
|
| 1958 |
+
ろ
|
| 1959 |
+
わ
|
| 1960 |
+
を
|
| 1961 |
+
ん
|
| 1962 |
+
ァ
|
| 1963 |
+
ア
|
| 1964 |
+
ィ
|
| 1965 |
+
イ
|
| 1966 |
+
ウ
|
| 1967 |
+
ェ
|
| 1968 |
+
エ
|
| 1969 |
+
オ
|
| 1970 |
+
カ
|
| 1971 |
+
ガ
|
| 1972 |
+
キ
|
| 1973 |
+
ク
|
| 1974 |
+
ケ
|
| 1975 |
+
ゲ
|
| 1976 |
+
コ
|
| 1977 |
+
ゴ
|
| 1978 |
+
サ
|
| 1979 |
+
ザ
|
| 1980 |
+
シ
|
| 1981 |
+
ジ
|
| 1982 |
+
ス
|
| 1983 |
+
ズ
|
| 1984 |
+
セ
|
| 1985 |
+
ゾ
|
| 1986 |
+
タ
|
| 1987 |
+
ダ
|
| 1988 |
+
チ
|
| 1989 |
+
ッ
|
| 1990 |
+
ツ
|
| 1991 |
+
テ
|
| 1992 |
+
デ
|
| 1993 |
+
ト
|
| 1994 |
+
ド
|
| 1995 |
+
ナ
|
| 1996 |
+
ニ
|
| 1997 |
+
ネ
|
| 1998 |
+
ノ
|
| 1999 |
+
バ
|
| 2000 |
+
パ
|
| 2001 |
+
ビ
|
| 2002 |
+
ピ
|
| 2003 |
+
フ
|
| 2004 |
+
プ
|
| 2005 |
+
ヘ
|
| 2006 |
+
ベ
|
| 2007 |
+
ペ
|
| 2008 |
+
ホ
|
| 2009 |
+
ボ
|
| 2010 |
+
ポ
|
| 2011 |
+
マ
|
| 2012 |
+
ミ
|
| 2013 |
+
ム
|
| 2014 |
+
メ
|
| 2015 |
+
モ
|
| 2016 |
+
ャ
|
| 2017 |
+
ヤ
|
| 2018 |
+
ュ
|
| 2019 |
+
ユ
|
| 2020 |
+
ョ
|
| 2021 |
+
ヨ
|
| 2022 |
+
ラ
|
| 2023 |
+
リ
|
| 2024 |
+
ル
|
| 2025 |
+
レ
|
| 2026 |
+
ロ
|
| 2027 |
+
ワ
|
| 2028 |
+
ン
|
| 2029 |
+
・
|
| 2030 |
+
ー
|
| 2031 |
+
ㄋ
|
| 2032 |
+
ㄍ
|
| 2033 |
+
ㄎ
|
| 2034 |
+
ㄏ
|
| 2035 |
+
ㄓ
|
| 2036 |
+
ㄕ
|
| 2037 |
+
ㄚ
|
| 2038 |
+
ㄜ
|
| 2039 |
+
ㄟ
|
| 2040 |
+
ㄤ
|
| 2041 |
+
ㄥ
|
| 2042 |
+
ㄧ
|
| 2043 |
+
ㄱ
|
| 2044 |
+
ㄴ
|
| 2045 |
+
ㄷ
|
| 2046 |
+
ㄹ
|
| 2047 |
+
ㅁ
|
| 2048 |
+
ㅂ
|
| 2049 |
+
ㅅ
|
| 2050 |
+
ㅈ
|
| 2051 |
+
ㅍ
|
| 2052 |
+
ㅎ
|
| 2053 |
+
ㅏ
|
| 2054 |
+
ㅓ
|
| 2055 |
+
ㅗ
|
| 2056 |
+
ㅜ
|
| 2057 |
+
ㅡ
|
| 2058 |
+
ㅣ
|
| 2059 |
+
㗎
|
| 2060 |
+
가
|
| 2061 |
+
각
|
| 2062 |
+
간
|
| 2063 |
+
갈
|
| 2064 |
+
감
|
| 2065 |
+
갑
|
| 2066 |
+
갓
|
| 2067 |
+
갔
|
| 2068 |
+
강
|
| 2069 |
+
같
|
| 2070 |
+
개
|
| 2071 |
+
거
|
| 2072 |
+
건
|
| 2073 |
+
걸
|
| 2074 |
+
겁
|
| 2075 |
+
것
|
| 2076 |
+
겉
|
| 2077 |
+
게
|
| 2078 |
+
겠
|
| 2079 |
+
겨
|
| 2080 |
+
결
|
| 2081 |
+
겼
|
| 2082 |
+
경
|
| 2083 |
+
계
|
| 2084 |
+
고
|
| 2085 |
+
곤
|
| 2086 |
+
골
|
| 2087 |
+
곱
|
| 2088 |
+
공
|
| 2089 |
+
과
|
| 2090 |
+
관
|
| 2091 |
+
광
|
| 2092 |
+
교
|
| 2093 |
+
구
|
| 2094 |
+
국
|
| 2095 |
+
굴
|
| 2096 |
+
귀
|
| 2097 |
+
귄
|
| 2098 |
+
그
|
| 2099 |
+
근
|
| 2100 |
+
글
|
| 2101 |
+
금
|
| 2102 |
+
기
|
| 2103 |
+
긴
|
| 2104 |
+
길
|
| 2105 |
+
까
|
| 2106 |
+
깍
|
| 2107 |
+
깔
|
| 2108 |
+
깜
|
| 2109 |
+
깨
|
| 2110 |
+
께
|
| 2111 |
+
꼬
|
| 2112 |
+
꼭
|
| 2113 |
+
꽃
|
| 2114 |
+
꾸
|
| 2115 |
+
꿔
|
| 2116 |
+
끔
|
| 2117 |
+
끗
|
| 2118 |
+
끝
|
| 2119 |
+
끼
|
| 2120 |
+
나
|
| 2121 |
+
난
|
| 2122 |
+
날
|
| 2123 |
+
남
|
| 2124 |
+
납
|
| 2125 |
+
내
|
| 2126 |
+
냐
|
| 2127 |
+
냥
|
| 2128 |
+
너
|
| 2129 |
+
넘
|
| 2130 |
+
넣
|
| 2131 |
+
네
|
| 2132 |
+
녁
|
| 2133 |
+
년
|
| 2134 |
+
녕
|
| 2135 |
+
노
|
| 2136 |
+
녹
|
| 2137 |
+
놀
|
| 2138 |
+
누
|
| 2139 |
+
눈
|
| 2140 |
+
느
|
| 2141 |
+
는
|
| 2142 |
+
늘
|
| 2143 |
+
니
|
| 2144 |
+
님
|
| 2145 |
+
닙
|
| 2146 |
+
다
|
| 2147 |
+
닥
|
| 2148 |
+
단
|
| 2149 |
+
달
|
| 2150 |
+
닭
|
| 2151 |
+
당
|
| 2152 |
+
대
|
| 2153 |
+
더
|
| 2154 |
+
덕
|
| 2155 |
+
던
|
| 2156 |
+
덥
|
| 2157 |
+
데
|
| 2158 |
+
도
|
| 2159 |
+
독
|
| 2160 |
+
동
|
| 2161 |
+
돼
|
| 2162 |
+
됐
|
| 2163 |
+
되
|
| 2164 |
+
된
|
| 2165 |
+
될
|
| 2166 |
+
두
|
| 2167 |
+
둑
|
| 2168 |
+
둥
|
| 2169 |
+
드
|
| 2170 |
+
들
|
| 2171 |
+
등
|
| 2172 |
+
디
|
| 2173 |
+
따
|
| 2174 |
+
딱
|
| 2175 |
+
딸
|
| 2176 |
+
땅
|
| 2177 |
+
때
|
| 2178 |
+
떤
|
| 2179 |
+
떨
|
| 2180 |
+
떻
|
| 2181 |
+
또
|
| 2182 |
+
똑
|
| 2183 |
+
뚱
|
| 2184 |
+
뛰
|
| 2185 |
+
뜻
|
| 2186 |
+
띠
|
| 2187 |
+
라
|
| 2188 |
+
락
|
| 2189 |
+
란
|
| 2190 |
+
람
|
| 2191 |
+
랍
|
| 2192 |
+
랑
|
| 2193 |
+
래
|
| 2194 |
+
랜
|
| 2195 |
+
러
|
| 2196 |
+
런
|
| 2197 |
+
럼
|
| 2198 |
+
렇
|
| 2199 |
+
레
|
| 2200 |
+
려
|
| 2201 |
+
력
|
| 2202 |
+
렵
|
| 2203 |
+
렸
|
| 2204 |
+
로
|
| 2205 |
+
록
|
| 2206 |
+
롬
|
| 2207 |
+
루
|
| 2208 |
+
르
|
| 2209 |
+
른
|
| 2210 |
+
를
|
| 2211 |
+
름
|
| 2212 |
+
릉
|
| 2213 |
+
리
|
| 2214 |
+
릴
|
| 2215 |
+
림
|
| 2216 |
+
마
|
| 2217 |
+
막
|
| 2218 |
+
만
|
| 2219 |
+
많
|
| 2220 |
+
말
|
| 2221 |
+
맑
|
| 2222 |
+
맙
|
| 2223 |
+
맛
|
| 2224 |
+
매
|
| 2225 |
+
머
|
| 2226 |
+
먹
|
| 2227 |
+
멍
|
| 2228 |
+
메
|
| 2229 |
+
면
|
| 2230 |
+
명
|
| 2231 |
+
몇
|
| 2232 |
+
모
|
| 2233 |
+
목
|
| 2234 |
+
몸
|
| 2235 |
+
못
|
| 2236 |
+
무
|
| 2237 |
+
문
|
| 2238 |
+
물
|
| 2239 |
+
뭐
|
| 2240 |
+
뭘
|
| 2241 |
+
미
|
| 2242 |
+
민
|
| 2243 |
+
밌
|
| 2244 |
+
밑
|
| 2245 |
+
바
|
| 2246 |
+
박
|
| 2247 |
+
밖
|
| 2248 |
+
반
|
| 2249 |
+
받
|
| 2250 |
+
발
|
| 2251 |
+
밤
|
| 2252 |
+
밥
|
| 2253 |
+
방
|
| 2254 |
+
배
|
| 2255 |
+
백
|
| 2256 |
+
밸
|
| 2257 |
+
뱀
|
| 2258 |
+
버
|
| 2259 |
+
번
|
| 2260 |
+
벌
|
| 2261 |
+
벚
|
| 2262 |
+
베
|
| 2263 |
+
벼
|
| 2264 |
+
벽
|
| 2265 |
+
별
|
| 2266 |
+
병
|
| 2267 |
+
보
|
| 2268 |
+
복
|
| 2269 |
+
본
|
| 2270 |
+
볼
|
| 2271 |
+
봐
|
| 2272 |
+
봤
|
| 2273 |
+
부
|
| 2274 |
+
분
|
| 2275 |
+
불
|
| 2276 |
+
비
|
| 2277 |
+
빔
|
| 2278 |
+
빛
|
| 2279 |
+
빠
|
| 2280 |
+
빨
|
| 2281 |
+
뼈
|
| 2282 |
+
뽀
|
| 2283 |
+
뿅
|
| 2284 |
+
쁘
|
| 2285 |
+
사
|
| 2286 |
+
산
|
| 2287 |
+
살
|
| 2288 |
+
삼
|
| 2289 |
+
샀
|
| 2290 |
+
상
|
| 2291 |
+
새
|
| 2292 |
+
색
|
| 2293 |
+
생
|
| 2294 |
+
서
|
| 2295 |
+
선
|
| 2296 |
+
설
|
| 2297 |
+
섭
|
| 2298 |
+
섰
|
| 2299 |
+
성
|
| 2300 |
+
세
|
| 2301 |
+
셔
|
| 2302 |
+
션
|
| 2303 |
+
셨
|
| 2304 |
+
소
|
| 2305 |
+
속
|
| 2306 |
+
손
|
| 2307 |
+
송
|
| 2308 |
+
수
|
| 2309 |
+
숙
|
| 2310 |
+
순
|
| 2311 |
+
술
|
| 2312 |
+
숫
|
| 2313 |
+
숭
|
| 2314 |
+
숲
|
| 2315 |
+
쉬
|
| 2316 |
+
쉽
|
| 2317 |
+
스
|
| 2318 |
+
슨
|
| 2319 |
+
습
|
| 2320 |
+
슷
|
| 2321 |
+
시
|
| 2322 |
+
식
|
| 2323 |
+
신
|
| 2324 |
+
실
|
| 2325 |
+
싫
|
| 2326 |
+
심
|
| 2327 |
+
십
|
| 2328 |
+
싶
|
| 2329 |
+
싸
|
| 2330 |
+
써
|
| 2331 |
+
쓰
|
| 2332 |
+
쓴
|
| 2333 |
+
씌
|
| 2334 |
+
씨
|
| 2335 |
+
씩
|
| 2336 |
+
씬
|
| 2337 |
+
아
|
| 2338 |
+
악
|
| 2339 |
+
안
|
| 2340 |
+
않
|
| 2341 |
+
알
|
| 2342 |
+
야
|
| 2343 |
+
약
|
| 2344 |
+
얀
|
| 2345 |
+
양
|
| 2346 |
+
얘
|
| 2347 |
+
어
|
| 2348 |
+
언
|
| 2349 |
+
얼
|
| 2350 |
+
엄
|
| 2351 |
+
업
|
| 2352 |
+
없
|
| 2353 |
+
었
|
| 2354 |
+
엉
|
| 2355 |
+
에
|
| 2356 |
+
여
|
| 2357 |
+
역
|
| 2358 |
+
연
|
| 2359 |
+
염
|
| 2360 |
+
엽
|
| 2361 |
+
영
|
| 2362 |
+
옆
|
| 2363 |
+
예
|
| 2364 |
+
옛
|
| 2365 |
+
오
|
| 2366 |
+
온
|
| 2367 |
+
올
|
| 2368 |
+
옷
|
| 2369 |
+
옹
|
| 2370 |
+
와
|
| 2371 |
+
왔
|
| 2372 |
+
왜
|
| 2373 |
+
요
|
| 2374 |
+
욕
|
| 2375 |
+
용
|
| 2376 |
+
우
|
| 2377 |
+
운
|
| 2378 |
+
울
|
| 2379 |
+
웃
|
| 2380 |
+
워
|
| 2381 |
+
원
|
| 2382 |
+
월
|
| 2383 |
+
웠
|
| 2384 |
+
위
|
| 2385 |
+
윙
|
| 2386 |
+
유
|
| 2387 |
+
육
|
| 2388 |
+
윤
|
| 2389 |
+
으
|
| 2390 |
+
은
|
| 2391 |
+
을
|
| 2392 |
+
음
|
| 2393 |
+
응
|
| 2394 |
+
의
|
| 2395 |
+
이
|
| 2396 |
+
익
|
| 2397 |
+
인
|
| 2398 |
+
일
|
| 2399 |
+
읽
|
| 2400 |
+
임
|
| 2401 |
+
입
|
| 2402 |
+
있
|
| 2403 |
+
자
|
| 2404 |
+
작
|
| 2405 |
+
잔
|
| 2406 |
+
잖
|
| 2407 |
+
잘
|
| 2408 |
+
잡
|
| 2409 |
+
잤
|
| 2410 |
+
장
|
| 2411 |
+
재
|
| 2412 |
+
저
|
| 2413 |
+
전
|
| 2414 |
+
점
|
| 2415 |
+
정
|
| 2416 |
+
제
|
| 2417 |
+
져
|
| 2418 |
+
졌
|
| 2419 |
+
조
|
| 2420 |
+
족
|
| 2421 |
+
좀
|
| 2422 |
+
종
|
| 2423 |
+
좋
|
| 2424 |
+
죠
|
| 2425 |
+
주
|
| 2426 |
+
준
|
| 2427 |
+
줄
|
| 2428 |
+
중
|
| 2429 |
+
줘
|
| 2430 |
+
즈
|
| 2431 |
+
즐
|
| 2432 |
+
즘
|
| 2433 |
+
지
|
| 2434 |
+
진
|
| 2435 |
+
집
|
| 2436 |
+
짜
|
| 2437 |
+
짝
|
| 2438 |
+
쩌
|
| 2439 |
+
쪼
|
| 2440 |
+
쪽
|
| 2441 |
+
쫌
|
| 2442 |
+
쭈
|
| 2443 |
+
쯔
|
| 2444 |
+
찌
|
| 2445 |
+
찍
|
| 2446 |
+
차
|
| 2447 |
+
착
|
| 2448 |
+
찾
|
| 2449 |
+
책
|
| 2450 |
+
처
|
| 2451 |
+
천
|
| 2452 |
+
철
|
| 2453 |
+
체
|
| 2454 |
+
쳐
|
| 2455 |
+
쳤
|
| 2456 |
+
초
|
| 2457 |
+
촌
|
| 2458 |
+
추
|
| 2459 |
+
출
|
| 2460 |
+
춤
|
| 2461 |
+
춥
|
| 2462 |
+
춰
|
| 2463 |
+
치
|
| 2464 |
+
친
|
| 2465 |
+
칠
|
| 2466 |
+
침
|
| 2467 |
+
칩
|
| 2468 |
+
칼
|
| 2469 |
+
커
|
| 2470 |
+
켓
|
| 2471 |
+
코
|
| 2472 |
+
콩
|
| 2473 |
+
쿠
|
| 2474 |
+
퀴
|
| 2475 |
+
크
|
| 2476 |
+
큰
|
| 2477 |
+
큽
|
| 2478 |
+
키
|
| 2479 |
+
킨
|
| 2480 |
+
타
|
| 2481 |
+
태
|
| 2482 |
+
터
|
| 2483 |
+
턴
|
| 2484 |
+
털
|
| 2485 |
+
테
|
| 2486 |
+
토
|
| 2487 |
+
통
|
| 2488 |
+
투
|
| 2489 |
+
트
|
| 2490 |
+
특
|
| 2491 |
+
튼
|
| 2492 |
+
틀
|
| 2493 |
+
티
|
| 2494 |
+
팀
|
| 2495 |
+
파
|
| 2496 |
+
팔
|
| 2497 |
+
패
|
| 2498 |
+
페
|
| 2499 |
+
펜
|
| 2500 |
+
펭
|
| 2501 |
+
평
|
| 2502 |
+
포
|
| 2503 |
+
폭
|
| 2504 |
+
표
|
| 2505 |
+
품
|
| 2506 |
+
풍
|
| 2507 |
+
프
|
| 2508 |
+
플
|
| 2509 |
+
피
|
| 2510 |
+
필
|
| 2511 |
+
하
|
| 2512 |
+
학
|
| 2513 |
+
한
|
| 2514 |
+
할
|
| 2515 |
+
함
|
| 2516 |
+
합
|
| 2517 |
+
항
|
| 2518 |
+
해
|
| 2519 |
+
햇
|
| 2520 |
+
했
|
| 2521 |
+
행
|
| 2522 |
+
허
|
| 2523 |
+
험
|
| 2524 |
+
형
|
| 2525 |
+
혜
|
| 2526 |
+
호
|
| 2527 |
+
혼
|
| 2528 |
+
홀
|
| 2529 |
+
화
|
| 2530 |
+
회
|
| 2531 |
+
획
|
| 2532 |
+
후
|
| 2533 |
+
휴
|
| 2534 |
+
흐
|
| 2535 |
+
흔
|
| 2536 |
+
희
|
| 2537 |
+
히
|
| 2538 |
+
힘
|
| 2539 |
+
ﷺ
|
| 2540 |
+
ﷻ
|
| 2541 |
+
!
|
| 2542 |
+
,
|
| 2543 |
+
?
|
| 2544 |
+
�
|
| 2545 |
+
𠮶
|
src/f5_tts/infer/infer_cli.py
ADDED
|
@@ -0,0 +1,388 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import codecs
|
| 3 |
+
import os
|
| 4 |
+
import re
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from importlib.resources import files
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import numpy as np
|
| 10 |
+
import soundfile as sf
|
| 11 |
+
import tomli
|
| 12 |
+
from cached_path import cached_path
|
| 13 |
+
from hydra.utils import get_class
|
| 14 |
+
from omegaconf import OmegaConf
|
| 15 |
+
from unidecode import unidecode
|
| 16 |
+
|
| 17 |
+
from f5_tts.infer.utils_infer import (
|
| 18 |
+
cfg_strength,
|
| 19 |
+
cross_fade_duration,
|
| 20 |
+
device,
|
| 21 |
+
fix_duration,
|
| 22 |
+
infer_process,
|
| 23 |
+
load_model,
|
| 24 |
+
load_vocoder,
|
| 25 |
+
mel_spec_type,
|
| 26 |
+
nfe_step,
|
| 27 |
+
preprocess_ref_audio_text,
|
| 28 |
+
remove_silence_for_generated_wav,
|
| 29 |
+
speed,
|
| 30 |
+
sway_sampling_coef,
|
| 31 |
+
target_rms,
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ---------------------------------------------------------------------------
# Command-line options for the F5/E2 TTS batch inference CLI.
# Apart from --config, no defaults are set here on purpose: an unset CLI
# option falls through to the value found in the TOML config file.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(
    prog="python3 infer-cli.py",
    description="Commandline interface for E2/F5 TTS with Advanced Batch Processing.",
    epilog="Specify options above to override one or more settings from config.",
)
parser.add_argument(
    "-c",
    "--config",
    type=str,
    default=os.path.join(files("f5_tts").joinpath("infer/examples/basic"), "basic.toml"),
    help="The configuration file, default see infer/examples/basic/basic.toml",
)

# Note. Not to provide default value here in order to read default from config file

# Model / checkpoint selection
parser.add_argument("-m", "--model", type=str, help="The model name: F5TTS_v1_Base | F5TTS_Base | E2TTS_Base | etc.")
parser.add_argument("-mc", "--model_cfg", type=str, help="The path to F5-TTS model config file .yaml")
parser.add_argument("-p", "--ckpt_file", type=str, help="The path to model checkpoint .pt, leave blank to use default")
parser.add_argument("--use_ema", action="store_true", help="To use ema model")
parser.add_argument("-v", "--vocab_file", type=str, help="The path to vocab file .txt, leave blank to use default")

# Reference / generation text
parser.add_argument("-r", "--ref_audio", type=str, help="The reference audio file.")
parser.add_argument("-s", "--ref_text", type=str, help="The transcript/subtitle for the reference audio")
parser.add_argument("-t", "--gen_text", type=str, help="The text to make model synthesize a speech")
parser.add_argument("-f", "--gen_file", type=str, help="The file with text to generate, will ignore --gen_text")

# Output options
parser.add_argument("-o", "--output_dir", type=str, help="The path to output folder")
parser.add_argument("-w", "--output_file", type=str, help="The name of output file")
parser.add_argument("--save_chunk", action="store_true", help="To save each audio chunks during inference")
parser.add_argument(
    "--no_legacy_text",
    action="store_false",  # i.e. args.no_legacy_text is True unless the flag is passed
    help="Not to use lossy ASCII transliterations of unicode text in saved file names.",
)
parser.add_argument("--remove_silence", action="store_true", help="To remove long silence found in ouput")

# Vocoder options
parser.add_argument(
    "--load_vocoder_from_local",
    action="store_true",
    help="To load vocoder from local dir, default to ../checkpoints/vocos-mel-24khz",
)
parser.add_argument(
    "--vocoder_name",
    type=str,
    choices=["vocos", "bigvgan"],
    help=f"Used vocoder name: vocos | bigvgan, default {mel_spec_type}",
)

# Sampling / synthesis knobs
parser.add_argument(
    "--target_rms", type=float, help=f"Target output speech loudness normalization value, default {target_rms}"
)
parser.add_argument(
    "--cross_fade_duration",
    type=float,
    help=f"Duration of cross-fade between audio segments in seconds, default {cross_fade_duration}",
)
parser.add_argument("--nfe_step", type=int, help=f"The number of function evaluation (denoising steps), default {nfe_step}")
parser.add_argument("--cfg_strength", type=float, help=f"Classifier-free guidance strength, default {cfg_strength}")
parser.add_argument("--sway_sampling_coef", type=float, help=f"Sway Sampling coefficient, default {sway_sampling_coef}")
parser.add_argument("--speed", type=float, help=f"The speed of the generated audio, default {speed}")
parser.add_argument(
    "--fix_duration",
    type=float,
    help=f"Fix the total duration (ref and gen audios) in seconds, default {fix_duration}",
)
parser.add_argument("--device", type=str, help="Specify the device to run on")
args = parser.parse_args()
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# config file
|
| 186 |
+
|
| 187 |
+
# ---------------------------------------------------------------------------
# Load the TOML config file, then resolve every runtime setting with
# CLI-over-config precedence: an explicitly passed CLI option wins, otherwise
# the config value, otherwise a hard-coded fallback.
# ---------------------------------------------------------------------------
with open(args.config, "rb") as config_fp:  # context manager: don't leak the handle
    config = tomli.load(config_fp)


# command-line interface parameters

model = args.model or config.get("model", "F5TTS_v1_Base")
ckpt_file = args.ckpt_file or config.get("ckpt_file", "")
vocab_file = args.vocab_file or config.get("vocab_file", "")

ref_audio = args.ref_audio or config.get("ref_audio", "infer/examples/basic/basic_ref_en.wav")
# ref_text may legitimately be an empty string (auto-transcribe downstream),
# so compare against None instead of relying on truthiness.
ref_text = (
    args.ref_text
    if args.ref_text is not None
    else config.get("ref_text", "Some call me nature, others call me mother nature.")
)
gen_text = args.gen_text or config.get("gen_text", "Here we generate something just for test.")
gen_file = args.gen_file or config.get("gen_file", "")

output_dir = args.output_dir or config.get("output_dir", "tests")
output_file = args.output_file or config.get(
    "output_file", f"infer_cli_{datetime.now().strftime(r'%Y%m%d_%H%M%S')}.wav"
)

save_chunk = args.save_chunk or config.get("save_chunk", False)
# no_legacy_text is a store_false arg: args.no_legacy_text defaults to True,
# so the config key is only consulted when the flag is passed on the CLI.
use_legacy_text = args.no_legacy_text or config.get("no_legacy_text", False)
if save_chunk and use_legacy_text:
    print(
        "\nWarning to --save_chunk: lossy ASCII transliterations of unicode text for legacy (.wav) file names, --no_legacy_text to disable.\n"
    )

remove_silence = args.remove_silence or config.get("remove_silence", False)
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)

vocoder_name = args.vocoder_name or config.get("vocoder_name", mel_spec_type)
target_rms = args.target_rms or config.get("target_rms", target_rms)
cross_fade_duration = args.cross_fade_duration or config.get("cross_fade_duration", cross_fade_duration)
nfe_step = args.nfe_step or config.get("nfe_step", nfe_step)
cfg_strength = args.cfg_strength or config.get("cfg_strength", cfg_strength)
sway_sampling_coef = args.sway_sampling_coef or config.get("sway_sampling_coef", sway_sampling_coef)
speed = args.speed or config.get("speed", speed)
fix_duration = args.fix_duration or config.get("fix_duration", fix_duration)
device = args.device or config.get("device", device)


# Patches for pip package users: bundled example assets live inside the
# installed f5_tts package, so rewrite relative example paths to package paths.
if "infer/examples/" in ref_audio:
    ref_audio = str(files("f5_tts").joinpath(f"{ref_audio}"))
if "infer/examples/" in gen_file:
    gen_file = str(files("f5_tts").joinpath(f"{gen_file}"))
if "voices" in config:
    for voice in config["voices"]:
        voice_ref_audio = config["voices"][voice]["ref_audio"]
        if "infer/examples/" in voice_ref_audio:
            config["voices"][voice]["ref_audio"] = str(files("f5_tts").joinpath(f"{voice_ref_audio}"))


# If a text file is supplied, its content replaces --gen_text entirely.
if gen_file:
    with open(gen_file, "r", encoding="utf-8") as gen_fp:  # was a leaked codecs.open handle
        gen_text = gen_fp.read()


# output path

wave_path = Path(output_dir) / output_file
# spectrogram_path = Path(output_dir) / "infer_cli_out.png"
if save_chunk:
    output_chunk_dir = os.path.join(output_dir, f"{Path(output_file).stem}_chunks")
    os.makedirs(output_chunk_dir, exist_ok=True)  # idempotent, no exists/makedirs race
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
# load vocoder
|
| 260 |
+
|
| 261 |
+
# ---------------------------------------------------------------------------
# Load vocoder
# ---------------------------------------------------------------------------
if vocoder_name == "vocos":
    vocoder_local_path = "../checkpoints/vocos-mel-24khz"
elif vocoder_name == "bigvgan":
    vocoder_local_path = "../checkpoints/bigvgan_v2_24khz_100band_256x"
else:
    # vocoder_name can come from the config file (argparse choices only
    # constrain the CLI), so fail clearly instead of hitting a NameError
    # on vocoder_local_path below.
    raise ValueError(f"Unknown vocoder_name: {vocoder_name!r} (expected 'vocos' or 'bigvgan')")

vocoder = load_vocoder(
    vocoder_name=vocoder_name, is_local=load_vocoder_from_local, local_path=vocoder_local_path, device=device
)


# ---------------------------------------------------------------------------
# Load TTS model: resolve model config, backbone class, and checkpoint.
# ---------------------------------------------------------------------------
model_cfg = OmegaConf.load(
    args.model_cfg or config.get("model_cfg", str(files("f5_tts").joinpath(f"configs/{model}.yaml")))
)
model_cls = get_class(f"f5_tts.model.{model_cfg.model.backbone}")
model_arc = model_cfg.model.arch

# Defaults used when downloading a released checkpoint from the HF hub.
repo_name, ckpt_step, ckpt_type = "F5-TTS", 1250000, "safetensors"

if model != "F5TTS_Base":
    # Newer models are tied to a single mel spec type; the config must agree.
    assert vocoder_name == model_cfg.model.mel_spec.mel_spec_type

# Overrides for previous model releases (older step counts / repos / formats).
if model == "F5TTS_Base":
    if vocoder_name == "vocos":
        ckpt_step = 1200000
    elif vocoder_name == "bigvgan":
        model = "F5TTS_Base_bigvgan"
        ckpt_type = "pt"
elif model == "E2TTS_Base":
    repo_name = "E2-TTS"
    ckpt_step = 1200000

if not ckpt_file:
    ckpt_file = str(cached_path(f"hf://SWivid/{repo_name}/{model}/model_{ckpt_step}.{ckpt_type}"))

print(f"Using {model}...")
ema_model = load_model(
    model_cls,
    model_arc,
    ckpt_file,
    mel_spec_type=vocoder_name,
    vocab_file=vocab_file,
    device=device,
    use_ema=args.use_ema,
)
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
# inference process
|
| 305 |
+
|
| 306 |
+
|
| 307 |
+
# inference process


def main():
    """Run batch inference.

    Splits gen_text on ``[voice]`` tags, synthesizes each chunk with the
    matching reference voice, optionally saves per-chunk wavs, then
    concatenates everything into a single output wav (optionally with long
    silences removed).
    """
    main_voice = {"ref_audio": ref_audio, "ref_text": ref_text}
    if "voices" not in config:
        voices = {"main": main_voice}
    else:
        voices = config["voices"]
        voices["main"] = main_voice
    # Normalize every reference clip up front (clip/transcribe as needed).
    for voice in voices:
        print("Voice:", voice)
        print("ref_audio ", voices[voice]["ref_audio"])
        voices[voice]["ref_audio"], voices[voice]["ref_text"] = preprocess_ref_audio_text(
            voices[voice]["ref_audio"], voices[voice]["ref_text"]
        )
        print("ref_audio_", voices[voice]["ref_audio"], "\n\n")

    generated_audio_segments = []
    reg1 = r"(?=\[\w+\])"  # lookahead split: keep the [voice] tag with its chunk
    chunks = re.split(reg1, gen_text)
    reg2 = r"\[(\w+)\]"  # extracts the voice name from a [voice] tag
    for text in chunks:
        if not text.strip():
            continue
        match = re.match(reg2, text)
        if match:
            voice = match[1]
        else:
            print("No voice tag found, using main.")
            voice = "main"
        if voice not in voices:
            print(f"Voice {voice} not found, using main.")
            voice = "main"
        text = re.sub(reg2, "", text)
        ref_audio_ = voices[voice]["ref_audio"]
        ref_text_ = voices[voice]["ref_text"]
        local_speed = voices[voice].get("speed", speed)  # per-voice speed override
        gen_text_ = text.strip()
        print(f"Voice: {voice}")
        audio_segment, final_sample_rate, spectrogram = infer_process(
            ref_audio_,
            ref_text_,
            gen_text_,
            ema_model,
            vocoder,
            mel_spec_type=vocoder_name,
            target_rms=target_rms,
            cross_fade_duration=cross_fade_duration,
            nfe_step=nfe_step,
            cfg_strength=cfg_strength,
            sway_sampling_coef=sway_sampling_coef,
            speed=local_speed,
            fix_duration=fix_duration,
            device=device,
        )
        generated_audio_segments.append(audio_segment)

        if save_chunk:
            # Truncate very long chunk text so the filename stays manageable.
            if len(gen_text_) > 200:
                gen_text_ = gen_text_[:200] + " ... "
            if use_legacy_text:
                gen_text_ = unidecode(gen_text_)
            sf.write(
                os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
                audio_segment,
                final_sample_rate,
            )

    if generated_audio_segments:
        final_wave = np.concatenate(generated_audio_segments)

        os.makedirs(output_dir, exist_ok=True)

        # Write directly by path: the previous open(wave_path, "wb") followed
        # by sf.write(f.name, ...) opened the same file twice (truncating
        # handle still held), which fails under Windows file locking.
        sf.write(str(wave_path), final_wave, final_sample_rate)
        # Remove silence
        if remove_silence:
            remove_silence_for_generated_wav(str(wave_path))
        print(wave_path)


if __name__ == "__main__":
    main()
|
src/f5_tts/infer/infer_cli_emotion.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""CLI emotion inference using F5-TTS-Emotional-CFG (emotion-conditioned)."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
import time
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
import torchaudio
|
| 11 |
+
|
| 12 |
+
from f5_tts.infer.infer_emotion import (
|
| 13 |
+
CFM,
|
| 14 |
+
CFMConditioned,
|
| 15 |
+
DiT,
|
| 16 |
+
DiTConditioned,
|
| 17 |
+
TTSModel,
|
| 18 |
+
compute_mel_from_wav,
|
| 19 |
+
cfg_strength,
|
| 20 |
+
hop_length,
|
| 21 |
+
mel_spec_type,
|
| 22 |
+
n_fft,
|
| 23 |
+
n_mel_channels,
|
| 24 |
+
nfe_step,
|
| 25 |
+
sway_sampling_coef,
|
| 26 |
+
target_sample_rate,
|
| 27 |
+
tokenizer,
|
| 28 |
+
win_length,
|
| 29 |
+
)
|
| 30 |
+
from f5_tts.model.utils import get_tokenizer
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def build_arg_parser() -> argparse.ArgumentParser:
    """Assemble the CLI argument parser for emotion-conditioned F5-TTS inference."""
    p = argparse.ArgumentParser(
        description="CLI emotion inference using F5-TTS-Emotional-CFG (emotion-conditioned)."
    )

    emotion_choices = ["Angry", "Surprise", "Neutral", "Sad", "Happy"]

    # --- Reference inputs ---
    p.add_argument(
        "-ref", "--ref-audio-path", type=str, required=True, help="Path to reference .wav for voice cloning."
    )
    p.add_argument("-rt", "--ref-text", type=str, required=True, help="Transcription text for the reference audio.")
    p.add_argument(
        "-re",
        "--ref-emotion",
        type=str,
        default="Neutral",
        choices=emotion_choices,
        help="Reference emotion label (emotion in the reference audio).",
    )

    # --- Inference target ---
    p.add_argument(
        "-it",
        "--inference-text",
        type=str,
        required=True,
        help="New text to synthesize (will be appended after ref_text).",
    )
    p.add_argument(
        "-ie",
        "--inference-emotion",
        type=str,
        required=True,
        choices=emotion_choices,
        help="Target emotion label for the new speech.",
    )

    # --- Output ---
    p.add_argument(
        "-o", "--output-path", type=str, default="data/output.wav", help="Path to the generated audio (.wav)."
    )

    # --- Checkpoints ---
    p.add_argument(
        "--checkpoint-path-emotion",
        type=str,
        default="ckpts/model_emo.pt",
        help="Path to the trained emotion-conditioned model checkpoint (.pt).",
    )

    # --- Tokenizer / vocab ---
    p.add_argument(
        "--vocab-dataset-name",
        type=str,
        default="EmiliaPetite_dataset_ZH_EN",
        help="Dataset name used for tokenizer building.",
    )
    p.add_argument(
        "--tokenizer", type=str, default=tokenizer, choices=["pinyin", "char", "custom"], help="Tokenizer type."
    )
    p.add_argument(
        "--tokenizer-path",
        type=str,
        default=None,
        help="Path to custom tokenizer vocab.txt (if tokenizer='custom').",
    )

    # --- Sampling / guidance params ---
    p.add_argument(
        "--nfe", type=int, default=nfe_step, help="# function evaluations (steps). Lower = faster, lower quality."
    )
    p.add_argument(
        "--cfg-strength", type=float, default=cfg_strength, help="Classifier-free guidance for content/text."
    )
    p.add_argument(
        "--cfg-strength2",
        type=float,
        default=10.0,
        help="Emotion guidance strength; higher = stronger emotion, less natural.",
    )
    p.add_argument(
        "--sway-sampling-coef", type=float, default=sway_sampling_coef, help="Sway sampling coefficient."
    )

    # --- Emotion conditioning block ---
    p.add_argument(
        "--emotion-condition-type",
        type=str,
        default="text_mirror",
        choices=["text_mirror", "cross_attention", "text_early_fusion"],
        help="How emotion is injected into the transformer.",
    )
    p.add_argument("--emotion-dim", type=int, default=128, help="Dimension of emotion embedding.")
    p.add_argument("--emotion-conv-layers", type=int, default=4, help="# of conv layers used in emotion path.")
    p.add_argument(
        "--init-type",
        type=str,
        default="xavier_reduced",
        help="(text_mirror only) initialization method for new emotion weights.",
    )
    p.add_argument(
        "--weight-reduction-scale",
        type=float,
        default=1.0,
        help="(text_mirror only) scale for reduced Xavier init.",
    )

    # --- Audio & mel spec ---
    p.add_argument(
        "--mel-spec-type", type=str, default=mel_spec_type, choices=["vocos", "bigvgan"], help="Vocoder/mel type."
    )
    p.add_argument("--target-sr", type=int, default=target_sample_rate)
    p.add_argument("--n-mel", type=int, default=n_mel_channels)
    p.add_argument("--n-fft", type=int, default=n_fft)
    p.add_argument("--hop-length", type=int, default=hop_length)
    p.add_argument("--win-length", type=int, default=win_length)

    # --- Device ---
    p.add_argument("--device", type=str, default="cuda", choices=["cuda", "mps", "cpu"], help="Inference device.")

    return p
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def main():
    """Parse CLI args, build the emotion-conditioned model, run inference, save audio."""
    args = build_arg_parser().parse_args()

    mel_spec_kwargs = dict(
        n_fft=args.n_fft,
        hop_length=args.hop_length,
        win_length=args.win_length,
        n_mel_channels=args.n_mel,
        target_sample_rate=args.target_sr,
        mel_spec_type=args.mel_spec_type,
    )

    # Resolve the tokenizer vocabulary source, then build the char map.
    if args.tokenizer == "custom":
        if not args.tokenizer_path:
            raise ValueError("tokenizer='custom' requires --tokenizer-path (vocab.txt).")
        vocab_source = args.tokenizer_path
    else:
        vocab_source = args.vocab_dataset_name
    vocab_char_map, vocab_size = get_tokenizer(vocab_source, args.tokenizer)

    emotion_conditioning_parameters = {
        "emotion_condition_type": args.emotion_condition_type,
        "init_type": args.init_type,
        "weight_reduction_scale": args.weight_reduction_scale,
        "emotion_dim": args.emotion_dim,
        "emotion_conv_layers": args.emotion_conv_layers,
        "load_emotion_weights": False,
    }

    # Backbone hyper-parameters for the emotion-conditioned DiT.
    backbone_cfg = dict(
        dim=1024,
        depth=22,
        heads=16,
        ff_mult=2,
        text_dim=512,
        emotion_dim=args.emotion_dim,
        conv_layers=args.emotion_conv_layers,
    )

    backbone = DiTConditioned(
        **backbone_cfg,
        text_num_embeds=vocab_size,
        mel_dim=args.n_mel,
        emotion_conditioning=emotion_conditioning_parameters,
    )

    cfm = CFMConditioned(
        transformer=backbone,
        mel_spec_kwargs=mel_spec_kwargs,
        vocab_char_map=vocab_char_map,
    )

    tts = TTSModel(
        model=cfm,
        vocoder_name=args.mel_spec_type,
        checkpoint_path=args.checkpoint_path_emotion,
        emotion_conditioning_parameters=emotion_conditioning_parameters,
        device=args.device,
    )

    # Mel spectrogram of the reference clip drives the voice cloning.
    ref_mel = compute_mel_from_wav(args.ref_audio_path, mel_spec_kwargs, device=args.device)

    t0 = time.perf_counter()
    gen_mel, gen_audio = tts.infer(
        inference_text=args.inference_text,
        inference_emotion=args.inference_emotion,
        ref_mel=ref_mel,
        ref_text=args.ref_text,
        ref_emotion=args.ref_emotion,
        steps=args.nfe,
        cfg_strength=args.cfg_strength,
        cfg_strength2=args.cfg_strength2,
        sway_sampling_coef=args.sway_sampling_coef,
    )
    elapsed = time.perf_counter() - t0

    out_path = Path(args.output_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    torchaudio.save(str(out_path), gen_audio.cpu(), args.target_sr)

    print(f"[OK] Saved: {out_path} | duration: {elapsed:.2f}s")
    print(f"  Inference emotion: {args.inference_emotion}")
    print(f"  Steps (nfe): {args.nfe} | cfg_strength: {args.cfg_strength} | cfg_strength2: {args.cfg_strength2}")


if __name__ == "__main__":
    main()
|
src/f5_tts/infer/infer_elevenlabs.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ElevenLabs Voice Cloning Inference Script
|
| 3 |
+
|
| 4 |
+
Uses ElevenLabs API to clone a voice from reference audio and generate speech.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import argparse
|
| 8 |
+
import os
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
from elevenlabs import ElevenLabs
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Clone a voice from a reference clip via the ElevenLabs API, synthesize
    the requested text with it, and (unless --keep_voice) delete the clone."""
    load_dotenv()

    parser = argparse.ArgumentParser(description="ElevenLabs Voice Cloning Inference")
    parser.add_argument("--ref_audio", type=str, required=True, help="Path to reference audio file")
    parser.add_argument("--ref_text", type=str, default=None, help="Reference text (unused, for API compatibility)")
    parser.add_argument("--gen_text", type=str, required=True, help="Text to generate speech for")
    parser.add_argument("--output_file", type=str, required=True, help="Output filename")
    parser.add_argument("--output_dir", type=str, default=".", help="Output directory")
    parser.add_argument("--model", type=str, default="eleven_multilingual_v2", help="ElevenLabs model ID")
    parser.add_argument("--keep_voice", action="store_true", help="Keep cloned voice after generation")
    args = parser.parse_args()

    api_key = os.getenv("ELEVENLABS_API_KEY")
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not found in environment. Add it to .env file.")

    client = ElevenLabs(api_key=api_key)

    # Instant voice clone from the reference clip.
    print(f"Creating voice clone from: {args.ref_audio}")
    with open(args.ref_audio, "rb") as ref_fp:
        voice = client.voices.ivc.create(
            name="temp_clone_voice",
            files=[ref_fp],
        )
    print(f"Voice created with ID: {voice.voice_id}")

    try:
        # Synthesize with the freshly cloned voice.
        print(f"Generating speech for text: {args.gen_text[:50]}...")
        audio_stream = client.text_to_speech.convert(
            voice_id=voice.voice_id,
            text=args.gen_text,
            model_id=args.model,
        )

        # Stream the returned chunks straight to disk.
        out_path = Path(args.output_dir) / args.output_file
        out_path.parent.mkdir(parents=True, exist_ok=True)
        with open(out_path, "wb") as out_fp:
            for chunk in audio_stream:
                out_fp.write(chunk)

        print(f"Audio saved to: {out_path}")

    finally:
        # Always clean up the temporary clone unless the user asked to keep it.
        if not args.keep_voice:
            print(f"Deleting cloned voice: {voice.voice_id}")
            client.voices.delete(voice.voice_id)
            print("Voice deleted.")


if __name__ == "__main__":
    main()
|
src/f5_tts/infer/infer_emotion.py
ADDED
|
@@ -0,0 +1,265 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Inference script for emotion-conditioned F5-TTS."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
import torchaudio
|
| 9 |
+
|
| 10 |
+
from f5_tts.model import CFM
|
| 11 |
+
from f5_tts.model.cfm_emotion import CFMConditioned
|
| 12 |
+
from f5_tts.model.backbones.dit import DiT
|
| 13 |
+
from f5_tts.model.backbones.dit_emotion import DiTConditioned
|
| 14 |
+
from f5_tts.model.backbones.unett import UNetT
|
| 15 |
+
from f5_tts.model.modules import MelSpec
|
| 16 |
+
from f5_tts.model.utils import get_tokenizer
|
| 17 |
+
from f5_tts.infer.utils_infer import cfg_strength, load_vocoder, nfe_step, sway_sampling_coef
|
| 18 |
+
|
| 19 |
+
# Dataset Settings
|
| 20 |
+
# --- Dataset / mel-spectrogram settings shared by the emotion scripts ---
target_sample_rate = 24000
n_mel_channels = 100
hop_length = 256
win_length = 1024
n_fft = 1024
mel_spec_type = "vocos"

# Tokenizer defaults (a custom vocab path may override these via the CLI).
tokenizer = "pinyin"
tokenizer_path = None

# Integer ids assigned to the supported emotion labels.
emotion_dict = {
    "Angry": 1,
    "Neutral": 2,
    "Sad": 3,
    "Surprise": 4,
    "Happy": 5,
}
|
| 37 |
+
|
| 38 |
+
# --- Model classes and backbone hyper-parameters ---
model_cls_emotion = DiTConditioned
model_cls_pretrained = DiT
model_cfg_pretrained = dict(
    dim=1024,
    depth=22,
    heads=16,
    ff_mult=2,
    text_dim=512,
    conv_layers=4,
)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def compute_mel_from_wav(
    audio_path: str,
    mel_spec_kwargs: dict,
    device: str = "cpu",
) -> torch.Tensor:
    """Load a .wav file and return its mel spectrogram as a (frames, mel_bins) tensor.

    The audio is downmixed to mono and resampled to
    ``mel_spec_kwargs["target_sample_rate"]`` before the mel transform is applied.
    """
    waveform, src_rate = torchaudio.load(audio_path)

    # Downmix any multi-channel recording to a single channel.
    if waveform.shape[0] > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    # Bring the signal to the model's expected sample rate when needed.
    tgt_rate = mel_spec_kwargs["target_sample_rate"]
    if src_rate != tgt_rate:
        resampler = torchaudio.transforms.Resample(orig_freq=src_rate, new_freq=tgt_rate)
        waveform = resampler(waveform)

    extractor = MelSpec(**mel_spec_kwargs).to(device)
    mel = extractor(waveform.to(device))
    # (1, mel_bins, frames) -> (frames, mel_bins)
    return mel.squeeze(0).permute(1, 0)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class TTSModel:
    """Wrapper around a (possibly emotion-conditioned) CFM model plus a vocoder.

    Handles checkpoint loading, mel-spectrogram sampling, and waveform decoding.
    """

    def __init__(self, model, vocoder_name, checkpoint_path: str, emotion_conditioning_parameters, device: str = "cuda"):
        self.device = device
        self.model = model
        self._load_checkpoint(checkpoint_path)
        self.emotion_conditioning_parameters = emotion_conditioning_parameters
        self.vocoder_name = vocoder_name
        self.vocoder = load_vocoder(vocoder_name=self.vocoder_name)

    def _load_checkpoint(self, path: str):
        """Load weights from either a training checkpoint or an EMA-only checkpoint.

        Training checkpoints are identified by a top-level "step" key; EMA-only
        checkpoints store weights under "ema_model_state_dict" with an
        "ema_model." prefix on every key.
        """
        checkpoint = torch.load(path, weights_only=True, map_location="cpu")

        if "step" in checkpoint:
            # Drop mel filterbank/window buffers whose saved shapes may not match
            # the freshly constructed MelSpec, then load the rest non-strictly.
            for key in ["mel_spec.mel_stft.mel_scale.fb", "mel_spec.mel_stft.spectrogram.window"]:
                if key in checkpoint["model_state_dict"]:
                    del checkpoint["model_state_dict"][key]
            self.model.load_state_dict(checkpoint["model_state_dict"], strict=False)
        else:
            # EMA-only checkpoint: strip the "ema_model." prefix and EMA bookkeeping keys.
            checkpoint["model_state_dict"] = {
                k.replace("ema_model.", ""): v
                for k, v in checkpoint["ema_model_state_dict"].items()
                if k not in ["initted", "step"]
            }
            self.model.load_state_dict(checkpoint["model_state_dict"], strict=False)

        self.model = self.model.to(self.device)
        self.model.eval()
        del checkpoint  # release the CPU copy of the weights

    def remove_leading_value(self, spec, value=0.0):
        """Remove leading frames whose entries all equal `value` from a melspectrogram.

        `spec` is expected to be (batch=1, frames, mel_bins). Only the leading run
        of constant-`value` frames is trimmed; matching frames appearing later in
        the spectrogram are preserved. (The previous implementation counted ALL
        matching frames and trimmed that many from the front, which over-trims
        when a constant frame occurs mid-spectrogram.)
        """
        frame_matches = torch.all(spec[0] == value, dim=1)
        non_matching = torch.nonzero(~frame_matches)
        # If every frame matches, the whole spectrogram is trimmed.
        num_leading = non_matching[0].item() if non_matching.numel() > 0 else frame_matches.numel()
        return spec[:, num_leading:, :]

    @torch.inference_mode()
    def infer(
        self,
        inference_text: str,
        inference_emotion: str,
        ref_mel: torch.Tensor,
        ref_text: str,
        ref_emotion: str,
        steps: int,
        cfg_strength,
        cfg_strength2,
        sway_sampling_coef,
        seed: int = 50,
    ) -> torch.Tensor:
        """Generate speech for `inference_text`, cloning the voice in `ref_mel`/`ref_text`.

        When `inference_emotion` is None the plain (non-conditioned) sampling path
        is used and `cfg_strength2` is ignored. Returns a tuple
        (generated_mel_2nd_half, generated_audio) — only the newly generated
        portion after the reference prompt.
        """
        # The model generates a continuation: reference text first, new text after.
        text_input = [ref_text + " " + inference_text]
        emotion_input = [[ref_emotion, inference_emotion]]
        first_phrase_length = [len(ref_text)]

        mel_lengths = torch.LongTensor([ref_mel.shape[0]])
        ref_audio_len = mel_lengths.item()
        # Heuristic: output duration scales with the character-length ratio of the texts.
        estimated_duration = ref_audio_len + int(ref_audio_len * len(inference_text) / len(ref_text))

        start = time.perf_counter()
        if inference_emotion is not None:
            generated_melspec, _ = self.model.sample(
                cond=ref_mel.to(self.device).unsqueeze(0),
                text=text_input,
                emotion=emotion_input,
                first_phrase_length=first_phrase_length,
                duration=estimated_duration,
                steps=steps,
                cfg_strength=cfg_strength,
                cfg_strength2=cfg_strength2,
                sway_sampling_coef=sway_sampling_coef,
                seed=seed,
            )
        else:
            generated_melspec, _ = self.model.sample(
                cond=ref_mel.to(self.device).unsqueeze(0),
                text=text_input,
                duration=estimated_duration,
                steps=steps,
                cfg_strength=cfg_strength,
                sway_sampling_coef=sway_sampling_coef,
                seed=seed,
            )
        end = time.perf_counter()
        # Previously measured but never reported; print symmetrically with vocoder time.
        print(f"TIME sampling ({len(text_input[0])}): ", end - start)

        # Strip any leading padding frames, then keep only the newly generated part.
        generated_melspec = self.remove_leading_value(generated_melspec)
        generated_melspec_2ndhalf = generated_melspec[:, ref_mel.shape[0] :, :]

        start = time.perf_counter()
        generated_audio = self.vocode(generated_melspec_2ndhalf)
        end = time.perf_counter()
        print(f"TIME vocoder ({len(text_input[0])}): ", end - start)

        return generated_melspec_2ndhalf, generated_audio

    def vocode(self, mel: torch.Tensor) -> torch.Tensor:
        """Decode a (frames, mel) or (batch, frames, mel) melspectrogram to a waveform."""
        mel = mel.unsqueeze(0) if mel.ndim == 2 else mel
        # Vocoder expects (batch, mel_bins, frames).
        return self.vocoder.decode(mel.float().permute(0, 2, 1).to(self.device))
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
if __name__ == "__main__":
    # Reference clip whose voice (and emotion) is cloned.
    ref_audio_path = "data/0011_angry.wav"
    ref_emotion = "Angry"
    ref_text = "The nine, the eggs, I keep."

    # Text / target emotion to synthesize, and the output file stem.
    inference_text = "Hello, this is a text to check emotion."
    inference_emotion = "Surprise"
    output_path = "data/output.wav"

    nfe = nfe_step  # number of function evaluations (sampling steps)
    cfg_strength2 = 10  # secondary CFG strength for the emotion condition

    emotion_conditioning_parameters = {
        "emotion_condition_type": "text_mirror",
        "init_type": "xavier_reduced",
        "weight_reduction_scale": 1,
        "emotion_dim": 128,
        "emotion_conv_layers": 4,
        "load_emotion_weights": False,
    }

    # NOTE(review): tokenizer_path is assigned but get_tokenizer below is keyed
    # by dataset name — confirm which vocab file is actually loaded.
    tokenizer_path = "ckpts/vocab.txt"
    vocab_char_map, vocab_size = get_tokenizer("EmiliaPetite_dataset_ZH_EN", "pinyin")
    device = "cuda"

    checkpoint_path_emotion = "ckpts/model_emo.pt"
    checkpoint_path_pretrained = "ckpts/model_0.pt"

    mel_spec_kwargs = dict(
        n_fft=n_fft,
        hop_length=hop_length,
        win_length=win_length,
        n_mel_channels=n_mel_channels,
        target_sample_rate=target_sample_rate,
        mel_spec_type=mel_spec_type,
    )

    # Backbone config for the emotion-conditioned model.
    # NOTE(review): conv_layers is taken from "emotion_conv_layers" — confirm this
    # is intentional rather than the text-embedding conv layer count (4 in both).
    model_cfg_emotion = dict(
        dim=1024,
        depth=22,
        heads=16,
        ff_mult=2,
        text_dim=512,
        emotion_dim=emotion_conditioning_parameters["emotion_dim"],
        conv_layers=emotion_conditioning_parameters["emotion_conv_layers"],
    )

    # Build the emotion-conditioned model and wrap it with its checkpoint + vocoder.
    model_emotion = CFMConditioned(
        transformer=model_cls_emotion(
            **model_cfg_emotion,
            text_num_embeds=vocab_size,
            mel_dim=n_mel_channels,
            emotion_conditioning=emotion_conditioning_parameters,
        ),
        mel_spec_kwargs=mel_spec_kwargs,
        vocab_char_map=vocab_char_map,
    )
    model_wrapper_emotion = TTSModel(
        model_emotion, mel_spec_type, checkpoint_path_emotion, emotion_conditioning_parameters, device
    )

    # Baseline: the pretrained, non-conditioned model for A/B comparison.
    model_pretrained = CFM(
        transformer=model_cls_pretrained(**model_cfg_pretrained, text_num_embeds=vocab_size, mel_dim=n_mel_channels),
        mel_spec_kwargs=mel_spec_kwargs,
        vocab_char_map=vocab_char_map,
    )
    model_wrapper_pretrained = TTSModel(
        model_pretrained, mel_spec_type, checkpoint_path_pretrained, emotion_conditioning_parameters, device
    )

    # Reference mel spectrogram shared by both generations.
    mel = compute_mel_from_wav(ref_audio_path, mel_spec_kwargs, device="cuda")

    # Emotion-conditioned generation (cfg_strength/sway_sampling_coef come from
    # f5_tts.infer.utils_infer module-level defaults).
    generated_melspec, generated_audio = model_wrapper_emotion.infer(
        inference_text=inference_text,
        inference_emotion=inference_emotion,
        ref_mel=mel,
        ref_text=ref_text,
        ref_emotion=ref_emotion,
        steps=nfe,
        cfg_strength=cfg_strength,
        cfg_strength2=cfg_strength2,
        sway_sampling_coef=sway_sampling_coef,
    )

    torchaudio.save(output_path.replace(".wav", f"_{inference_emotion}.wav"), generated_audio.cpu(), target_sample_rate)

    # Baseline generation without emotion conditioning.
    generated_melspec, generated_audio = model_wrapper_pretrained.infer(
        inference_text=inference_text,
        inference_emotion=None,
        ref_mel=mel,
        ref_text=ref_text,
        ref_emotion=None,
        steps=nfe,
        cfg_strength=cfg_strength,
        cfg_strength2=None,
        sway_sampling_coef=sway_sampling_coef,
    )

    torchaudio.save(output_path.replace(".wav", "_NOemotion.wav"), generated_audio.cpu(), target_sample_rate)
|