voice / f5_tts_loader.py
minhanh1122's picture
Upload 3 files
330f4a2 verified
raw
history blame contribute delete
987 Bytes
import torch
import torchaudio
from transformers import AutoTokenizer
class F5TTS:
    """CPU-only text-to-speech wrapper around a checkpoint hosted on Hugging Face.

    Construction fetches a tokenizer and a checkpoint file from the given
    repository, deserializes the checkpoint onto the CPU and switches the
    resulting model to evaluation mode.  ``tts`` then tokenizes text and
    returns whatever the model's ``generate`` method produces for it.
    """

    def __init__(self, model_name="hynt/F5-TTS-Vietnamese-100h"):
        """Download and load the tokenizer and model for *model_name*.

        Args:
            model_name: Hugging Face repository id used for both the
                tokenizer and the checkpoint download.

        Raises:
            TypeError: If the deserialized checkpoint does not expose the
                ``eval`` and ``generate`` methods this class relies on
                (for example because it is a bare state dict).
        """
        self.device = torch.device("cpu")

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Load model weights
        checkpoint_path = self._download_model(model_name)
        # NOTE(review): torch.load deserializes via pickle, so only point this
        # class at repositories you trust.
        # NOTE(review): the default download is a ``.safetensors`` file, which
        # is not a ``torch.save`` archive; torch.load is expected to reject it.
        # Reading it needs a safetensors reader plus the model's architecture
        # class, neither of which is available in this module -- confirm and
        # wire those in.
        loaded = torch.load(checkpoint_path, map_location=self.device)
        self.model = self._require_model(loaded)
        self.model.eval()

    @staticmethod
    def _require_model(loaded):
        """Return *loaded* unchanged if it offers ``eval()`` and ``generate()``.

        Failing here gives a descriptive error at construction time instead
        of an opaque ``AttributeError`` later on.

        Raises:
            TypeError: If either method is missing or not callable.
        """
        missing = [
            name
            for name in ("eval", "generate")
            if not callable(getattr(loaded, name, None))
        ]
        if missing:
            raise TypeError(
                "checkpoint deserialized to "
                f"{type(loaded).__name__!r}, which lacks callable "
                f"{', '.join(missing)}; a bare state dict must be loaded "
                "into its model class before it can be used here"
            )
        return loaded

    def _download_model(self, repo, filename="model.safetensors"):
        """Download *filename* from the Hugging Face repo *repo*.

        Args:
            repo: Repository id passed to ``hf_hub_download``.
            filename: Name of the file inside the repository.  Defaults to
                ``"model.safetensors"``.

        Returns:
            The value returned by ``hf_hub_download`` (the local file path).
        """
        # Imported lazily so the dependency is only needed when downloading.
        from huggingface_hub import hf_hub_download

        return hf_hub_download(repo_id=repo, filename=filename)

    def tts(self, text, sample_rate=22050):
        """Synthesize audio for *text*.

        Args:
            text: Text to synthesize; must not be ``None`` or blank.
            sample_rate: Value returned alongside the audio.  It is passed
                through unchanged -- no resampling is performed here, so it
                must already match the model's output rate.

        Returns:
            ``(audio, sample_rate)`` where ``audio`` is the first element of
            the model's ``generate`` output converted to a NumPy array
            (presumably a waveform -- confirm against the model).

        Raises:
            ValueError: If *text* is ``None`` or an empty/whitespace-only
                string.
        """
        if text is None or (isinstance(text, str) and not text.strip()):
            raise ValueError("text must be a non-empty string")

        encoded = self.tokenizer(text, return_tensors="pt")
        tokens = encoded["input_ids"].to(self.device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            generated = self.model.generate(tokens)
        audio = generated[0].cpu().numpy()

        return audio, sample_rate