minhanh1122 committed on
Commit
330f4a2
·
verified ·
1 Parent(s): 71cc66e

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +31 -0
  2. f5_tts_loader.py +32 -0
  3. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import soundfile as sf
3
+ import tempfile
4
+ from f5_tts_loader import F5TTS
5
+
6
# Build the synthesizer once at import time; request handlers in this module
# share this single instance instead of reloading the model on every call.
tts = F5TTS(model_name="hynt/F5-TTS-Vietnamese-100h")
7
+
8
+
9
def generate_speech(text):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    Args:
        text: The text to speak. ``None`` and empty or whitespace-only
            strings are treated as "nothing to synthesize".

    Returns:
        The path of a newly created ``.wav`` file containing the audio, or
        ``None`` when there is no text to synthesize.
    """
    # Guard against None as well as blank input so the handler never raises
    # AttributeError on a missing value.
    if text is None or not text.strip():
        return None

    audio, sr = tts.tts(text)

    # tempfile.mktemp() only invents a name, leaving a window in which another
    # process can create that path first. NamedTemporaryFile creates the file
    # itself; delete=False keeps it on disk after the handle is closed so the
    # caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_path = tmp.name

    # Write only after our handle is closed so the path can be reopened on
    # platforms that forbid opening a file twice.
    sf.write(out_path, audio, sr)

    return out_path
20
+
21
+
22
# Declare the page: a heading, a text box, an audio player and one button.
# Components are created in the order they should appear.
with gr.Blocks(title="Vietnamese TTS Free") as demo:
    gr.Markdown(
        value="# 🇻🇳 Vietnamese Text-to-Speech (F5-TTS, Free)\nNhập tiếng Việt để tạo giọng nói:"
    )

    text_input = gr.Textbox(label="Văn bản", lines=5)
    audio_output = gr.Audio(type="filepath", label="Âm thanh")
    btn = gr.Button(value="🎤 Convert")

    # Clicking the button sends the textbox content to generate_speech and
    # shows the returned file path in the audio player.
    btn.click(generate_speech, text_input, audio_output)

# Start serving the interface.
demo.launch()
f5_tts_loader.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
+ from transformers import AutoTokenizer
4
+
5
+
6
class F5TTS:
    """Pair a Hugging Face tokenizer with a model object loaded on the CPU.

    NOTE(review): the checkpoint fetched is named ``model.safetensors`` but it
    is read with ``torch.load``, and the result is then used as a model object
    (``.eval()``, ``.generate()``). ``torch.load`` is documented for files
    written by ``torch.save`` -- confirm this loading path works for the
    target repository before relying on it.
    """

    def __init__(self, model_name="hynt/F5-TTS-Vietnamese-100h"):
        """Load the tokenizer and model for ``model_name``.

        Args:
            model_name: Hugging Face Hub repository id used for both the
                tokenizer and the checkpoint download.
        """
        cpu = torch.device("cpu")
        self.device = cpu

        # Tokenizer comes straight from the Hub repository.
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Fetch the checkpoint file, then deserialize it onto the CPU.
        # NOTE(review): torch.load unpickles its input; only point this at
        # repositories you trust.
        checkpoint_path = self._download_model(model_name)
        self.model = torch.load(checkpoint_path, map_location=cpu)

        # Switch the loaded object to evaluation mode.
        self.model.eval()

    def _download_model(self, repo):
        """Download ``model.safetensors`` from the Hub repository ``repo``.

        Returns:
            Whatever ``hf_hub_download`` returns for that file (presumably
            the local path of the downloaded file -- verify against the
            huggingface_hub documentation).
        """
        from huggingface_hub import hf_hub_download

        local_file = hf_hub_download(repo_id=repo, filename="model.safetensors")
        return local_file

    def tts(self, text, sample_rate=22050):
        """Run the model on ``text`` and return ``(audio, sample_rate)``.

        Args:
            text: Input text handed to the tokenizer.
            sample_rate: Value returned unchanged alongside the audio; it is
                not used to resample or otherwise alter the output.

        Returns:
            A tuple of the first generated item converted to a NumPy array,
            and ``sample_rate``.
        """
        encoded = self.tokenizer(text, return_tensors="pt")
        input_ids = encoded["input_ids"].to(self.device)

        # Gradients are not needed for generation.
        with torch.no_grad():
            generated = self.model.generate(input_ids)
            waveform = generated[0].cpu().numpy()

        return waveform, sample_rate
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchaudio
3
+ gradio
4
+ numpy
5
+ soundfile
6
+ einops
7
+ transformers
8
+ accelerate
9
+ sentencepiece