|
|
import tempfile
import wave

import gradio as gr
import torch
from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    VitsModel,
)
|
|
|
|
|
|
|
|
# Hub id of the Persian VITS text-to-speech checkpoint.
# NOTE(review): confirm this repo exists on the Hugging Face Hub and is stored
# in the transformers VITS format (the format used by Meta's MMS-TTS
# checkpoints); otherwise the two `from_pretrained` calls below will fail.
MODEL_ID = "harveenchadha/vits-fa"

# VITS is a text-to-waveform model: it is loaded with `VitsModel` and its
# tokenizer, not with the speech-to-text `AutoModelForSpeechSeq2Seq` class.
# The name `processor` is kept so existing references keep working.
processor = AutoTokenizer.from_pretrained(MODEL_ID)
model = VitsModel.from_pretrained(MODEL_ID)

device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)


def _write_wav(waveform, sample_rate):
    """Write a mono float waveform to a new temporary 16-bit PCM WAV file.

    Args:
        waveform: 1-D float tensor of audio samples, expected in [-1, 1].
        sample_rate: Sampling rate in Hz stored in the WAV header.

    Returns:
        Path of the created ``.wav`` file. The file is not deleted
        automatically (``delete=False``) because the caller hands the path on.
    """
    # Clamp before scaling so out-of-range samples saturate instead of
    # wrapping around when cast to int16.
    pcm = (waveform.detach().clamp(-1.0, 1.0) * 32767.0).round().to(torch.int16)
    # WAV data is little-endian regardless of the host byte order.
    frames = pcm.cpu().numpy().astype("<i2").tobytes()

    # Both context managers close on exit, so no file handle is leaked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp, \
            wave.open(tmp, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(frames)
    return tmp.name


def tts_batch(*texts):
    """Synthesize one WAV file per input text.

    Gradio passes the value of every input component as a separate positional
    argument, so the texts are collected with ``*texts``. For backward
    compatibility a single list/tuple argument is also accepted and treated
    as the batch.

    Args:
        *texts: Input strings (or one list/tuple of strings).

    Returns:
        A list with one entry per input, in order: the path of the generated
        ``.wav`` file, or ``None`` when the text is missing or blank.
    """
    if len(texts) == 1 and isinstance(texts[0], (list, tuple)):
        texts = texts[0]

    sample_rate = model.config.sampling_rate
    outputs = []
    for text in texts:
        # Skip empty boxes (and None) but keep their slot so outputs stay
        # aligned with the inputs.
        if not text or not text.strip():
            outputs.append(None)
            continue

        inputs = processor(text=text, return_tensors="pt").to(device)
        with torch.no_grad():
            # VITS produces audio from a plain forward pass; `.waveform` is
            # (batch, samples), and the batch holds a single item here.
            waveform = model(**inputs).waveform[0]

        outputs.append(_write_wav(waveform, sample_rate))
    return outputs
|
|
|
|
|
|
|
|
# Labels for the five text inputs and the five matching audio outputs.
_INPUT_LABELS = ("متن ۱", "متن ۲", "متن ۳", "متن ۴", "متن ۵")
_OUTPUT_LABELS = ("خروجی ۱", "خروجی ۲", "خروجی ۳", "خروجی ۴", "خروجی ۵")

# Page layout: heading, one row of text boxes, the convert button, and one
# row of audio players.
with gr.Blocks(title="Persian Multi-Text TTS") as demo:
    gr.Markdown("## 🎙️ مبدل همزمان متن به گفتار فارسی (۵ ورودی)")
    gr.Markdown("متنهای زیر را پر کن تا پنج فایل صوتی جداگانه بسازد:")

    with gr.Row():
        # Components are created inside the row context, in label order.
        text1, text2, text3, text4, text5 = (
            gr.Textbox(label=caption, lines=2) for caption in _INPUT_LABELS
        )

    btn = gr.Button("🔊 تبدیل به صدا")

    with gr.Row():
        out1, out2, out3, out4, out5 = (
            gr.Audio(label=caption, type="filepath") for caption in _OUTPUT_LABELS
        )

    # The i-th text box feeds the i-th audio player through `tts_batch`.
    text_boxes = [text1, text2, text3, text4, text5]
    audio_players = [out1, out2, out3, out4, out5]
    btn.click(fn=tts_batch, inputs=text_boxes, outputs=audio_players)

demo.launch()