"""Gradio app that renders a two-speaker Persian dialogue to one MP3 via Edge TTS."""

import os
import tempfile

import edge_tts
import gradio as gr
from pydub import AudioSegment

# Display name -> Edge TTS voice identifier, grouped by language.
language_dict = {
    "Persian": {
        "Dilara (Female)": "fa-IR-DilaraNeural",
        "Farid (Male)": "fa-IR-FaridNeural",
    }
}


async def tts_dialogue_persian(dialogue_text: str) -> str:
    """Synthesize a ``speaker: text`` dialogue and return the combined MP3 path.

    Each input line is split on its first ``:``. Lines without a colon, or
    whose text part is empty after stripping, are skipped. If the speaker
    part contains "زن" the female voice is used; any other speaker gets the
    male voice.

    Args:
        dialogue_text: Newline-separated dialogue, one utterance per line.

    Returns:
        Path of a temporary ``.mp3`` file holding all utterances concatenated
        in input order.

    Raises:
        gr.Error: If no line yields any text to synthesize.
    """
    voices = language_dict["Persian"]
    audio_segments = []

    for line in (dialogue_text or "").strip().split("\n"):
        if ":" not in line:
            continue
        speaker, text = line.split(":", 1)
        text = text.strip()
        if not text:
            # Nothing to speak on this line; do not call the TTS service.
            continue

        if "زن" in speaker:
            voice = voices["Dilara (Female)"]
        else:
            voice = voices["Farid (Male)"]

        # Reserve a path only; the handle is closed before edge-tts writes to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        try:
            await edge_tts.Communicate(text, voice).save(tmp_path)
            audio_segments.append(AudioSegment.from_file(tmp_path))
        finally:
            # The per-line file is no longer needed once loaded (or on failure),
            # so remove it instead of leaking one file per dialogue line.
            os.remove(tmp_path)

    if not audio_segments:
        # sum([]) would be the int 0, which has no .export().
        raise gr.Error(
            "No dialogue lines found. Prefix each line with 'زن:' or 'مرد:'."
        )

    # Combine all segments
    final_audio = sum(audio_segments)

    # Kept on disk (delete=False) because the returned path is handed to the
    # Audio output component after this function returns.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        final_path = tmp_file.name
    final_audio.export(final_path, format="mp3")
    return final_path


# Gradio interface
with gr.Blocks(title="Persian TTS Dialogue") as demo:
    gr.HTML("<h1>Persian TTS Dialogue (Edge TTS)</h1>")
    gr.Markdown("Use 'زن:' and 'مرد:' as prefixes for lines to select voice.")
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=10,
                label="Input Dialogue",
                placeholder="مرد: سلام\nزن: سلام، خوبی؟",
            )
            run_btn = gr.Button(value="Generate Audio", variant="primary")
        with gr.Column():
            output_audio = gr.Audio(type="filepath", label="Generated Dialogue")
    run_btn.click(tts_dialogue_persian, inputs=[input_text], outputs=[output_audio])


if __name__ == "__main__":
    demo.queue().launch(share=True)