| import gradio as gr | |
| from config import custom_css | |
| from synthesis import generate_speech | |
| from GE2PE import GE2PE | |
# Selectable G2P checkpoints: display name -> local model directory.
# Insertion order is significant because the keys are listed as the UI choices.
MODEL_PATHS = {"Homo-GE2PE": "./homo-ge2pe", "Homo-T5": "./homo-t5"}

# GE2PE instances that have already been constructed, keyed by model name.
_g2p_cache = {}
def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, constructing it on first use.

    Constructed instances are stored in ``_g2p_cache`` and reused on later
    calls with the same name.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached (or newly created) GE2PE instance.

    Raises:
        ValueError: If *model_name* has no entry in ``MODEL_PATHS``.
    """
    try:
        return _g2p_cache[model_name]
    except KeyError:
        pass  # Not loaded yet; fall through and build it.

    checkpoint_dir = MODEL_PATHS.get(model_name)
    if checkpoint_dir is None:
        raise ValueError(f"Unknown model: {model_name}")

    # NOTE(review): GPU=False presumably selects CPU inference — confirm against GE2PE.
    loaded = GE2PE(model_path=checkpoint_dir, GPU=False)
    _g2p_cache[model_name] = loaded
    return loaded
def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on *text* for the UI.

    Args:
        model_name: Name of the G2P model to use; resolved via ``_get_g2p``.
        text: Input text. Empty, ``None`` or whitespace-only input
            short-circuits to an empty result without touching the model.
        use_rules: Forwarded to the model's ``generate`` as ``use_rules``.
        use_dict: Forwarded to the model's ``generate`` as ``use_dict``.

    Returns:
        The first element of the model output, ``""`` when there is no input
        or no output, or a ``"⚠️ Error: ..."`` message if anything fails.
    """
    if not (text and text.strip()):
        return ""

    # This is a UI handler: every failure (model lookup, loading, inference,
    # result indexing) is turned into a visible message instead of raising.
    try:
        outputs = _get_g2p(model_name).generate(
            [text],
            use_rules=use_rules,
            use_dict=use_dict,
        )
        if outputs:
            return outputs[0]
        return ""
    except Exception as err:
        return f"⚠️ Error: {str(err)}"
# Acknowledgments and citation text rendered below the tabs.
_FOOTER_MARKDOWN = """
### Acknowledgments
- [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
- [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
- [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
- [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
- [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
- [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
- [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
- [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
- [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
### Citation
```bibtex
@misc{qharabagh2025fastfancyrethinkingg2p,
title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
year={2025},
eprint={2505.12973},
archivePrefix={arXiv},
primaryClass={cs.CL},
}
@article{fetrat2024manatts,
title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
journal={arXiv preprint arXiv:2409.07259},
year={2024},
}
```
"""


def _build_g2p_tab():
    """Create the grapheme-to-phoneme tab and wire its button to ``ge2pe_infer``.

    Must be called while a ``gr.Tabs`` context is open, because the components
    are created inside a ``gr.TabItem`` opened here.
    """
    # NOTE(review): which widgets sit inside each gr.Row was reconstructed
    # from a flattened source — confirm against the intended layout.
    with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
        gr.Markdown(
            "Convert Persian text to its phonemic transcription. "
            "Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary."
        )
        with gr.Row():
            model_selector = gr.Radio(
                choices=list(MODEL_PATHS.keys()),
                value="Homo-GE2PE",
                label="G2P Model",
            )
        g2p_input = gr.Textbox(
            label="Persian Text",
            placeholder="مثال: این کتابِ علی است",
            lines=4,
        )
        with gr.Row():
            g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
            g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")
        g2p_button = gr.Button("Convert", variant="primary")
        g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

        # Input order must match ge2pe_infer(model_name, text, use_rules, use_dict).
        g2p_button.click(
            fn=ge2pe_infer,
            inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
            outputs=[g2p_output],
        )
        gr.Examples(
            examples=[
                ["او مرد خوبی است."],
                ["او مرد."],
                ["این کتابِ علی است."],
                ["به خانه آمد."],
            ],
            inputs=[g2p_input],
        )


def _build_tts_tab():
    """Create the text-to-speech tab and wire its button to ``generate_speech``.

    Must be called while a ``gr.Tabs`` context is open, because the components
    are created inside a ``gr.TabItem`` opened here.
    """
    # NOTE(review): the extent of the gr.Row / gr.Column containers was
    # reconstructed from a flattened source — confirm against the intended layout.
    with gr.TabItem("Text‑to‑Speech"):
        gr.Markdown(
            "Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.\n\n"
            "✨ **New features:**\n"
            "- **Long text support:** Automatically splits text into natural segments with optional pauses\n"
            "- **Smart number conversion:** Numbers (۱۴۰۲, 2025, ۵۰۰۰) are automatically converted to text\n"
        )
        with gr.Row():
            with gr.Column(scale=2):
                tts_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="متن فارسی خود را اینجا بنویسید...",
                    lines=8,
                )
                with gr.Row():
                    tts_add_pauses = gr.Checkbox(
                        value=True,
                        label="Add pauses between segments",
                        info="Adds 300ms pause between text segments for natural flow",
                    )
                tts_button = gr.Button("Generate Speech", variant="primary", size="lg")
        tts_output = gr.Audio(label="Generated Speech", type="filepath")

        # The second positional argument of generate_speech is always None,
        # supplied through a constant gr.State.
        # NOTE(review): confirm what that parameter represents in synthesis.generate_speech.
        tts_button.click(
            fn=generate_speech,
            inputs=[tts_input, gr.State(None), tts_add_pauses],
            outputs=[tts_output],
        )
        gr.Examples(
            examples=[
                ["سلام، چطور هستید؟"],
                ["ایران سرزمین زیباییها و افتخارات است."],
                ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                ["مدل تولید گفتار با دادگان نسل مانا"],
                ["در سال 1402 تعداد 5000 دانشجو در دانشگاه ثبتنام کردند."],
                ["شماره تماس من 912 345 6789 است."],
                [
                    "هوش مصنوعی یکی از شگفتانگیزترین دستاوردهای بشر در قرن بیست و یکم است. "
                    "این فناوری توانایی یادگیری، استدلال و حل مسئله را به ماشینها میدهد. "
                    "از پردازش زبان طبیعی گرفته تا بینایی کامپیوتری، هوش مصنوعی در حال تغییر دنیای ماست."
                ],
            ],
            inputs=[tts_input],
        )


def create_interface():
    """Build the Persian Speech Suite Gradio app.

    The app has an intro banner, a tab for grapheme-to-phoneme conversion, a
    tab for text-to-speech, and an acknowledgments/citation footer. Each tab
    is assembled by its own private builder so this function only describes
    the page skeleton.

    Returns:
        The assembled ``gr.Blocks`` object; launching it is left to the caller.
    """
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        gr.Markdown(
            "# Persian Speech Suite: GE2PE & TTS\n"
            "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS).\n\n"
            "✨ **Now supports long texts!** The TTS system automatically splits long texts into natural segments. And also converts numbers to Persian text for better pronunciation."
        )
        with gr.Tabs():
            _build_g2p_tab()
            _build_tts_tab()
        gr.Markdown(_FOOTER_MARKDOWN)
    return demo