File size: 7,756 Bytes
10e72d3
 
 
eb57aa1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e72d3
 
eb57aa1
da2ee9a
 
 
766414c
da2ee9a
eb57aa1
 
 
da2ee9a
 
 
 
eb57aa1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da2ee9a
 
766414c
 
 
10e72d3
eb57aa1
da2ee9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eb57aa1
 
 
da2ee9a
eb57aa1
 
 
 
 
 
 
 
 
766414c
 
da2ee9a
 
 
 
 
eb57aa1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10e72d3
 
 
eb57aa1
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
import gradio as gr
from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE

# Maps the model name offered in the UI radio selector to the path that is
# passed to GE2PE(model_path=...) when that model is first requested.
MODEL_PATHS = {
    "Homo-GE2PE": "./homo-ge2pe",
    "Homo-T5": "./homo-t5",
}

# Module-level cache of already-constructed GE2PE instances, keyed by model
# name; populated on first use by _get_g2p so each model is built only once.
_g2p_cache = {}

def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, constructing it on first use.

    Instances are memoized in the module-level ``_g2p_cache`` so a model is
    only built once per process.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached (or newly created) ``GE2PE`` object for that model.

    Raises:
        ValueError: If *model_name* is not cached and has no entry in
            ``MODEL_PATHS``.
    """
    # Fast path: the model was already built by an earlier call.
    if model_name in _g2p_cache:
        return _g2p_cache[model_name]

    model_dir = MODEL_PATHS.get(model_name)
    if model_dir is None:
        raise ValueError(f"Unknown model: {model_name}")

    # Built with GPU=False, then remembered for subsequent calls.
    instance = GE2PE(model_path=model_dir, GPU=False)
    _g2p_cache[model_name] = instance
    return instance


def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on *text* for the UI callback.

    Args:
        model_name: Name of the G2P model, resolved through ``_get_g2p``.
        text: Input text; empty, ``None`` or whitespace-only yields ``""``.
        use_rules: Forwarded to the model's ``generate`` as ``use_rules``.
        use_dict: Forwarded to the model's ``generate`` as ``use_dict``.

    Returns:
        The first element of the model's output for the single-item batch
        ``[text]``, ``""`` when the input is blank or the output is empty, or
        a ``"⚠️ Error: ..."`` string if anything raises along the way.
    """
    # Nothing to convert: skip model loading entirely.
    if not (text and text.strip()):
        return ""

    try:
        g2p = _get_g2p(model_name)
        outputs = g2p.generate([text], use_rules=use_rules, use_dict=use_dict)
        if outputs:
            return outputs[0]
        return ""
    except Exception as err:
        # UI boundary: report the failure as text instead of raising.
        return f"⚠️ Error: {err!s}"


def create_interface() -> gr.Blocks:
    """Build the Gradio UI for the Persian Speech Suite and return it.

    The interface has an introductory header, two tabs, and a footer:

    * "Grapheme → Phoneme (GE2PE)": model selector, text input, two option
      checkboxes and a button wired to ``ge2pe_infer``.
    * "Text‑to‑Speech": text input, a pause checkbox and a button wired to
      ``generate_speech``, with an audio output.
    * A closing Markdown block with acknowledgments and citations.

    The function only constructs the interface; it does not launch it.

    Returns:
        The ``demo`` object bound by the ``gr.Blocks(...)`` context.
    """
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        # Page header shown above both tabs.
        gr.Markdown(
            "# Persian Speech Suite: GE2PE & TTS\n"
            "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS).\n\n"
            "✨ **Now supports long texts!** The TTS system automatically splits long texts into natural segments. And also converts numbers to Persian text for better pronunciation."
        )

        with gr.Tabs():
            # ---- Tab 1: grapheme-to-phoneme conversion ----
            with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
                gr.Markdown(
                    "Convert Persian text to its phonemic transcription. "
                    "Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary."
                )

                with gr.Row():
                    # Choices come straight from MODEL_PATHS so the selector
                    # always matches the models _get_g2p can resolve.
                    model_selector = gr.Radio(
                        choices=list(MODEL_PATHS.keys()),
                        value="Homo-GE2PE",
                        label="G2P Model",
                    )

                g2p_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مثال: این کتابِ علی است",
                    lines=4,
                )

                with gr.Row():
                    g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
                    g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")

                g2p_button = gr.Button("Convert", variant="primary")
                g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

                # The inputs list order must match ge2pe_infer's parameter
                # order: (model_name, text, use_rules, use_dict).
                g2p_button.click(
                    fn=ge2pe_infer,
                    inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
                    outputs=[g2p_output],
                )

                # Clickable sample sentences that populate the G2P text box.
                gr.Examples(
                    examples=[
                        ["او مرد خوبی است."],
                        ["او مرد."],
                        ["این کتابِ علی است."],
                        ["به خانه آمد."]
                    ],
                    inputs=[g2p_input],
                )

            # ---- Tab 2: text-to-speech synthesis ----
            with gr.TabItem("Text‑to‑Speech"):
                gr.Markdown(
                    "Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.\n\n"
                    "✨ **New features:**\n"
                    "- **Long text support:** Automatically splits text into natural segments with optional pauses\n"
                    "- **Smart number conversion:** Numbers (۱۴۰۲, 2025, ۵۰۰۰) are automatically converted to text\n"
                )

                with gr.Row():
                    with gr.Column(scale=2):
                        tts_input = gr.Textbox(
                            label="Persian Text",
                            placeholder="متن فارسی خود را اینجا بنویسید...",
                            lines=8,
                        )

                        with gr.Row():
                            tts_add_pauses = gr.Checkbox(
                                value=True,
                                label="Add pauses between segments",
                                info="Adds 300ms pause between text segments for natural flow"
                            )

                        tts_button = gr.Button("Generate Speech", variant="primary", size="lg")

                # type="filepath": the output component is configured for a
                # file path value.
                tts_output = gr.Audio(label="Generated Speech", type="filepath")

                # gr.State(None) supplies a constant None as the second
                # argument of generate_speech, which has no UI control here.
                # NOTE(review): the meaning of that second parameter is defined
                # in synthesis.generate_speech — confirm against its signature.
                tts_button.click(
                    fn=generate_speech,
                    inputs=[tts_input, gr.State(None), tts_add_pauses],
                    outputs=[tts_output],
                )

                # Clickable sample texts that populate the TTS text box,
                # including sentences with digits and one multi-sentence text.
                gr.Examples(
                    examples=[
                        ["سلام، چطور هستید؟"],
                        ["ایران سرزمین زیبایی‌ها و افتخارات است."],
                        ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                        ["مدل تولید گفتار با دادگان نسل مانا"],
                        ["در سال 1402 تعداد 5000 دانشجو در دانشگاه ثبت‌نام کردند."],
                        ["شماره تماس من 912 345 6789 است."],
                        [
                            "هوش مصنوعی یکی از شگفت‌انگیزترین دستاوردهای بشر در قرن بیست و یکم است. "
                            "این فناوری توانایی یادگیری، استدلال و حل مسئله را به ماشین‌ها می‌دهد. "
                            "از پردازش زبان طبیعی گرفته تا بینایی کامپیوتری، هوش مصنوعی در حال تغییر دنیای ماست."
                        ],
                    ],
                    inputs=[tts_input],
                )

        # Footer rendered below the tabs: acknowledgments and BibTeX citations.
        gr.Markdown(
            """
            ### Acknowledgments

            - [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
            - [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
            - [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
            - [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
            - [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
            - [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
            - [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
            - [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
            - [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
            ### Citation

            ```bibtex
            @misc{qharabagh2025fastfancyrethinkingg2p,
              title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
              author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
              year={2025},
              eprint={2505.12973},
              archivePrefix={arXiv},
              primaryClass={cs.CL},
            }

            @article{fetrat2024manatts,
              title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
              author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
              journal={arXiv preprint arXiv:2409.07259},
              year={2024},
            }
            ```
            """
        )

    return demo