Spaces:

abreza
/

mana-tts

Running on Zero

App Files Files Community

mana-tts / interface.py

abreza

add some example

766414c about 1 month ago

raw

history blame contribute delete

7.76 kB

	import gradio as gr
	from config import custom_css
	from synthesis import generate_speech
	from GE2PE import GE2PE

	# Maps each G2P model name offered in the UI to the local directory that is
	# handed to GE2PE as `model_path`.
	MODEL_PATHS = {
	"Homo-GE2PE": "./homo-ge2pe",
	"Homo-T5": "./homo-t5",
	}

	# GE2PE instances that have already been constructed, keyed by model name;
	# populated on demand by _get_g2p so each model is built at most once.
	_g2p_cache = {}

	def _get_g2p(model_name: str) -> GE2PE:
	    """Return the GE2PE instance for *model_name*, building it on first use.

	    Instances are stored in the module-level ``_g2p_cache`` so a later call
	    with the same name returns the same object. New instances are created
	    from the directory registered in ``MODEL_PATHS`` with ``GPU=False``.

	    Raises:
	        ValueError: if *model_name* has no entry in ``MODEL_PATHS``.
	    """
	    # Fast path: this model was already constructed by an earlier call.
	    if model_name in _g2p_cache:
	        return _g2p_cache[model_name]

	    model_dir = MODEL_PATHS.get(model_name)
	    if model_dir is None:
	        raise ValueError(f"Unknown model: {model_name}")

	    instance = GE2PE(model_path=model_dir, GPU=False)
	    _g2p_cache[model_name] = instance
	    return instance


	def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
	    """Convert *text* to phonemes with the selected G2P model.

	    Args:
	        model_name: key identifying the model, resolved through ``_get_g2p``.
	        text: input text; empty or whitespace-only input yields ``""``.
	        use_rules: forwarded to the model's ``generate`` as ``use_rules``.
	        use_dict: forwarded to the model's ``generate`` as ``use_dict``.

	    Returns:
	        The first element of the model output, ``""`` when the input is
	        blank or the output is empty, or a ``"⚠️ Error: ..."`` message when
	        any exception is raised while loading or running the model.
	    """
	    has_content = bool(text and text.strip())
	    if not has_content:
	        return ""

	    # Any failure is turned into a message for the output box instead of
	    # propagating out of the UI callback.
	    try:
	        g2p = _get_g2p(model_name)
	        outputs = g2p.generate([text], use_rules=use_rules, use_dict=use_dict)
	        if outputs:
	            return outputs[0]
	        return ""
	    except Exception as err:
	        return f"⚠️ Error: {str(err)}"


	# Build the Gradio Blocks application: a header, a grapheme-to-phoneme tab,
	# a text-to-speech tab and an acknowledgments/citation footer. The Blocks
	# object is returned; nothing in this function launches it.
	# NOTE(review): block indentation is missing in this listing, so the exact
	# nesting of the `with` layout contexts below cannot be read from it.
	def create_interface():
	with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
	# Page header rendered as Markdown.
	gr.Markdown(
	"# Persian Speech Suite: GE2PE & TTS\n"
	"A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) and text‑to‑speech synthesis (Mana TTS).\n\n"
	"✨ Now supports long texts! The TTS system automatically splits long texts into natural segments. And also converts numbers to Persian text for better pronunciation."
	)

	with gr.Tabs():
	# ---- Tab 1: grapheme-to-phoneme conversion ----
	with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
	gr.Markdown(
	"Convert Persian text to its phonemic transcription. "
	"Choose between Homo‑GE2PE and Homo‑T5, optionally applying short‑vowel rules and/or a custom dictionary."
	)

	with gr.Row():
	# Choices come from MODEL_PATHS, so every selectable name has a path.
	model_selector = gr.Radio(
	choices=list(MODEL_PATHS.keys()),
	value="Homo-GE2PE",
	label="G2P Model",
	)

	g2p_input = gr.Textbox(
	label="Persian Text",
	placeholder="مثال: این کتابِ علی است",
	lines=4,
	)

	with gr.Row():
	g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
	g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")

	g2p_button = gr.Button("Convert", variant="primary")
	g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

	# The input order matches ge2pe_infer's parameters:
	# (model_name, text, use_rules, use_dict); its return value fills g2p_output.
	g2p_button.click(
	fn=ge2pe_infer,
	inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
	outputs=[g2p_output],
	)

	# Example sentences; each one populates only the text input.
	gr.Examples(
	examples=[
	["او مرد خوبی است."],
	["او مرد."],
	["این کتابِ علی است."],
	["به خانه آمد."]
	],
	inputs=[g2p_input],
	)

	# ---- Tab 2: text-to-speech ----
	with gr.TabItem("Text‑to‑Speech"):
	gr.Markdown(
	"Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.\n\n"
	"✨ New features:\n"
	"- Long text support: Automatically splits text into natural segments with optional pauses\n"
	"- Smart number conversion: Numbers (۱۴۰۲, 2025, ۵۰۰۰) are automatically converted to text\n"
	)

	with gr.Row():
	with gr.Column(scale=2):
	tts_input = gr.Textbox(
	label="Persian Text",
	placeholder="متن فارسی خود را اینجا بنویسید...",
	lines=8,
	)

	with gr.Row():
	tts_add_pauses = gr.Checkbox(
	value=True,
	label="Add pauses between segments",
	info="Adds 300ms pause between text segments for natural flow"
	)

	tts_button = gr.Button("Generate Speech", variant="primary", size="lg")

	# The audio component is configured with type="filepath".
	tts_output = gr.Audio(label="Generated Speech", type="filepath")

	# generate_speech is called with the text, a State holding None, and the
	# pause flag, in that order.
	# NOTE(review): the None State presumably fills an optional second
	# parameter of generate_speech - confirm against synthesis.py.
	tts_button.click(
	fn=generate_speech,
	inputs=[tts_input, gr.State(None), tts_add_pauses],
	outputs=[tts_output],
	)

	# Example texts (short sentences, digits, and one multi-sentence
	# paragraph); each one populates only the TTS text input.
	gr.Examples(
	examples=[
	["سلام، چطور هستید؟"],
	["ایران سرزمین زیبایی‌ها و افتخارات است."],
	["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
	["مدل تولید گفتار با دادگان نسل مانا"],
	["در سال 1402 تعداد 5000 دانشجو در دانشگاه ثبت‌نام کردند."],
	["شماره تماس من 912 345 6789 است."],
	[
	"هوش مصنوعی یکی از شگفت‌انگیزترین دستاوردهای بشر در قرن بیست و یکم است. "
	"این فناوری توانایی یادگیری، استدلال و حل مسئله را به ماشین‌ها می‌دهد. "
	"از پردازش زبان طبیعی گرفته تا بینایی کامپیوتری، هوش مصنوعی در حال تغییر دنیای ماست."
	],
	],
	inputs=[tts_input],
	)

	# Footer: acknowledgments and BibTeX citations, rendered as Markdown.
	gr.Markdown(
	"""
	### Acknowledgments

	- [Nasl‑e‑Mana](https://naslemana.com/), the monthly magazine of the blind community of Iran
	- [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
	- [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
	- [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
	- [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
	- [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
	- [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
	- [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
	- [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
	### Citation

	```bibtex
	@misc{qharabagh2025fastfancyrethinkingg2p,
	title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
	author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
	year={2025},
	eprint={2505.12973},
	archivePrefix={arXiv},
	primaryClass={cs.CL},
	}

	@article{fetrat2024manatts,
	title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
	author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
	journal={arXiv preprint arXiv:2409.07259},
	year={2024},
	}
	```
	"""
	)

	# Hand the assembled app back to the caller, which is responsible for launching it.
	return demo