Spaces:

armish
/

antep-agzi

Sleeping

Arman Aksoy

Simplify UI and translate to Turkish

9c29482 6 months ago

6.78 kB

	import os
	import subprocess
	import tempfile
	from functools import lru_cache

	import gradio as gr
	import numpy as np
	from transformers import pipeline

	# Optional torch import for tensor -> numpy safety (won't error if missing)
	try:
	import torch
	except Exception: # pragma: no cover
	torch = None

	# ------------------------------------------------------------
	# Selectable models: (dropdown label, Hugging Face model id) pairs
	# ------------------------------------------------------------
	MODEL_CHOICES = [
	    (
	        "Facebook's Original Turkish (facebook/mms-tts-tur)",
	        "facebook/mms-tts-tur",
	    ),
	    (
	        "Custom checkpoint for fine tuning (armish/mms-tts-tur-train)",
	        "armish/mms-tts-tur-train",
	    ),
	    (
	        "Gaziantepagzi.com - Asim Mihcioglu - 50 epoch (armish/mms-tts-antep-agzi1)",
	        "armish/mms-tts-antep-agzi1",
	    ),
	    (
	        "Gaziantepagzi.com - Hatice Barazi - 50 epoch (armish/mms-tts-antep-agzi2)",
	        "armish/mms-tts-antep-agzi2",
	    ),
	    (
	        "Antepagzindan.com - 50 epoch (armish/mms-tts-antep-agzi3)",
	        "armish/mms-tts-antep-agzi3",
	    ),
	    (
	        "Antepagzindan.com - 50 epoch - uroman (armish/mms-tts-antep-agzi3_uroman)",
	        "armish/mms-tts-antep-agzi3_uroman",
	    ),
	    (
	        "Antepagzindan.com - 100 epoch - uroman (armish/mms-tts-antep-agzi3_uroman_100)",
	        "armish/mms-tts-antep-agzi3_uroman_100",
	    ),
	]

	# Model id used when nothing (or an unknown label) is selected; it is the
	# id of the last MODEL_CHOICES entry.
	DEFAULT_MODEL = "armish/mms-tts-antep-agzi3_uroman_100"
	# Dropdown label of the first entry whose model id equals DEFAULT_MODEL.
	DEFAULT_LABEL = next(
	    label for label, model_id in MODEL_CHOICES if model_id == DEFAULT_MODEL
	)


	# ------------------------------------------------------------
	# Helpers
	# ------------------------------------------------------------
	def _uromanize(text: str) -> str:
	    """Romanize ``text`` by piping it through the uroman Perl script.

	    The uroman checkout is located through the ``UROMAN`` environment
	    variable and the script executed is ``<UROMAN>/bin/uroman.pl``. The
	    input is sent to the script's stdin encoded as UTF-8.

	    Args:
	        text: The text to romanize.

	    Returns:
	        The script's stdout decoded as UTF-8, with surrounding whitespace
	        stripped.

	    Raises:
	        RuntimeError: If ``UROMAN`` is unset or empty, or if the Perl
	            process exits with a non-zero status (its stderr is included
	            in the message).
	    """
	    uroman_root = os.environ.get("UROMAN")
	    if not uroman_root:
	        message = (
	            "UROMAN environment variable is not set. "
	            "Add uroman to the repo and set UROMAN to its path in Space settings."
	        )
	        raise RuntimeError(message)

	    command = ["perl", os.path.join(uroman_root, "bin", "uroman.pl")]
	    # capture_output=True pipes both stdout and stderr back to us.
	    result = subprocess.run(
	        command,
	        input=text.encode("utf-8"),
	        capture_output=True,
	    )
	    if result.returncode == 0:
	        return result.stdout.decode("utf-8").strip()
	    raise RuntimeError(f"uroman error: {result.stderr.decode('utf-8')}")


	@lru_cache(maxsize=4)
	def get_tts(model_name: str, device: int = -1):
	    """Return a ``text-to-speech`` pipeline for ``model_name``.

	    Results are memoized by ``lru_cache`` (at most 4 entries, keyed on the
	    call arguments), so repeated requests with the same arguments reuse the
	    pipeline object that was built the first time.

	    Args:
	        model_name: Model id handed to ``pipeline`` as ``model``.
	        device: Device index handed to ``pipeline`` as ``device``;
	            defaults to ``-1``.

	    Returns:
	        Whatever ``pipeline("text-to-speech", ...)`` returns.
	    """
	    tts_pipeline = pipeline("text-to-speech", model=model_name, device=device)
	    return tts_pipeline


	def _to_numpy_1d(audio):
	    """Normalize pipeline audio into a flat float32 array within [-1, 1].

	    Args:
	        audio: A torch tensor (when torch is importable), a numpy array, or
	            anything ``np.asarray`` accepts.

	    Returns:
	        A 1-D ``float32`` numpy array. Size-1 axes are dropped; any input
	        that is still not 1-D afterwards is flattened in C order (channels
	        are concatenated, not mixed). NaN becomes 0.0, +inf becomes 1.0,
	        -inf becomes -1.0, and every value is clipped to [-1, 1]. Values
	        are not rescaled.
	    """
	    # Tensors are detached and moved to host memory before conversion.
	    if torch is not None and isinstance(audio, torch.Tensor):
	        audio = audio.detach().cpu().numpy()

	    samples = np.asarray(audio)

	    # (1, T) / (T, 1) / batched / scalar input all end up as shape (N,).
	    if samples.ndim != 1:
	        samples = np.squeeze(samples).reshape(-1)

	    # Cast first, then scrub non-finite values, then bound the range.
	    samples = np.nan_to_num(
	        samples.astype(np.float32, copy=False),
	        nan=0.0,
	        posinf=1.0,
	        neginf=-1.0,
	    )
	    return np.clip(samples, -1.0, 1.0)


	def _write_wav_to_temp(sr: int, audio_f32: np.ndarray) -> str:
	    """Write float audio as a 16-bit PCM WAV temp file and return its path.

	    Args:
	        sr: Sampling rate in Hz. Converted with ``int()``; must be > 0.
	        audio_f32: Float samples nominally in [-1, 1]. NaNs are replaced
	            with 0 and out-of-range values are clipped before the int16
	            conversion, so stray samples saturate instead of wrapping
	            around.

	    Returns:
	        Path of the ``.wav`` file. The file is created with
	        ``delete=False``, so it is not removed automatically.

	    Raises:
	        ValueError: If ``sr`` is not a positive integer.
	    """
	    # Local import, as in the rest of this file scipy is not imported at
	    # module level.
	    import scipy.io.wavfile as wavfile

	    # Validate before doing any work or creating a file on disk.
	    sr = int(sr)
	    if sr <= 0:
	        raise ValueError(f"Invalid sampling rate: {sr}")

	    bounded = np.clip(np.nan_to_num(audio_f32, nan=0.0), -1.0, 1.0)
	    audio_i16 = (bounded * 32767.0).astype(np.int16)

	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
	        # Write through the already-open handle: reopening a
	        # NamedTemporaryFile by name while it is still open is not
	        # portable (it fails on Windows).
	        wavfile.write(f, sr, audio_i16)
	        wav_path = f.name
	    return wav_path


	def synthesize(text, model_label):
	    """Synthesize ``text`` with the model chosen by ``model_label``.

	    Args:
	        text: The sentence to speak. Empty or whitespace-only text is
	            rejected.
	        model_label: A display label from ``MODEL_CHOICES``; labels that
	            are not found fall back to ``DEFAULT_MODEL``.

	    Returns:
	        A ``(wav_path, status_update)`` pair. On success ``wav_path`` is
	        the path of the generated WAV file; on any failure it is ``None``
	        and the status update carries the exception type and message.
	        Exceptions are never propagated to the caller.
	    """
	    try:
	        # Fail early with a readable message instead of whatever the
	        # model would raise on empty input.
	        if not text or not text.strip():
	            raise ValueError("Input text is empty")

	        # Map label → model id
	        model_name = dict(MODEL_CHOICES).get(model_label, DEFAULT_MODEL)

	        # Always use CPU
	        device = -1
	        tts = get_tts(model_name, device=device)
	        out = tts(text)

	        # The result is read either as a dict or as an object exposing
	        # 'audio' and 'sampling_rate'.
	        if isinstance(out, dict):
	            sr = out.get("sampling_rate")
	            audio = out.get("audio")
	        else:
	            sr = getattr(out, "sampling_rate", None)
	            audio = getattr(out, "audio", None)

	        # Same 16 kHz fallback for both result shapes (previously only the
	        # attribute path had one, and the dict path hit int(None)).
	        sr = 16000 if sr is None else int(sr)
	        if audio is None:
	            raise ValueError("TTS pipeline returned no audio")

	        # Some pipelines return [audio] for batch size 1
	        if isinstance(audio, (list, tuple)):
	            audio = audio[0]

	        audio = _to_numpy_1d(audio)
	        wav_path = _write_wav_to_temp(sr, audio)

	        # A file path is returned rather than (sr, numpy) to avoid pydub
	        # header issues.
	        return wav_path, gr.update(value=f"✅ Synthesized with {model_name}")
	    except Exception as e:
	        # UI boundary: report the failure in the status box instead of
	        # raising.
	        return None, gr.update(value=f"❌ {type(e).__name__}: {e}")




	# ------------------------------------------------------------
	# Gradio UI
	# ------------------------------------------------------------
	# Build the page. Components are created in the order they are listed
	# here: heading, text input, model dropdown, button, audio output, status
	# box, then the examples.
	with gr.Blocks(title="Gaziantep Ağzı ile Seslendirme Servisi") as demo:
	    # Page heading and a one-line usage instruction (Turkish).
	    gr.Markdown(
	        """
	# Gaziantep Ağzı ile Seslendirme Servisi
	Seslendirmek istediğiniz cümleyi yazın ve seslendir butonuna basın.
	"""
	    )

	    # Text to synthesize; pre-filled with a sample sentence.
	    text_in = gr.Textbox(
	        label="Metin",
	        value="Ben Antepli bir yapay zekayım.",
	        lines=3,
	        placeholder="Sentezlenecek metni girin…",
	    )

	    # Model selector: shows the labels of MODEL_CHOICES; synthesize() maps
	    # the chosen label back to a model id.
	    model_in = gr.Dropdown(
	        label="Model",
	        choices=[lbl for (lbl, _id) in MODEL_CHOICES],
	        value=DEFAULT_LABEL,
	        interactive=True,
	    )

	    synth_btn = gr.Button("🎙️ Seslendir", variant="primary")

	    # IMPORTANT: accept a FILEPATH from the function
	    audio_out = gr.Audio(label="Ses dosyası", type="filepath", autoplay=True)
	    # Read-only box that receives the success / error message.
	    status = gr.Textbox(label="Durum", value="", interactive=False)

	    # Clicking the button calls synthesize(text, model_label); its two
	    # return values go to the audio player and the status box.
	    synth_btn.click(
	        fn=synthesize,
	        inputs=[text_in, model_in],
	        outputs=[audio_out, status],
	    )

	    # --------------------------------------------------------
	    # Examples: [text, model label] rows for the two inputs
	    # --------------------------------------------------------
	    gr.Examples(
	        examples=[
	            ["Ben Antepli bir yapay zekayım.", DEFAULT_LABEL],
	            ["Kepeği kesilesice.", DEFAULT_LABEL],
	            ["Gaziantep.", DEFAULT_LABEL],
	            ["Ben sana ne hanek anlatıyorum?", DEFAULT_LABEL],
	        ],
	        inputs=[text_in, model_in],
	        label="Örnekler",
	    )

	# Launch the app only when this file is run as a script.
	if __name__ == "__main__":
	    # If your Space shows SSR warning, you can disable SSR explicitly if desired:
	    # demo.launch(ssr_mode=False)
	    demo.launch()