# Hugging Face Space app: Gaziantep-dialect Turkish text-to-speech demo.
# (Removed non-code page residue "Spaces: Sleeping" left over from a web copy.)
import os
import subprocess
import tempfile
from functools import lru_cache

import gradio as gr
import numpy as np
from transformers import pipeline

# torch is optional here: it is only needed to convert torch.Tensor audio
# to numpy, so any import failure simply disables that path.
try:
    import torch
except Exception:  # pragma: no cover
    torch = None
# ------------------------------------------------------------
# Model list (label → Hugging Face model id)
# ------------------------------------------------------------
MODEL_CHOICES = [
    ("Facebook's Original Turkish (facebook/mms-tts-tur)", "facebook/mms-tts-tur"),
    ("Custom checkpoint for fine tuning (armish/mms-tts-tur-train)", "armish/mms-tts-tur-train"),
    ("Gaziantepagzi.com - Asim Mihcioglu - 50 epoch (armish/mms-tts-antep-agzi1)", "armish/mms-tts-antep-agzi1"),
    ("Gaziantepagzi.com - Hatice Barazi - 50 epoch (armish/mms-tts-antep-agzi2)", "armish/mms-tts-antep-agzi2"),
    ("Antepagzindan.com - 50 epoch (armish/mms-tts-antep-agzi3)", "armish/mms-tts-antep-agzi3"),
    ("Antepagzindan.com - 50 epoch - uroman (armish/mms-tts-antep-agzi3_uroman)", "armish/mms-tts-antep-agzi3_uroman"),
    ("Antepagzindan.com - 100 epoch - uroman (armish/mms-tts-antep-agzi3_uroman_100)", "armish/mms-tts-antep-agzi3_uroman_100"),
]

# The default model is the last entry above; resolve its UI label once.
DEFAULT_MODEL = "armish/mms-tts-antep-agzi3_uroman_100"
DEFAULT_LABEL = next(label for label, model_id in MODEL_CHOICES if model_id == DEFAULT_MODEL)
| # ------------------------------------------------------------ | |
| # Helpers | |
| # ------------------------------------------------------------ | |
| def _uromanize(text: str) -> str: | |
| """Romanize non-Latin text using the uroman Perl package.""" | |
| uroman_path = os.environ.get("UROMAN") | |
| if not uroman_path: | |
| raise RuntimeError( | |
| "UROMAN environment variable is not set. " | |
| "Add uroman to the repo and set UROMAN to its path in Space settings." | |
| ) | |
| script = os.path.join(uroman_path, "bin", "uroman.pl") | |
| proc = subprocess.run( | |
| ["perl", script], | |
| input=text.encode("utf-8"), | |
| stdout=subprocess.PIPE, | |
| stderr=subprocess.PIPE, | |
| ) | |
| if proc.returncode != 0: | |
| raise RuntimeError(f"uroman error: {proc.stderr.decode('utf-8')}") | |
| return proc.stdout.decode("utf-8").strip() | |
@lru_cache(maxsize=None)
def get_tts(model_name: str, device: int = -1):
    """Build (or return a cached) Hugging Face text-to-speech pipeline.

    The docstring previously promised caching, and `lru_cache` is imported
    at the top of the file, but no cache was ever applied — so every
    synthesis request re-instantiated (and potentially re-downloaded) the
    model. The decorator makes repeat calls with the same
    (model_name, device) pair return the same pipeline object.

    Args:
        model_name: Hugging Face model id, e.g. "facebook/mms-tts-tur".
        device: transformers device index; -1 selects CPU.
    """
    return pipeline("text-to-speech", model=model_name, device=device)
| def _to_numpy_1d(audio): | |
| """Convert HF output audio to a clean 1D float32 numpy array in [-1, 1].""" | |
| # Convert torch -> numpy if needed | |
| if torch is not None and isinstance(audio, torch.Tensor): | |
| audio = audio.detach().cpu().numpy() | |
| audio = np.asarray(audio) | |
| # If batched or shaped (1, T) / (T, 1), squeeze to (T,) | |
| if audio.ndim > 1: | |
| audio = np.squeeze(audio) | |
| # Ensure 1D | |
| if audio.ndim != 1: | |
| # Fall back: flatten to mono | |
| audio = audio.reshape(-1) | |
| # dtype & clean | |
| audio = audio.astype(np.float32, copy=False) | |
| # guard against NaNs/Infs | |
| audio = np.nan_to_num(audio, nan=0.0, posinf=1.0, neginf=-1.0) | |
| # clip to valid range | |
| audio = np.clip(audio, -1.0, 1.0) | |
| # If it's all zeros (rare), return as-is | |
| return audio | |
| def _write_wav_to_temp(sr: int, audio_f32: np.ndarray) -> str: | |
| """Write int16 WAV to a NamedTemporaryFile and return its filepath.""" | |
| # Scale to int16 | |
| audio_i16 = (audio_f32 * 32767.0).astype(np.int16) | |
| # Write to a temp file using scipy to get a correct WAV header | |
| import scipy.io.wavfile as wavfile # local import to keep import time light | |
| sr = int(sr) | |
| if sr <= 0: | |
| raise ValueError(f"Invalid sampling rate: {sr}") | |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f: | |
| wavfile.write(f.name, sr, audio_i16) | |
| return f.name | |
def synthesize(text, model_label):
    """Turn *text* into speech using the model selected in the UI.

    Returns (wav_filepath, status_update); on failure the path is None and
    the status textbox carries the error message.

    NOTE(review): `_uromanize` is defined in this module but never called
    here — the "*_uroman" checkpoints may expect romanized input; confirm
    against the model cards.
    """
    try:
        # The dropdown passes a human-readable label; resolve it to a model id.
        selected_model = dict(MODEL_CHOICES).get(model_label, DEFAULT_MODEL)
        tts = get_tts(selected_model, device=-1)  # -1 → always CPU
        out = tts(text)

        # Standard MMS pipelines return a dict with 'audio' and
        # 'sampling_rate'; tolerate attribute-style outputs as well.
        if isinstance(out, dict):
            sr = int(out.get("sampling_rate"))
            audio = out.get("audio")
        else:
            sr = int(getattr(out, "sampling_rate", 16000))
            audio = getattr(out, "audio", None)

        # Batch-of-one outputs arrive wrapped in a list/tuple.
        if isinstance(audio, (list, tuple)):
            audio = audio[0]

        wav_path = _write_wav_to_temp(sr, _to_numpy_1d(audio))
        # Returning a filepath (not (sr, array)) sidesteps pydub header issues.
        return wav_path, gr.update(value=f"✅ Synthesized with {selected_model}")
    except Exception as e:  # UI boundary: surface the error instead of crashing
        return None, gr.update(value=f"❌ {type(e).__name__}: {e}")
# ------------------------------------------------------------
# Gradio UI
# ------------------------------------------------------------
with gr.Blocks(title="Gaziantep Ağzı ile Seslendirme Servisi") as demo:
    gr.Markdown(
        """
# Gaziantep Ağzı ile Seslendirme Servisi
Seslendirmek istediğiniz cümleyi yazın ve seslendir butonuna basın.
"""
    )

    # Inputs: free text plus a model selector (defaults to the newest checkpoint).
    input_text = gr.Textbox(
        label="Metin",
        value="Ben Antepli bir yapay zekayım.",
        lines=3,
        placeholder="Sentezlenecek metni girin…",
    )
    model_choice = gr.Dropdown(
        label="Model",
        choices=[label for label, _ in MODEL_CHOICES],
        value=DEFAULT_LABEL,
        interactive=True,
    )
    speak_button = gr.Button("🎙️ Seslendir", variant="primary")

    # Output: the synthesized WAV as a FILEPATH (matches synthesize's return),
    # plus a read-only status line.
    audio_player = gr.Audio(label="Ses dosyası", type="filepath", autoplay=True)
    status_box = gr.Textbox(label="Durum", value="", interactive=False)

    speak_button.click(
        fn=synthesize,
        inputs=[input_text, model_choice],
        outputs=[audio_player, status_box],
    )

    # --------------------------------------------------------
    # Example sentences, all using the default model.
    # --------------------------------------------------------
    gr.Examples(
        examples=[
            ["Ben Antepli bir yapay zekayım.", DEFAULT_LABEL],
            ["Kepeği kesilesice.", DEFAULT_LABEL],
            ["Gaziantep.", DEFAULT_LABEL],
            ["Ben sana ne hanek anlatıyorum?", DEFAULT_LABEL],
        ],
        inputs=[input_text, model_choice],
        label="Örnekler",
    )

if __name__ == "__main__":
    # If the Space shows an SSR warning, launch with ssr_mode=False instead.
    demo.launch()