drixo committed on
Commit
7b5105b
·
verified ·
1 Parent(s): 8aeefb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -2,8 +2,16 @@ import gradio as gr
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
  import torch
4
  import numpy as np
5
- from huggingface_hub import hf_hub_download
6
- from Index-TTS.infer import IndexTTS # import from local clone
 
 
 
 
 
 
 
 
7
 
8
  # --------------------------
9
  # Translation models
@@ -17,24 +25,20 @@ tokenizer = MarianTokenizer.from_pretrained(current_model_name)
17
  model = MarianMTModel.from_pretrained(current_model_name)
18
 
19
  # --------------------------
20
- # ASR
21
  # --------------------------
22
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
23
 
24
- # --------------------------
25
- # IndexTTS setup
26
- # --------------------------
27
- ckpt_path = hf_hub_download("IndexTeam/Index-TTS", "checkpoints/index_tts_small.ckpt")
28
- cfg_path = hf_hub_download("IndexTeam/Index-TTS", "configs/config.yaml")
29
- tts = IndexTTS(model_dir=ckpt_path, cfg_path=cfg_path)
30
-
31
  # --------------------------
32
  # Helpers
33
  # --------------------------
34
  def text_to_speech(text: str, ref_audio_path):
35
- waveform = tts.generate(text, ref_audio=ref_audio_path)
36
- audio_np = waveform.cpu().numpy() if torch.is_tensor(waveform) else np.array(waveform, dtype=np.float32)
37
- return 16000, audio_np
 
 
 
38
 
39
  def translate_with_voice(audio, lang_pair, ref_voice):
40
  text_input = asr(audio)["text"]
 
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
  import torch
4
  import numpy as np
5
+ from huggingface_hub import snapshot_download
6
+ from indextts.infer import IndexTTS
7
+
8
+ # --------------------------
9
+ # Download Index-TTS from Hugging Face
10
+ # --------------------------
11
+ snapshot_download("IndexTeam/Index-TTS", local_dir="checkpoints")
12
+
13
+ # Initialize TTS
14
+ tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
15
 
16
  # --------------------------
17
  # Translation models
 
25
  model = MarianMTModel.from_pretrained(current_model_name)
26
 
27
  # --------------------------
28
+ # Speech-to-text
29
  # --------------------------
30
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
31
 
 
 
 
 
 
 
 
32
  # --------------------------
33
  # Helpers
34
  # --------------------------
35
def text_to_speech(text: str, ref_audio_path):
    """Synthesize *text* with IndexTTS, cloning the voice from *ref_audio_path*.

    Parameters
    ----------
    text : str
        The text to synthesize.
    ref_audio_path :
        Path to a reference audio clip whose voice IndexTTS imitates
        (passed straight through to ``tts.infer``).

    Returns
    -------
    tuple
        ``(samplerate, waveform)`` — the format Gradio's Audio component
        expects (sample rate as int, waveform as a NumPy array).
    """
    # Local imports: soundfile is only needed here, and tempfile/os/contextlib
    # keep the fix self-contained within this function.
    import contextlib
    import os
    import tempfile

    import soundfile as sf

    # Use a unique temporary file instead of a hard-coded "output.wav":
    # with a fixed path, concurrent Gradio requests would overwrite each
    # other's audio, and the file would be left behind in the CWD.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    try:
        # IndexTTS writes the synthesized waveform to output_path.
        tts.infer(ref_audio_path, text, output_path)
        # Load the waveform back so Gradio can play it.
        data, samplerate = sf.read(output_path)
    finally:
        # Best-effort cleanup; ignore races where the file is already gone.
        with contextlib.suppress(OSError):
            os.remove(output_path)
    return samplerate, data
42
 
43
  def translate_with_voice(audio, lang_pair, ref_voice):
44
  text_input = asr(audio)["text"]