Update app.py
app.py
CHANGED
@@ -1,81 +1,15 @@
-import gradio as gr
-from transformers import MarianMTModel, MarianTokenizer, pipeline
-import torch
-import numpy as np
-from huggingface_hub import snapshot_download
-from indextts.infer import IndexTTS
-
-# --------------------------
-# Download Index-TTS from Hugging Face
-# --------------------------
-snapshot_download("IndexTeam/Index-TTS", local_dir="checkpoints")
-
-# Initialize TTS
-tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
-
-# --------------------------
-# Translation models
-# --------------------------
-language_models = {
-    "Spanish → English": "Helsinki-NLP/opus-mt-es-en",
-    "English → Spanish": "Helsinki-NLP/opus-mt-en-es"
-}
-current_model_name = language_models["Spanish → English"]
-tokenizer = MarianTokenizer.from_pretrained(current_model_name)
-model = MarianMTModel.from_pretrained(current_model_name)
-
-# --------------------------
-# Speech-to-text
-# --------------------------
-asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
-
-# --------------------------
-# Helpers
-# --------------------------
-def text_to_speech(text: str, ref_audio_path):
-    output_path = "output.wav"
-    tts.infer(ref_audio_path, text, output_path)
-    # Load waveform for Gradio
-    import soundfile as sf
-    data, samplerate = sf.read(output_path)
-    return samplerate, data
-
-def translate_and_speak(audio, lang_pair, ref_voice):  # def line not visible in the extracted diff; name assumed
-    text_input = asr(audio)["text"]
-
-    global tokenizer, model, current_model_name
-    if language_models[lang_pair] != current_model_name:
-        current_model_name = language_models[lang_pair]
-        tokenizer = MarianTokenizer.from_pretrained(current_model_name)
-        model = MarianMTModel.from_pretrained(current_model_name)
-
-    inputs = tokenizer(text_input, return_tensors="pt", padding=True)
-    translated = model.generate(**inputs)
-    translated_text = tokenizer.decode(translated[0], skip_special_tokens=True)
-
-    sr, audio_array = text_to_speech(translated_text, ref_audio_path=ref_voice)
-    return translated_text, (sr, audio_array)
-
-# --------------------------
-# Gradio UI
-# --------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## 🗣️ Voice-Cloned Translator (English ↔ Spanish)")
-    with gr.Row():
-        with gr.Column():
-            audio_input = gr.Audio(sources=["microphone"], type="filepath", label="🎤 Speak")
-            lang_dropdown = gr.Dropdown(list(language_models.keys()), label="🌐 Target Language", value="Spanish → English")
-            ref_voice_input = gr.Audio(sources=["upload"], type="filepath", label="🎧 Reference Voice (5–10s)")
-            btn = gr.Button("Translate & Speak")
-        with gr.Column():
-            text_output = gr.Textbox(label="Translated Text")
-            audio_output = gr.Audio(label="🔊 Translated Audio", type="numpy")
-
-    btn.click(  # click wiring not visible in the extracted diff; reconstructed from the surviving inputs/outputs lines
-        translate_and_speak,
-        inputs=[audio_input, lang_dropdown, ref_voice_input],
-        outputs=[text_output, audio_output]
-    )
-
-demo.launch()  # assumed; standard close for a Gradio Space, not visible in the extracted diff
+# Download model
+from huggingface_hub import snapshot_download
+
+snapshot_download("IndexTeam/Index-TTS", local_dir="checkpoints")
+
+from indextts.infer import IndexTTS
+
+# Ensure config.yaml is present in the checkpoints directory
+tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
+
+voice = "path/to/your/reference_voice.wav"  # Path to the voice reference audio file
+text = "Hello, how are you?"
+output_path = "output_index.wav"
+
+tts.infer(voice, text, output_path)
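
With the Gradio UI removed, the new script just writes output_index.wav to disk. To sanity-check the result the way the old text_to_speech helper did, the file can be read back as a waveform; a minimal sketch, assuming soundfile is installed (it was already imported by the previous version):

import soundfile as sf

data, samplerate = sf.read(output_path)  # waveform as a NumPy array plus its sample rate
print(f"Generated {len(data) / samplerate:.2f}s of audio at {samplerate} Hz")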