drixo committed on
Commit
7b5105b
·
verified ·
1 Parent(s): 8aeefb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -2,8 +2,16 @@ import gradio as gr
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
  import torch
4
  import numpy as np
5
- from huggingface_hub import hf_hub_download
6
- from Index-TTS.infer import IndexTTS # import from local clone
 
 
 
 
 
 
 
 
7
 
8
  # --------------------------
9
  # Translation models
@@ -17,24 +25,20 @@ tokenizer = MarianTokenizer.from_pretrained(current_model_name)
17
  model = MarianMTModel.from_pretrained(current_model_name)
18
 
19
  # --------------------------
20
- # ASR
21
  # --------------------------
22
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
23
 
24
- # --------------------------
25
- # IndexTTS setup
26
- # --------------------------
27
- ckpt_path = hf_hub_download("IndexTeam/Index-TTS", "checkpoints/index_tts_small.ckpt")
28
- cfg_path = hf_hub_download("IndexTeam/Index-TTS", "configs/config.yaml")
29
- tts = IndexTTS(model_dir=ckpt_path, cfg_path=cfg_path)
30
-
31
  # --------------------------
32
  # Helpers
33
  # --------------------------
34
  def text_to_speech(text: str, ref_audio_path):
35
- waveform = tts.generate(text, ref_audio=ref_audio_path)
36
- audio_np = waveform.cpu().numpy() if torch.is_tensor(waveform) else np.array(waveform, dtype=np.float32)
37
- return 16000, audio_np
 
 
 
38
 
39
  def translate_with_voice(audio, lang_pair, ref_voice):
40
  text_input = asr(audio)["text"]
 
2
  from transformers import MarianMTModel, MarianTokenizer, pipeline
3
  import torch
4
  import numpy as np
5
+ from huggingface_hub import snapshot_download
6
+ from indextts.infer import IndexTTS
7
+
8
+ # --------------------------
9
+ # Download Index-TTS from Hugging Face
10
+ # --------------------------
11
+ snapshot_download("IndexTeam/Index-TTS", local_dir="checkpoints")
12
+
13
+ # Initialize TTS
14
+ tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml")
15
 
16
  # --------------------------
17
  # Translation models
 
25
  model = MarianMTModel.from_pretrained(current_model_name)
26
 
27
  # --------------------------
28
+ # Speech-to-text
29
  # --------------------------
30
  asr = pipeline("automatic-speech-recognition", model="openai/whisper-small")
31
 
 
 
 
 
 
 
 
32
  # --------------------------
33
  # Helpers
34
  # --------------------------
35
def text_to_speech(text: str, ref_audio_path):
    """Synthesize *text* with IndexTTS, cloning the voice from *ref_audio_path*.

    Parameters
    ----------
    text : str
        The text to synthesize.
    ref_audio_path :
        Path to a reference audio clip whose voice IndexTTS imitates
        (passed straight through to ``tts.infer``).

    Returns
    -------
    tuple
        ``(samplerate, waveform)`` — the format Gradio's Audio component
        expects (sample rate as int, waveform as a NumPy array).
    """
    # Local imports: soundfile is only needed here, and tempfile/os/contextlib
    # keep the fix self-contained within this function.
    import contextlib
    import os
    import tempfile

    import soundfile as sf

    # Use a unique temporary file instead of a hard-coded "output.wav":
    # with a fixed path, concurrent Gradio requests would overwrite each
    # other's audio, and the file would be left behind in the CWD.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    try:
        # IndexTTS writes the synthesized waveform to output_path.
        tts.infer(ref_audio_path, text, output_path)
        # Load the waveform back so Gradio can play it.
        data, samplerate = sf.read(output_path)
    finally:
        # Best-effort cleanup; ignore races where the file is already gone.
        with contextlib.suppress(OSError):
            os.remove(output_path)
    return samplerate, data
42
 
43
  def translate_with_voice(audio, lang_pair, ref_voice):
44
  text_input = asr(audio)["text"]