sedrukjglfhsdlkf committed on
Commit
7b4a7c6
verified
1 Parent(s): 1e63917

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -20
app.py CHANGED
@@ -1,33 +1,36 @@
1
  import os
2
  import sys
3
  import logging
4
- import json
5
  import tempfile
6
- import shutil
7
  import numpy as np
8
  import torch
9
- import librosa
10
  import soundfile as sf
11
  import gradio as gr
12
  from pathlib import Path
13
- from scipy.io import wavfile
14
 
 
 
 
 
 
 
 
 
 
15
  try:
16
  from TTS.api import TTS
17
  from TTS.config.shared_configs import BaseDatasetConfig
18
  torch.serialization.add_safe_globals([BaseDatasetConfig])
19
  except ImportError:
20
  pass
 
 
21
 
22
- from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
 
23
  from demucs.pretrained import get_model
24
  from demucs.apply import apply_model
25
-
26
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
27
- logger = logging.getLogger(__name__)
28
-
29
- os.environ["COQUI_TOS_AGREED"] = "1"
30
- os.environ["CUDA_MODULE_LOADING"] = "LAZY"
31
 
32
  class ProcessingManager:
33
  def __init__(self):
@@ -54,6 +57,7 @@ class ProcessingManager:
54
  model_name = f"Helsinki-NLP/opus-mt-{src}-{tgt}"
55
  self.models[key] = pipeline("translation", model=model_name, device=self.device)
56
  except Exception:
 
57
  self.models[key] = pipeline(
58
  "translation",
59
  model="facebook/nllb-200-distilled-600M",
@@ -89,6 +93,7 @@ def process_audio_pipeline(
89
  if not audio_path:
90
  raise ValueError("No audio file provided")
91
 
 
92
  progress(0.1, desc="Separating Vocals...")
93
  demucs_model = manager.get_demucs()
94
  wav, sr = librosa.load(audio_path, sr=44100, mono=False)
@@ -101,7 +106,7 @@ def process_audio_pipeline(
101
 
102
  sources = sources.cpu().numpy()
103
  vocals = sources[3]
104
- instrumental = sources[0] + sources[1] + sources[2]
105
 
106
  vocal_path = manager.temp_dir / "vocals.wav"
107
  inst_path = manager.temp_dir / "instrumental.wav"
@@ -109,20 +114,25 @@ def process_audio_pipeline(
109
  sf.write(vocal_path, vocals.T, 44100)
110
  sf.write(inst_path, instrumental.T, 44100)
111
 
 
112
  progress(0.3, desc="Transcribing...")
113
  whisper = manager.get_whisper()
114
  transcription = whisper(str(vocal_path), generate_kwargs={"task": "transcribe", "language": src_lang})
115
  original_text = transcription["text"]
116
 
 
117
  progress(0.5, desc="Translating...")
118
  translator = manager.get_translator(src_lang, tgt_lang)
119
- translated_text = translator(original_text)[0]['translation_text']
 
 
120
 
 
121
  progress(0.7, desc="Synthesizing Vocals...")
122
  tts_model = manager.get_tts()
123
 
 
124
  ref_audio = speaker_ref_path if speaker_ref_path else str(vocal_path)
125
-
126
  output_tts_path = manager.temp_dir / "tts_output.wav"
127
 
128
  tts_model.tts_to_file(
@@ -133,10 +143,12 @@ def process_audio_pipeline(
133
  split_sentences=True
134
  )
135
 
 
136
  progress(0.9, desc="Mixing...")
137
  tts_wav, _ = librosa.load(str(output_tts_path), sr=44100)
138
  inst_wav, _ = librosa.load(str(inst_path), sr=44100)
139
 
 
140
  min_len = min(len(tts_wav), len(inst_wav))
141
  mixed = tts_wav[:min_len] * 1.0 + inst_wav[:min_len] * 0.8
142
 
@@ -153,16 +165,17 @@ def process_audio_pipeline(
153
  )
154
 
155
  except Exception as e:
156
- logger.error(f"Pipeline failed: {str(e)}")
157
  return None, None, None, None, f"Error: {str(e)}", ""
158
 
 
159
  custom_css = """
160
  .container { max_width: 900px; margin: auto; }
161
  .gr-box { border-radius: 10px !important; border: 1px solid #e0e0e0; box-shadow: 0 4px 6px rgba(0,0,0,0.05); }
162
- .output-audio { margin-top: 10px; }
163
  """
164
 
165
- with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Song Translator") as demo:
 
166
  gr.Markdown("# 馃幍 AI Song Translator Pro")
167
 
168
  with gr.Row():
@@ -187,8 +200,9 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Song Translator
187
 
188
  with gr.Tabs():
189
  with gr.Tab("Lyrics"):
190
- orig_txt = gr.Textbox(label="Original Lyrics", lines=4, show_copy_button=True)
191
- trans_txt = gr.Textbox(label="Translated Lyrics", lines=4, show_copy_button=True)
 
192
 
193
  with gr.Tab("Stems"):
194
  voc_out = gr.Audio(label="Extracted Vocals")
@@ -202,4 +216,10 @@ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="AI Song Translator
202
  )
203
 
204
  if __name__ == "__main__":
205
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
1
  import os
2
  import sys
3
  import logging
 
4
  import tempfile
 
5
  import numpy as np
6
  import torch
 
7
  import soundfile as sf
8
  import gradio as gr
9
  from pathlib import Path
 
10
 
11
+ # Configuración de logs
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Configuración de entorno
16
+ os.environ["COQUI_TOS_AGREED"] = "1"
17
+ os.environ["CUDA_MODULE_LOADING"] = "LAZY"
18
+
19
+ # Intentar importar TTS con parche de seguridad para PyTorch 2.6+
20
  try:
21
  from TTS.api import TTS
22
  from TTS.config.shared_configs import BaseDatasetConfig
23
  torch.serialization.add_safe_globals([BaseDatasetConfig])
24
  except ImportError:
25
  pass
26
+ except Exception as e:
27
+ logger.warning(f"No se pudo aplicar el parche de seguridad de TTS: {e}")
28
 
29
+ # Importaciones de modelos (Lazy loading)
30
+ from transformers import pipeline
31
  from demucs.pretrained import get_model
32
  from demucs.apply import apply_model
33
+ import librosa
 
 
 
 
 
34
 
35
  class ProcessingManager:
36
  def __init__(self):
 
57
  model_name = f"Helsinki-NLP/opus-mt-{src}-{tgt}"
58
  self.models[key] = pipeline("translation", model=model_name, device=self.device)
59
  except Exception:
60
+ # Fallback a NLLB si el par de idiomas no existe en Helsinki-NLP
61
  self.models[key] = pipeline(
62
  "translation",
63
  model="facebook/nllb-200-distilled-600M",
 
93
  if not audio_path:
94
  raise ValueError("No audio file provided")
95
 
96
+ # 1. Separación (Demucs)
97
  progress(0.1, desc="Separating Vocals...")
98
  demucs_model = manager.get_demucs()
99
  wav, sr = librosa.load(audio_path, sr=44100, mono=False)
 
106
 
107
  sources = sources.cpu().numpy()
108
  vocals = sources[3]
109
+ instrumental = sources[0] + sources[1] + sources[2] # Bass + Drums + Other
110
 
111
  vocal_path = manager.temp_dir / "vocals.wav"
112
  inst_path = manager.temp_dir / "instrumental.wav"
 
114
  sf.write(vocal_path, vocals.T, 44100)
115
  sf.write(inst_path, instrumental.T, 44100)
116
 
117
+ # 2. Transcripción (Whisper)
118
  progress(0.3, desc="Transcribing...")
119
  whisper = manager.get_whisper()
120
  transcription = whisper(str(vocal_path), generate_kwargs={"task": "transcribe", "language": src_lang})
121
  original_text = transcription["text"]
122
 
123
+ # 3. Traducción
124
  progress(0.5, desc="Translating...")
125
  translator = manager.get_translator(src_lang, tgt_lang)
126
+ # Manejo simple de la salida del pipeline de traducción
127
+ trans_output = translator(original_text)
128
+ translated_text = trans_output[0]['translation_text'] if isinstance(trans_output, list) else trans_output['translation_text']
129
 
130
+ # 4. Síntesis de Voz (TTS)
131
  progress(0.7, desc="Synthesizing Vocals...")
132
  tts_model = manager.get_tts()
133
 
134
+ # Usar la referencia subida o la vocal extraída
135
  ref_audio = speaker_ref_path if speaker_ref_path else str(vocal_path)
 
136
  output_tts_path = manager.temp_dir / "tts_output.wav"
137
 
138
  tts_model.tts_to_file(
 
143
  split_sentences=True
144
  )
145
 
146
+ # 5. Mezcla Final
147
  progress(0.9, desc="Mixing...")
148
  tts_wav, _ = librosa.load(str(output_tts_path), sr=44100)
149
  inst_wav, _ = librosa.load(str(inst_path), sr=44100)
150
 
151
+ # Ajustar longitudes
152
  min_len = min(len(tts_wav), len(inst_wav))
153
  mixed = tts_wav[:min_len] * 1.0 + inst_wav[:min_len] * 0.8
154
 
 
165
  )
166
 
167
  except Exception as e:
168
+ logger.error(f"Pipeline failed: {str(e)}", exc_info=True)
169
  return None, None, None, None, f"Error: {str(e)}", ""
170
 
171
+ # CSS personalizado
172
  custom_css = """
173
  .container { max_width: 900px; margin: auto; }
174
  .gr-box { border-radius: 10px !important; border: 1px solid #e0e0e0; box-shadow: 0 4px 6px rgba(0,0,0,0.05); }
 
175
  """
176
 
177
+ # Interfaz Gráfica
178
+ with gr.Blocks(title="AI Song Translator") as demo:
179
  gr.Markdown("# 馃幍 AI Song Translator Pro")
180
 
181
  with gr.Row():
 
200
 
201
  with gr.Tabs():
202
  with gr.Tab("Lyrics"):
203
+ # show_copy_button removido por incompatibilidad con Gradio 6.x
204
+ orig_txt = gr.Textbox(label="Original Lyrics", lines=4, interactive=False)
205
+ trans_txt = gr.Textbox(label="Translated Lyrics", lines=4, interactive=False)
206
 
207
  with gr.Tab("Stems"):
208
  voc_out = gr.Audio(label="Extracted Vocals")
 
216
  )
217
 
218
  if __name__ == "__main__":
219
+ # theme y css movidos al launch() para compatibilidad con Gradio 6.0
220
+ demo.launch(
221
+ server_name="0.0.0.0",
222
+ server_port=7860,
223
+ theme=gr.themes.Soft(),
224
+ css=custom_css
225
+ )