PlayerBPlaytime committed on
Commit
b2ff193
·
verified ·
1 Parent(s): cbcfe40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -122
app.py CHANGED
@@ -5,28 +5,67 @@ import os
5
  import tempfile
6
  import zipfile
7
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def get_channel_name(index, total_channels):
10
  """Asigna nombres a los canales segΓΊn la configuraciΓ³n Atmos/Surround"""
11
 
12
- # Configuraciones comunes de Dolby Atmos
13
  channel_maps = {
 
14
  2: ["Left", "Right"],
 
 
15
  6: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround"],
16
- 8: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
17
- "Left Back", "Right Back"],
18
- 10: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
19
- "Left Back", "Right Back", "Left Height", "Right Height"],
20
- 12: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
21
- "Left Back", "Right Back", "Left Height Front", "Right Height Front",
 
 
 
 
22
  "Left Height Rear", "Right Height Rear"],
23
- 14: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
24
- "Left Back", "Right Back", "Left Height Front", "Right Height Front",
25
- "Left Height Rear", "Right Height Rear", "Top Front", "Top Rear"],
26
- 16: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
27
- "Left Back", "Right Back", "Left Wide", "Right Wide",
28
- "Left Height Front", "Right Height Front", "Left Height Rear",
29
- "Right Height Rear", "Top Front", "Top Rear"],
 
 
 
 
 
 
30
  }
31
 
32
  if total_channels in channel_maps:
@@ -34,65 +73,94 @@ def get_channel_name(index, total_channels):
34
  else:
35
  return f"Channel_{index + 1}"
36
 
37
- def extract_stems(audio_file, output_format):
38
  """Extrae todos los stems/canales de un archivo de audio multicanal"""
39
 
40
  if audio_file is None:
41
  return None, "❌ Por favor, sube un archivo de audio"
42
 
 
 
43
  try:
44
- # Leer el archivo de audio
45
- audio_data, sample_rate = sf.read(audio_file)
 
 
 
 
 
 
 
46
 
47
- # Obtener informaciΓ³n del archivo
 
 
 
48
  if len(audio_data.shape) == 1:
49
- # Audio mono
50
  num_channels = 1
51
  audio_data = audio_data.reshape(-1, 1)
52
  else:
53
  num_channels = audio_data.shape[1]
54
 
55
- # InformaciΓ³n del archivo
56
  duration = len(audio_data) / sample_rate
57
  file_name = Path(audio_file).stem
58
 
59
- info_text = f"""
60
- ## πŸ“Š InformaciΓ³n del archivo
61
-
62
- - **Nombre:** {Path(audio_file).name}
63
- - **Canales detectados:** {num_channels}
64
- - **Sample Rate:** {sample_rate} Hz
65
- - **DuraciΓ³n:** {duration:.2f} segundos
66
- - **Formato de salida:** {output_format.upper()}
67
 
68
- ## 🎚️ Canales extraídos:
 
 
 
 
 
 
 
69
 
70
- """
71
 
72
- # Crear directorio temporal para los stems
73
  temp_dir = tempfile.mkdtemp()
74
  stem_files = []
75
 
76
- # Extraer cada canal
77
  for i in range(num_channels):
78
  channel_name = get_channel_name(i, num_channels)
79
- channel_data = audio_data[:, i]
 
 
 
 
 
 
80
 
81
- # Normalizar el canal
82
- max_val = np.max(np.abs(channel_data))
83
- if max_val > 0:
84
- channel_data = channel_data / max_val * 0.95
85
 
86
- # Guardar el stem
87
- stem_filename = f"{file_name}_{channel_name.replace(' ', '_')}.{output_format}"
88
  stem_path = os.path.join(temp_dir, stem_filename)
89
 
90
  sf.write(stem_path, channel_data, sample_rate)
91
  stem_files.append(stem_path)
92
 
93
- info_text += f"- βœ… **{channel_name}** β†’ `{stem_filename}`\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
 
95
- # Crear archivo ZIP con todos los stems
96
  zip_filename = f"{file_name}_stems.zip"
97
  zip_path = os.path.join(temp_dir, zip_filename)
98
 
@@ -100,119 +168,116 @@ def extract_stems(audio_file, output_format):
100
  for stem_file in stem_files:
101
  zipf.write(stem_file, os.path.basename(stem_file))
102
 
103
- info_text += f"\n## πŸ“¦ Descarga\n\nTodos los stems empaquetados en: `{zip_filename}`"
 
104
 
105
  return zip_path, info_text
106
 
107
  except Exception as e:
108
- return None, f"❌ Error procesando el archivo: {str(e)}"
109
-
110
- def create_demo_file():
111
- """Crea un archivo de demostraciΓ³n multicanal"""
112
 
 
 
 
 
 
 
 
 
113
  temp_dir = tempfile.mkdtemp()
114
  demo_path = os.path.join(temp_dir, "demo_5.1_surround.wav")
115
 
116
- sample_rate = 48000
117
- duration = 3 # segundos
118
- t = np.linspace(0, duration, int(sample_rate * duration))
119
 
120
- # Crear 6 canales con diferentes frecuencias (simulando 5.1)
121
  channels = [
122
- np.sin(2 * np.pi * 440 * t) * 0.5, # Left - La
123
- np.sin(2 * np.pi * 554 * t) * 0.5, # Right - Do#
124
- np.sin(2 * np.pi * 330 * t) * 0.5, # Center - Mi
125
- np.sin(2 * np.pi * 60 * t) * 0.8, # LFE - Bajo
126
- np.sin(2 * np.pi * 392 * t) * 0.4, # Left Surround - Sol
127
- np.sin(2 * np.pi * 494 * t) * 0.4, # Right Surround - Si
128
  ]
129
 
130
- # Combinar canales
131
- audio_data = np.column_stack(channels)
132
-
133
- sf.write(demo_path, audio_data, sample_rate)
134
-
135
  return demo_path
136
 
137
- # Interfaz Gradio
 
138
  with gr.Blocks(
139
- title="🎡 Dolby Atmos Stem Extractor",
140
- theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")
 
 
 
 
141
  ) as demo:
142
-
143
  gr.Markdown("""
144
- # 🎡 Dolby Atmos Stem Extractor
145
-
146
- Extrae todos los canales/stems de archivos de audio **Dolby Atmos** o **Surround**.
147
-
148
- ### 🎯 Formatos soportados:
149
- - **Entrada:** WAV, FLAC, AIFF, OGG (multicanal)
150
- - **Configuraciones:** Stereo, 5.1, 7.1, 7.1.4, 9.1.6, y mΓ‘s
151
-
152
- ### πŸ“ Instrucciones:
153
- 1. Sube tu archivo de audio multicanal
154
- 2. Selecciona el formato de salida
155
- 3. Β‘Descarga tus stems!
156
  """)
157
-
158
  with gr.Row():
 
159
  with gr.Column(scale=1):
160
  audio_input = gr.File(
161
  label="πŸ“ Sube tu archivo de audio",
162
- file_types=[".wav", ".flac", ".aiff", ".ogg", ".mp3", ".m4a"],
 
 
163
  type="filepath"
164
  )
165
-
166
- output_format = gr.Radio(
167
- choices=["wav", "flac", "ogg"],
168
- value="wav",
169
- label="🎚️ Formato de salida"
170
- )
171
-
172
- extract_btn = gr.Button(
173
- "πŸš€ Extraer Stems",
174
- variant="primary",
175
- size="lg"
176
- )
177
-
178
- demo_btn = gr.Button(
179
- "🎹 Generar archivo demo 5.1",
180
- variant="secondary"
181
- )
182
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  with gr.Column(scale=1):
184
- output_file = gr.File(
185
- label="πŸ“¦ Descargar Stems (ZIP)"
186
- )
187
-
188
- info_output = gr.Markdown(
189
- label="πŸ“Š InformaciΓ³n",
190
- value="*Sube un archivo para comenzar...*"
191
- )
192
-
193
- # Ejemplos
194
- gr.Markdown("""
195
- ---
196
- ### πŸ’‘ Tips:
197
- - Los archivos **Dolby Atmos** tΓ­picamente tienen 12-16 canales
198
- - El formato **5.1 Surround** tiene 6 canales
199
- - El formato **7.1 Surround** tiene 8 canales
200
- - Los canales de **altura** (Height) son caracterΓ­sticos de Atmos
201
- """)
202
-
203
- # Eventos
204
  extract_btn.click(
205
  fn=extract_stems,
206
- inputs=[audio_input, output_format],
207
  outputs=[output_file, info_output]
208
  )
209
-
210
  demo_btn.click(
211
- fn=create_demo_file,
212
- inputs=[],
213
  outputs=[audio_input]
214
  )
215
 
216
- # Lanzar
217
  if __name__ == "__main__":
218
  demo.launch()
 
5
  import tempfile
6
  import zipfile
7
  from pathlib import Path
8
+ from pydub import AudioSegment
9
+ import io
10
+
11
def convert_to_wav(input_path):
    """Convert an audio file to a temporary WAV file using pydub/ffmpeg.

    The container format handed to pydub is derived from the file
    extension; extensions missing from the lookup table are passed through
    with the leading dot removed.

    Args:
        input_path: Path of the audio file to convert.

    Returns:
        Path of the newly created temporary ``.wav`` file. The caller owns
        the file and is responsible for deleting it.

    Raises:
        Exception: Whatever pydub raises while decoding or exporting is
            propagated unchanged; the temporary file is removed first so a
            failed conversion leaves nothing behind.
    """
    ext = Path(input_path).suffix.lower()

    format_map = {
        ".m4a": "m4a",
        ".mp3": "mp3",
        ".aac": "aac",
        ".ogg": "ogg",
        ".flac": "flac",
        ".aiff": "aiff",
        ".aif": "aiff",
        ".wav": "wav",
        ".mp4": "mp4",
        ".wma": "wma",
    }

    fmt = format_map.get(ext, ext.replace(".", ""))

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, which another process could claim before we write to it.
    fd, temp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # pydub reopens the file by path

    try:
        audio = AudioSegment.from_file(input_path, format=fmt)
        audio.export(temp_wav, format="wav")
    except Exception:
        # The caller only learns the path on success, so clean up here.
        if os.path.exists(temp_wav):
            os.remove(temp_wav)
        raise

    return temp_wav
35
 
36
  def get_channel_name(index, total_channels):
37
  """Asigna nombres a los canales segΓΊn la configuraciΓ³n Atmos/Surround"""
38
 
 
39
  channel_maps = {
40
+ 1: ["Mono"],
41
  2: ["Left", "Right"],
42
+ 3: ["Left", "Right", "Center"],
43
+ 4: ["Left", "Right", "Left Surround", "Right Surround"],
44
  6: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround"],
45
+ 8: ["Left", "Right", "Center", "LFE",
46
+ "Left Surround", "Right Surround", "Left Back", "Right Back"],
47
+ 10: ["Left", "Right", "Center", "LFE",
48
+ "Left Surround", "Right Surround",
49
+ "Left Back", "Right Back",
50
+ "Left Height", "Right Height"],
51
+ 12: ["Left", "Right", "Center", "LFE",
52
+ "Left Surround", "Right Surround",
53
+ "Left Back", "Right Back",
54
+ "Left Height Front", "Right Height Front",
55
  "Left Height Rear", "Right Height Rear"],
56
+ 14: ["Left", "Right", "Center", "LFE",
57
+ "Left Surround", "Right Surround",
58
+ "Left Back", "Right Back",
59
+ "Left Height Front", "Right Height Front",
60
+ "Left Height Rear", "Right Height Rear",
61
+ "Top Front", "Top Rear"],
62
+ 16: ["Left", "Right", "Center", "LFE",
63
+ "Left Surround", "Right Surround",
64
+ "Left Back", "Right Back",
65
+ "Left Wide", "Right Wide",
66
+ "Left Height Front", "Right Height Front",
67
+ "Left Height Rear", "Right Height Rear",
68
+ "Top Front", "Top Rear"],
69
  }
70
 
71
  if total_channels in channel_maps:
 
73
  else:
74
  return f"Channel_{index + 1}"
75
 
76
+ def extract_stems(audio_file, output_format, normalize):
77
  """Extrae todos los stems/canales de un archivo de audio multicanal"""
78
 
79
  if audio_file is None:
80
  return None, "❌ Por favor, sube un archivo de audio"
81
 
82
+ converted_wav = None
83
+
84
  try:
85
+ # ── 1. Convertir a WAV si hace falta ──────────────────────────
86
+ ext = Path(audio_file).suffix.lower()
87
+
88
+ if ext not in [".wav", ".flac", ".aiff", ".aif"]:
89
+ info_text = "⏳ Convirtiendo formato... por favor espera\n\n"
90
+ converted_wav = convert_to_wav(audio_file)
91
+ read_path = converted_wav
92
+ else:
93
+ read_path = audio_file
94
 
95
+ # ── 2. Leer el archivo ────────────────────────────────────────
96
+ audio_data, sample_rate = sf.read(read_path)
97
+
98
+ # Asegurar que sea 2D
99
  if len(audio_data.shape) == 1:
 
100
  num_channels = 1
101
  audio_data = audio_data.reshape(-1, 1)
102
  else:
103
  num_channels = audio_data.shape[1]
104
 
 
105
  duration = len(audio_data) / sample_rate
106
  file_name = Path(audio_file).stem
107
 
108
+ # ── 3. Info del archivo ───────────────────────────────────────
109
+ info_text = f"""## πŸ“Š InformaciΓ³n del archivo
 
 
 
 
 
 
110
 
111
+ | Campo | Valor |
112
+ |-------|-------|
113
+ | **Nombre** | {Path(audio_file).name} |
114
+ | **Canales detectados** | {num_channels} |
115
+ | **Sample Rate** | {sample_rate} Hz |
116
+ | **DuraciΓ³n** | {int(duration//60)}:{int(duration%60):02d} min |
117
+ | **Bits** | {audio_data.dtype} |
118
+ | **Formato salida** | {output_format.upper()} |
119
 
120
+ ## 🎚️ Stems extraídos:\n\n"""
121
 
122
+ # ── 4. Extraer cada canal ─────────────────────────────────────
123
  temp_dir = tempfile.mkdtemp()
124
  stem_files = []
125
 
 
126
  for i in range(num_channels):
127
  channel_name = get_channel_name(i, num_channels)
128
+ channel_data = audio_data[:, i].copy().astype(np.float32)
129
+
130
+ # Normalizar si se pide
131
+ if normalize:
132
+ max_val = np.max(np.abs(channel_data))
133
+ if max_val > 0:
134
+ channel_data = channel_data / max_val * 0.95
135
 
136
+ # Calcular volumen RMS del canal
137
+ rms = np.sqrt(np.mean(channel_data**2))
138
+ rms_db = 20 * np.log10(rms + 1e-10)
 
139
 
140
+ stem_filename = f"{file_name}_{i+1:02d}_{channel_name.replace(' ', '_')}.{output_format}"
 
141
  stem_path = os.path.join(temp_dir, stem_filename)
142
 
143
  sf.write(stem_path, channel_data, sample_rate)
144
  stem_files.append(stem_path)
145
 
146
+ # Emoji segΓΊn tipo de canal
147
+ emoji = "πŸ”Š"
148
+ if "LFE" in channel_name or "Sub" in channel_name:
149
+ emoji = "πŸ’₯"
150
+ elif "Height" in channel_name or "Top" in channel_name:
151
+ emoji = "⬆️"
152
+ elif "Surround" in channel_name or "Back" in channel_name:
153
+ emoji = "↩️"
154
+ elif "Center" in channel_name:
155
+ emoji = "🎀"
156
+ elif "Left" in channel_name:
157
+ emoji = "◀️"
158
+ elif "Right" in channel_name:
159
+ emoji = "▢️"
160
+
161
+ info_text += f"{emoji} **{channel_name}** β†’ `{stem_filename}` | RMS: `{rms_db:.1f} dBFS`\n\n"
162
 
163
+ # ── 5. Crear ZIP ──────────────────────────────────────────────
164
  zip_filename = f"{file_name}_stems.zip"
165
  zip_path = os.path.join(temp_dir, zip_filename)
166
 
 
168
  for stem_file in stem_files:
169
  zipf.write(stem_file, os.path.basename(stem_file))
170
 
171
+ zip_size = os.path.getsize(zip_path) / (1024 * 1024)
172
+ info_text += f"\n---\n## πŸ“¦ ZIP listo\n`{zip_filename}` β€” **{zip_size:.1f} MB** con {num_channels} stems"
173
 
174
  return zip_path, info_text
175
 
176
  except Exception as e:
177
+ import traceback
178
+ return None, f"❌ Error: {str(e)}\n\n```\n{traceback.format_exc()}\n```"
 
 
179
 
180
+ finally:
181
+ # Limpiar WAV temporal si se creΓ³
182
+ if converted_wav and os.path.exists(converted_wav):
183
+ os.remove(converted_wav)
184
+
185
+
186
def create_demo_51():
    """Create a 4-second, 6-channel (5.1 surround) demo WAV file.

    Each channel carries a sine tone at its own frequency and amplitude so
    the extracted stems are easy to tell apart.

    Returns:
        Path of the generated ``demo_5.1_surround.wav`` file, written inside
        a fresh temporary directory.
    """
    temp_dir = tempfile.mkdtemp()
    demo_path = os.path.join(temp_dir, "demo_5.1_surround.wav")

    sr = 48000
    # endpoint=False keeps the sample spacing at exactly 1/sr; including the
    # endpoint stretches the time axis to 4/(N-1) per sample, which shifts
    # every tone slightly off its nominal frequency.
    t = np.linspace(0, 4, sr * 4, endpoint=False)

    channels = [
        np.sin(2 * np.pi * 440 * t) * 0.6,  # L
        np.sin(2 * np.pi * 554 * t) * 0.6,  # R
        np.sin(2 * np.pi * 330 * t) * 0.7,  # C
        np.sin(2 * np.pi * 55 * t) * 0.9,   # LFE
        np.sin(2 * np.pi * 392 * t) * 0.4,  # Ls
        np.sin(2 * np.pi * 494 * t) * 0.4,  # Rs
    ]

    # One column per channel, stored as 32-bit float samples.
    sf.write(demo_path, np.column_stack(channels).astype(np.float32), sr)

    return demo_path
205
 
206
+
207
+ # ── UI ────────────────────────────────────────────────────────────────────────
208
  with gr.Blocks(
209
+ title="🎡 Atmos Stem Extractor",
210
+ theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
211
+ css="""
212
+ .title { text-align: center; }
213
+ .gr-button-primary { background: linear-gradient(90deg,#7c3aed,#2563eb) !important; }
214
+ """
215
  ) as demo:
216
+
217
  gr.Markdown("""
218
+ # 🎡 Dolby Atmos · Stem Extractor
219
+ **Extrae cada canal de tus archivos multicanal** β€” Atmos, 5.1, 7.1, 7.1.4 y mΓ‘s
 
 
 
 
 
 
 
 
 
 
220
  """)
221
+
222
  with gr.Row():
223
+ # ── Columna izquierda ─────────────────────────────────────────
224
  with gr.Column(scale=1):
225
  audio_input = gr.File(
226
  label="πŸ“ Sube tu archivo de audio",
227
+ file_types=[".wav", ".flac", ".aiff", ".aif",
228
+ ".m4a", ".mp3", ".aac", ".ogg",
229
+ ".mp4", ".wma"],
230
  type="filepath"
231
  )
232
+
233
+ with gr.Row():
234
+ output_format = gr.Radio(
235
+ choices=["wav", "flac", "ogg"],
236
+ value="wav",
237
+ label="🎚️ Formato de salida"
238
+ )
239
+ normalize = gr.Checkbox(
240
+ value=False,
241
+ label="πŸ“Ά Normalizar canales"
242
+ )
243
+
244
+ extract_btn = gr.Button("πŸš€ Extraer Stems", variant="primary", size="lg")
245
+ demo_btn = gr.Button("🎹 Generar demo 5.1", variant="secondary")
246
+
247
+ gr.Markdown("""
248
+ ### πŸ“‹ Formatos soportados
249
+ | Entrada | Salida |
250
+ |---------|--------|
251
+ | WAV, FLAC, AIFF | WAV |
252
+ | **M4A, MP3, AAC** | FLAC |
253
+ | OGG, MP4, WMA | OGG |
254
+
255
+ ### πŸŽ›οΈ Configuraciones detectadas
256
+ | Config | Canales |
257
+ |--------|---------|
258
+ | Stereo | 2 |
259
+ | 5.1 Surround | 6 |
260
+ | 7.1 Surround | 8 |
261
+ | 7.1.4 Atmos | 12 |
262
+ | 9.1.6 Atmos | 16 |
263
+ """)
264
+
265
+ # ── Columna derecha ───────────────────────────────────────────
266
  with gr.Column(scale=1):
267
+ output_file = gr.File(label="πŸ“¦ Descargar ZIP con todos los stems")
268
+ info_output = gr.Markdown(value="*Sube un archivo para comenzar...*")
269
+
270
+ # ── Eventos ───────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  extract_btn.click(
272
  fn=extract_stems,
273
+ inputs=[audio_input, output_format, normalize],
274
  outputs=[output_file, info_output]
275
  )
276
+
277
  demo_btn.click(
278
+ fn=create_demo_51,
 
279
  outputs=[audio_input]
280
  )
281
 
 
282
  if __name__ == "__main__":
283
  demo.launch()