Spaces:

PlayerBPlaytime
/

dolbyshi

Sleeping

App Files Files Community

PlayerBPlaytime committed on Apr 24

Commit

b2ff193

verified ·

1 Parent(s): cbcfe40

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -122

app.py CHANGED Viewed

@@ -5,28 +5,67 @@ import os
 import tempfile
 import zipfile
 from pathlib import Path
 def get_channel_name(index, total_channels):
     """Asigna nombres a los canales según la configuración Atmos/Surround"""
-    # Configuraciones comunes de Dolby Atmos
     channel_maps = {
         2: ["Left", "Right"],
         6: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround"],
-        8: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
-            "Left Back", "Right Back"],
-        10: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
-             "Left Back", "Right Back", "Left Height", "Right Height"],
-        12: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
-             "Left Back", "Right Back", "Left Height Front", "Right Height Front",
              "Left Height Rear", "Right Height Rear"],
-        14: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
-             "Left Back", "Right Back", "Left Height Front", "Right Height Front",
-             "Left Height Rear", "Right Height Rear", "Top Front", "Top Rear"],
-        16: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround",
-             "Left Back", "Right Back", "Left Wide", "Right Wide",
-             "Left Height Front", "Right Height Front", "Left Height Rear",
-             "Right Height Rear", "Top Front", "Top Rear"],
     }
     if total_channels in channel_maps:
@@ -34,65 +73,94 @@ def get_channel_name(index, total_channels):
     else:
         return f"Channel_{index + 1}"
-def extract_stems(audio_file, output_format):
     """Extrae todos los stems/canales de un archivo de audio multicanal"""
     if audio_file is None:
         return None, "❌ Por favor, sube un archivo de audio"
     try:
-        # Leer el archivo de audio
-        audio_data, sample_rate = sf.read(audio_file)
-        # Obtener información del archivo
         if len(audio_data.shape) == 1:
-            # Audio mono
             num_channels = 1
             audio_data = audio_data.reshape(-1, 1)
         else:
             num_channels = audio_data.shape[1]
-        # Información del archivo
         duration = len(audio_data) / sample_rate
         file_name = Path(audio_file).stem
-        info_text = f"""
-## 📊 Información del archivo
-- **Nombre:** {Path(audio_file).name}
-- **Canales detectados:** {num_channels}
-- **Sample Rate:** {sample_rate} Hz
-- **Duración:** {duration:.2f} segundos
-- **Formato de salida:** {output_format.upper()}
-## 🎚️ Canales extraídos:
-"""
-        # Crear directorio temporal para los stems
         temp_dir = tempfile.mkdtemp()
         stem_files = []
-        # Extraer cada canal
         for i in range(num_channels):
             channel_name = get_channel_name(i, num_channels)
-            channel_data = audio_data[:, i]
-            # Normalizar el canal
-            max_val = np.max(np.abs(channel_data))
-            if max_val > 0:
-                channel_data = channel_data / max_val * 0.95
-            # Guardar el stem
-            stem_filename = f"{file_name}_{channel_name.replace(' ', '_')}.{output_format}"
             stem_path = os.path.join(temp_dir, stem_filename)
             sf.write(stem_path, channel_data, sample_rate)
             stem_files.append(stem_path)
-            info_text += f"- ✅ **{channel_name}** → `{stem_filename}`\n"
-        # Crear archivo ZIP con todos los stems
         zip_filename = f"{file_name}_stems.zip"
         zip_path = os.path.join(temp_dir, zip_filename)
@@ -100,119 +168,116 @@ def extract_stems(audio_file, output_format):
             for stem_file in stem_files:
                 zipf.write(stem_file, os.path.basename(stem_file))
-        info_text += f"\n## 📦 Descarga\n\nTodos los stems empaquetados en: `{zip_filename}`"
         return zip_path, info_text
     except Exception as e:
-        return None, f"❌ Error procesando el archivo: {str(e)}"
-def create_demo_file():
-    """Crea un archivo de demostración multicanal"""
     temp_dir = tempfile.mkdtemp()
     demo_path = os.path.join(temp_dir, "demo_5.1_surround.wav")
-    sample_rate = 48000
-    duration = 3  # segundos
-    t = np.linspace(0, duration, int(sample_rate * duration))
-    # Crear 6 canales con diferentes frecuencias (simulando 5.1)
     channels = [
-        np.sin(2 * np.pi * 440 * t) * 0.5,   # Left - La
-        np.sin(2 * np.pi * 554 * t) * 0.5,   # Right - Do#
-        np.sin(2 * np.pi * 330 * t) * 0.5,   # Center - Mi
-        np.sin(2 * np.pi * 60 * t) * 0.8,    # LFE - Bajo
-        np.sin(2 * np.pi * 392 * t) * 0.4,   # Left Surround - Sol
-        np.sin(2 * np.pi * 494 * t) * 0.4,   # Right Surround - Si
     ]
-    # Combinar canales
-    audio_data = np.column_stack(channels)
-    sf.write(demo_path, audio_data, sample_rate)
     return demo_path
-# Interfaz Gradio
 with gr.Blocks(
-    title="🎵 Dolby Atmos Stem Extractor",
-    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")
 ) as demo:
     gr.Markdown("""
-    # 🎵 Dolby Atmos Stem Extractor
-    Extrae todos los canales/stems de archivos de audio **Dolby Atmos** o **Surround**.
-    ### 🎯 Formatos soportados:
-    - **Entrada:** WAV, FLAC, AIFF, OGG (multicanal)
-    - **Configuraciones:** Stereo, 5.1, 7.1, 7.1.4, 9.1.6, y más
-    ### 📝 Instrucciones:
-    1. Sube tu archivo de audio multicanal
-    2. Selecciona el formato de salida
-    3. ¡Descarga tus stems!
     """)
     with gr.Row():
         with gr.Column(scale=1):
             audio_input = gr.File(
                 label="📁 Sube tu archivo de audio",
-                file_types=[".wav", ".flac", ".aiff", ".ogg", ".mp3", ".m4a"],
                 type="filepath"
             )
-            output_format = gr.Radio(
-                choices=["wav", "flac", "ogg"],
-                value="wav",
-                label="🎚️ Formato de salida"
-            )
-            extract_btn = gr.Button(
-                "🚀 Extraer Stems",
-                variant="primary",
-                size="lg"
-            )
-            demo_btn = gr.Button(
-                "🎹 Generar archivo demo 5.1",
-                variant="secondary"
-            )
         with gr.Column(scale=1):
-            output_file = gr.File(
-                label="📦 Descargar Stems (ZIP)"
-            )
-            info_output = gr.Markdown(
-                label="📊 Información",
-                value="*Sube un archivo para comenzar...*"
-            )
-    # Ejemplos
-    gr.Markdown("""
-    ---
-    ### 💡 Tips:
-    - Los archivos **Dolby Atmos** típicamente tienen 12-16 canales
-    - El formato **5.1 Surround** tiene 6 canales
-    - El formato **7.1 Surround** tiene 8 canales
-    - Los canales de **altura** (Height) son característicos de Atmos
-    """)
-    # Eventos
     extract_btn.click(
         fn=extract_stems,
-        inputs=[audio_input, output_format],
         outputs=[output_file, info_output]
     )
     demo_btn.click(
-        fn=create_demo_file,
-        inputs=[],
         outputs=[audio_input]
     )
-# Lanzar
 if __name__ == "__main__":
     demo.launch()

 import tempfile
 import zipfile
 from pathlib import Path
+from pydub import AudioSegment
+import io
+def convert_to_wav(input_path):
+    """Convierte cualquier formato a WAV usando pydub/ffmpeg"""
+    ext = Path(input_path).suffix.lower()
+    format_map = {
+        ".m4a": "m4a",
+        ".mp3": "mp3",
+        ".aac": "aac",
+        ".ogg": "ogg",
+        ".flac": "flac",
+        ".aiff": "aiff",
+        ".aif": "aiff",
+        ".wav": "wav",
+        ".mp4": "mp4",
+        ".wma": "wma",
+    }
+    fmt = format_map.get(ext, ext.replace(".", ""))
+    temp_wav = tempfile.mktemp(suffix=".wav")
+    audio = AudioSegment.from_file(input_path, format=fmt)
+    audio.export(temp_wav, format="wav")
+    return temp_wav
 def get_channel_name(index, total_channels):
     """Asigna nombres a los canales según la configuración Atmos/Surround"""
     channel_maps = {
+        1: ["Mono"],
         2: ["Left", "Right"],
+        3: ["Left", "Right", "Center"],
+        4: ["Left", "Right", "Left Surround", "Right Surround"],
         6: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround"],
+        8: ["Left", "Right", "Center", "LFE",
+            "Left Surround", "Right Surround", "Left Back", "Right Back"],
+        10: ["Left", "Right", "Center", "LFE",
+             "Left Surround", "Right Surround",
+             "Left Back", "Right Back",
+             "Left Height", "Right Height"],
+        12: ["Left", "Right", "Center", "LFE",
+             "Left Surround", "Right Surround",
+             "Left Back", "Right Back",
+             "Left Height Front", "Right Height Front",
              "Left Height Rear", "Right Height Rear"],
+        14: ["Left", "Right", "Center", "LFE",
+             "Left Surround", "Right Surround",
+             "Left Back", "Right Back",
+             "Left Height Front", "Right Height Front",
+             "Left Height Rear", "Right Height Rear",
+             "Top Front", "Top Rear"],
+        16: ["Left", "Right", "Center", "LFE",
+             "Left Surround", "Right Surround",
+             "Left Back", "Right Back",
+             "Left Wide", "Right Wide",
+             "Left Height Front", "Right Height Front",
+             "Left Height Rear", "Right Height Rear",
+             "Top Front", "Top Rear"],
     }
     if total_channels in channel_maps:
     else:
         return f"Channel_{index + 1}"
+def extract_stems(audio_file, output_format, normalize):
     """Extrae todos los stems/canales de un archivo de audio multicanal"""
     if audio_file is None:
         return None, "❌ Por favor, sube un archivo de audio"
+    converted_wav = None
     try:
+        # ── 1. Convertir a WAV si hace falta ──────────────────────────
+        ext = Path(audio_file).suffix.lower()
+        if ext not in [".wav", ".flac", ".aiff", ".aif"]:
+            info_text = "⏳ Convirtiendo formato... por favor espera\n\n"
+            converted_wav = convert_to_wav(audio_file)
+            read_path = converted_wav
+        else:
+            read_path = audio_file
+        # ── 2. Leer el archivo ────────────────────────────────────────
+        audio_data, sample_rate = sf.read(read_path)
+        # Asegurar que sea 2D
         if len(audio_data.shape) == 1:
             num_channels = 1
             audio_data = audio_data.reshape(-1, 1)
         else:
             num_channels = audio_data.shape[1]
         duration = len(audio_data) / sample_rate
         file_name = Path(audio_file).stem
+        # ── 3. Info del archivo ───────────────────────────────────────
+        info_text = f"""## 📊 Información del archivo
+| Campo | Valor |
+|-------|-------|
+| **Nombre** | {Path(audio_file).name} |
+| **Canales detectados** | {num_channels} |
+| **Sample Rate** | {sample_rate} Hz |
+| **Duración** | {int(duration//60)}:{int(duration%60):02d} min |
+| **Bits** | {audio_data.dtype} |
+| **Formato salida** | {output_format.upper()} |
+## 🎚️ Stems extraídos:\n\n"""
+        # ── 4. Extraer cada canal ─────────────────────────────────────
         temp_dir = tempfile.mkdtemp()
         stem_files = []
         for i in range(num_channels):
             channel_name = get_channel_name(i, num_channels)
+            channel_data = audio_data[:, i].copy().astype(np.float32)
+            # Normalizar si se pide
+            if normalize:
+                max_val = np.max(np.abs(channel_data))
+                if max_val > 0:
+                    channel_data = channel_data / max_val * 0.95
+            # Calcular volumen RMS del canal
+            rms = np.sqrt(np.mean(channel_data**2))
+            rms_db = 20 * np.log10(rms + 1e-10)
+            stem_filename = f"{file_name}_{i+1:02d}_{channel_name.replace(' ', '_')}.{output_format}"
             stem_path = os.path.join(temp_dir, stem_filename)
             sf.write(stem_path, channel_data, sample_rate)
             stem_files.append(stem_path)
+            # Emoji según tipo de canal
+            emoji = "🔊"
+            if "LFE" in channel_name or "Sub" in channel_name:
+                emoji = "💥"
+            elif "Height" in channel_name or "Top" in channel_name:
+                emoji = "⬆️"
+            elif "Surround" in channel_name or "Back" in channel_name:
+                emoji = "↩️"
+            elif "Center" in channel_name:
+                emoji = "🎤"
+            elif "Left" in channel_name:
+                emoji = "◀️"
+            elif "Right" in channel_name:
+                emoji = "▶️"
+            info_text += f"{emoji} **{channel_name}** → `{stem_filename}` | RMS: `{rms_db:.1f} dBFS`\n\n"
+        # ── 5. Crear ZIP ──────────────────────────────────────────────
         zip_filename = f"{file_name}_stems.zip"
         zip_path = os.path.join(temp_dir, zip_filename)
             for stem_file in stem_files:
                 zipf.write(stem_file, os.path.basename(stem_file))
+        zip_size = os.path.getsize(zip_path) / (1024 * 1024)
+        info_text += f"\n---\n## 📦 ZIP listo\n`{zip_filename}` — **{zip_size:.1f} MB** con {num_channels} stems"
         return zip_path, info_text
     except Exception as e:
+        import traceback
+        return None, f"❌ Error: {str(e)}\n\n```\n{traceback.format_exc()}\n```"
+    finally:
+        # Limpiar WAV temporal si se creó
+        if converted_wav and os.path.exists(converted_wav):
+            os.remove(converted_wav)
+def create_demo_51():
+    """Crea un archivo demo 5.1 Surround"""
     temp_dir = tempfile.mkdtemp()
     demo_path = os.path.join(temp_dir, "demo_5.1_surround.wav")
+    sr = 48000
+    t  = np.linspace(0, 4, sr * 4)
     channels = [
+        np.sin(2 * np.pi * 440 * t) * 0.6,               # L
+        np.sin(2 * np.pi * 554 * t) * 0.6,               # R
+        np.sin(2 * np.pi * 330 * t) * 0.7,               # C
+        np.sin(2 * np.pi * 55  * t) * 0.9,               # LFE
+        np.sin(2 * np.pi * 392 * t) * 0.4,               # Ls
+        np.sin(2 * np.pi * 494 * t) * 0.4,               # Rs
     ]
+    sf.write(demo_path, np.column_stack(channels).astype(np.float32), sr)
     return demo_path
+# ── UI ────────────────────────────────────────────────────────────────────────
 with gr.Blocks(
+    title="🎵 Atmos Stem Extractor",
+    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
+    css="""
+        .title { text-align: center; }
+        .gr-button-primary { background: linear-gradient(90deg,#7c3aed,#2563eb) !important; }
+    """
 ) as demo:
     gr.Markdown("""
+    # 🎵 Dolby Atmos · Stem Extractor
+    **Extrae cada canal de tus archivos multicanal** — Atmos, 5.1, 7.1, 7.1.4 y más
     """)
     with gr.Row():
+        # ── Columna izquierda ─────────────────────────────────────────
         with gr.Column(scale=1):
             audio_input = gr.File(
                 label="📁 Sube tu archivo de audio",
+                file_types=[".wav", ".flac", ".aiff", ".aif",
+                            ".m4a", ".mp3", ".aac", ".ogg",
+                            ".mp4", ".wma"],
                 type="filepath"
             )
+            with gr.Row():
+                output_format = gr.Radio(
+                    choices=["wav", "flac", "ogg"],
+                    value="wav",
+                    label="🎚️ Formato de salida"
+                )
+                normalize = gr.Checkbox(
+                    value=False,
+                    label="📶 Normalizar canales"
+                )
+            extract_btn = gr.Button("🚀 Extraer Stems", variant="primary", size="lg")
+            demo_btn    = gr.Button("🎹 Generar demo 5.1", variant="secondary")
+            gr.Markdown("""
+            ### 📋 Formatos soportados
+            | Entrada | Salida |
+            |---------|--------|
+            | WAV, FLAC, AIFF | WAV |
+            | **M4A, MP3, AAC** | FLAC |
+            | OGG, MP4, WMA | OGG |
+            ### 🎛️ Configuraciones detectadas
+            | Config | Canales |
+            |--------|---------|
+            | Stereo | 2 |
+            | 5.1 Surround | 6 |
+            | 7.1 Surround | 8 |
+            | 7.1.4 Atmos | 12 |
+            | 9.1.6 Atmos | 16 |
+            """)
+        # ── Columna derecha ───────────────────────────────────────────
         with gr.Column(scale=1):
+            output_file = gr.File(label="📦 Descargar ZIP con todos los stems")
+            info_output = gr.Markdown(value="*Sube un archivo para comenzar...*")
+    # ── Eventos ───────────────────────────────────────────────────────
     extract_btn.click(
         fn=extract_stems,
+        inputs=[audio_input, output_format, normalize],
         outputs=[output_file, info_output]
     )
     demo_btn.click(
+        fn=create_demo_51,
         outputs=[audio_input]
     )
 if __name__ == "__main__":
     demo.launch()