"""Gradio app that splits a multichannel audio file into one stem per channel."""

import io
import os
import shutil
import tempfile
import traceback
import zipfile
from pathlib import Path
from typing import Optional, Tuple

import gradio as gr
import numpy as np
import soundfile as sf
from pydub import AudioSegment

# Extensions that are handed straight to soundfile; everything else is first
# decoded to a temporary WAV through pydub/ffmpeg.
_NATIVE_EXTENSIONS = (".wav", ".flac", ".aiff", ".aif")

# File extension -> format name passed to pydub (which forwards it to
# ffmpeg's ``-f`` option).
_FFMPEG_FORMATS = {
    ".m4a": "m4a",
    ".mp3": "mp3",
    ".aac": "aac",
    ".ogg": "ogg",
    ".flac": "flac",
    ".aiff": "aiff",
    ".aif": "aiff",
    ".wav": "wav",
    ".mp4": "mp4",
    # WMA audio lives in an ASF container; ffmpeg has no demuxer called "wma".
    ".wma": "asf",
}

# Channel count -> ordered channel names for the layouts this tool recognises.
_CHANNEL_LAYOUTS = {
    1: ["Mono"],
    2: ["Left", "Right"],
    3: ["Left", "Right", "Center"],
    4: ["Left", "Right", "Left Surround", "Right Surround"],
    6: ["Left", "Right", "Center", "LFE", "Left Surround", "Right Surround"],
    8: [
        "Left", "Right", "Center", "LFE",
        "Left Surround", "Right Surround",
        "Left Back", "Right Back",
    ],
    10: [
        "Left", "Right", "Center", "LFE",
        "Left Surround", "Right Surround",
        "Left Back", "Right Back",
        "Left Height", "Right Height",
    ],
    12: [
        "Left", "Right", "Center", "LFE",
        "Left Surround", "Right Surround",
        "Left Back", "Right Back",
        "Left Height Front", "Right Height Front",
        "Left Height Rear", "Right Height Rear",
    ],
    14: [
        "Left", "Right", "Center", "LFE",
        "Left Surround", "Right Surround",
        "Left Back", "Right Back",
        "Left Height Front", "Right Height Front",
        "Left Height Rear", "Right Height Rear",
        "Top Front", "Top Rear",
    ],
    16: [
        "Left", "Right", "Center", "LFE",
        "Left Surround", "Right Surround",
        "Left Back", "Right Back",
        "Left Wide", "Right Wide",
        "Left Height Front", "Right Height Front",
        "Left Height Rear", "Right Height Rear",
        "Top Front", "Top Rear",
    ],
}

# Ordered (keywords, emoji) rules; the first rule with a keyword contained in
# the channel name wins, so "Left Surround" is tagged as surround, not left.
_EMOJI_RULES = (
    (("LFE", "Sub"), "💥"),
    (("Height", "Top"), "⬆️"),
    (("Surround", "Back"), "↩️"),
    (("Center",), "🎤"),
    (("Left",), "◀️"),
    (("Right",), "▶️"),
)
_DEFAULT_EMOJI = "🔊"


def convert_to_wav(input_path: str) -> str:
    """Decode *input_path* to a temporary WAV file using pydub/ffmpeg.

    The input format is chosen from the file extension; unknown extensions
    are passed to pydub with the leading dot removed.

    Args:
        input_path: Path of the audio file to decode.

    Returns:
        Path of the newly created temporary ``.wav`` file. The caller is
        responsible for deleting it.

    Raises:
        Exception: Whatever pydub raises when decoding or exporting fails.
            The temporary file is removed before the error propagates.
    """
    ext = Path(input_path).suffix.lower()
    fmt = _FFMPEG_FORMATS.get(ext, ext.replace(".", ""))

    # mkstemp creates the file atomically, unlike the deprecated mktemp which
    # only returns a name that another process could claim first.
    fd, temp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio = AudioSegment.from_file(input_path, format=fmt)
        audio.export(temp_wav, format="wav")
    except Exception:
        if os.path.exists(temp_wav):
            os.remove(temp_wav)
        raise
    return temp_wav


def get_channel_name(index: int, total_channels: int) -> str:
    """Return a display name for channel *index* of a *total_channels* file.

    Known channel counts use the names in ``_CHANNEL_LAYOUTS``. Any other
    count, or an index past the end of a known layout, yields the generic
    name ``Channel_<index + 1>``.
    """
    try:
        return _CHANNEL_LAYOUTS[total_channels][index]
    except (KeyError, IndexError):
        return f"Channel_{index + 1}"


def _channel_emoji(channel_name: str) -> str:
    """Pick the emoji shown next to *channel_name* in the report."""
    for keywords, emoji in _EMOJI_RULES:
        if any(keyword in channel_name for keyword in keywords):
            return emoji
    return _DEFAULT_EMOJI


def extract_stems(
    audio_file: Optional[str],
    output_format: str,
    normalize: bool,
) -> Tuple[Optional[str], str]:
    """Split a multichannel audio file into one file per channel and zip them.

    Args:
        audio_file: Path of the uploaded file, or ``None`` if nothing was
            uploaded.
        output_format: File extension used for the stems (the UI offers
            ``"wav"``, ``"flac"`` and ``"ogg"``).
        normalize: When true, each channel is scaled so that its peak
            absolute value is 0.95.

    Returns:
        ``(zip_path, report)`` on success, where *report* is Markdown text.
        On any failure ``(None, message)`` is returned instead of raising, so
        the error can be shown in the UI.
    """
    if audio_file is None:
        return None, "❌ Please upload an audio file"

    converted_wav = None
    temp_dir = None
    try:
        # ── 1. Convert to WAV if needed ──────────────────────────
        ext = Path(audio_file).suffix.lower()
        if ext in _NATIVE_EXTENSIONS:
            read_path = audio_file
        else:
            converted_wav = convert_to_wav(audio_file)
            read_path = converted_wav

        # ── 2. Read the file ─────────────────────────────────────
        # always_2d gives a (frames, channels) array even for mono input.
        audio_data, sample_rate = sf.read(read_path, always_2d=True)
        num_frames, num_channels = audio_data.shape
        if num_frames == 0:
            # Peak and RMS are undefined for an empty signal.
            return None, "❌ The audio file contains no samples"

        duration = num_frames / sample_rate
        file_name = Path(audio_file).stem

        # ── 3. File info ─────────────────────────────────────────
        report = [
            "## 📊 File Information\n"
            "\n"
            "| Field | Value |\n"
            "|-------|-------|\n"
            f"| **Name** | {Path(audio_file).name} |\n"
            f"| **Detected Channels** | {num_channels} |\n"
            f"| **Sample Rate** | {sample_rate} Hz |\n"
            f"| **Duration** | {int(duration//60)}:{int(duration%60):02d} min |\n"
            f"| **Bit Depth** | {audio_data.dtype} |\n"
            f"| **Output Format** | {output_format.upper()} |\n"
            "\n"
            "## 🎚️ Extracted Stems:\n\n"
        ]

        # ── 4. Extract each channel ──────────────────────────────
        temp_dir = tempfile.mkdtemp()
        stem_files = []
        for i in range(num_channels):
            channel_name = get_channel_name(i, num_channels)
            # astype already returns a fresh array, so no extra copy is needed.
            channel_data = audio_data[:, i].astype(np.float32)

            if normalize:
                max_val = np.max(np.abs(channel_data))
                if max_val > 0:
                    channel_data = channel_data / max_val * 0.95

            # The small offset keeps log10 finite for a silent channel.
            rms = np.sqrt(np.mean(channel_data**2))
            rms_db = 20 * np.log10(rms + 1e-10)

            stem_filename = (
                f"{file_name}_{i+1:02d}_{channel_name.replace(' ', '_')}.{output_format}"
            )
            stem_path = os.path.join(temp_dir, stem_filename)
            sf.write(stem_path, channel_data, sample_rate)
            stem_files.append(stem_path)

            emoji = _channel_emoji(channel_name)
            report.append(
                f"{emoji} **{channel_name}** → `{stem_filename}` | RMS: `{rms_db:.1f} dBFS`\n\n"
            )

        # ── 5. Create ZIP ────────────────────────────────────────
        zip_filename = f"{file_name}_stems.zip"
        zip_path = os.path.join(temp_dir, zip_filename)
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for stem_file in stem_files:
                zipf.write(stem_file, os.path.basename(stem_file))

        # The stems now live inside the archive; drop the loose copies so
        # only the ZIP stays on disk.
        for stem_file in stem_files:
            os.remove(stem_file)

        zip_size = os.path.getsize(zip_path) / (1024 * 1024)
        report.append(
            f"\n---\n## 📦 ZIP Ready\n`{zip_filename}` — **{zip_size:.1f} MB** with {num_channels} stems"
        )
        return zip_path, "".join(report)

    except Exception as e:
        # UI boundary: report the failure to the user instead of raising, and
        # discard any partially written output.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
        return None, f"❌ Error: {e}\n\n```\n{traceback.format_exc()}\n```"

    finally:
        if converted_wav and os.path.exists(converted_wav):
            os.remove(converted_wav)


def create_demo_51() -> str:
    """Write a 4-second, 6-channel 48 kHz sine-tone demo file and return its path.

    Each channel carries a single sine tone with its own frequency and gain.
    """
    temp_dir = tempfile.mkdtemp()
    demo_path = os.path.join(temp_dir, "demo_5.1_surround.wav")

    sr = 48000
    duration_s = 4
    # Sample instants spaced exactly 1/sr apart. linspace(0, 4, sr * 4) would
    # include the endpoint and stretch the spacing, detuning every tone.
    t = np.arange(sr * duration_s) / sr

    # (frequency in Hz, gain) per channel, in channel order.
    tones = [
        (440, 0.6),
        (554, 0.6),
        (330, 0.7),
        (55, 0.9),
        (392, 0.4),
        (494, 0.4),
    ]
    channels = [np.sin(2 * np.pi * freq * t) * gain for freq, gain in tones]

    sf.write(demo_path, np.column_stack(channels).astype(np.float32), sr)
    return demo_path


with gr.Blocks(
    title="🎵 Atmos Stem Extractor",
    theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"),
    css="""
    .title { text-align: center; }
    .gr-button-primary { background: linear-gradient(90deg,#7c3aed,#2563eb) !important; }
    """
) as demo:

    gr.Markdown("""
    # 🎵 Dolby Atmos · Stem Extractor
    **Extract each channel from your multichannel files** — Atmos, 5.1, 7.1, 7.1.4 and more
    """)

    with gr.Row():
        # Left column: inputs, options and reference tables.
        with gr.Column(scale=1):
            audio_input = gr.File(
                label="📁 Upload your audio file",
                file_types=[".wav", ".flac", ".aiff", ".aif",
                            ".m4a", ".mp3", ".aac", ".ogg", ".mp4", ".wma"],
                type="filepath"
            )

            with gr.Row():
                output_format = gr.Radio(
                    choices=["wav", "flac", "ogg"],
                    value="wav",
                    label="🎚️ Output Format"
                )
                normalize = gr.Checkbox(
                    value=False,
                    label="📶 Normalize Channels"
                )

            extract_btn = gr.Button("🚀 Extract Stems", variant="primary", size="lg")
            demo_btn = gr.Button("🎹 Generate 5.1 Demo", variant="secondary")

            gr.Markdown("""
            ### 📋 Supported Formats
            | Input | Output |
            |---------|--------|
            | WAV, FLAC, AIFF | WAV |
            | **M4A, MP3, AAC** | FLAC |
            | OGG, MP4, WMA | OGG |

            ### 🎛️ Detected Configurations
            | Config | Channels |
            |--------|---------|
            | Stereo | 2 |
            | 5.1 Surround | 6 |
            | 7.1 Surround | 8 |
            | 7.1.4 Atmos | 12 |
            | 9.1.6 Atmos | 16 |
            """)

        # Right column: download link and extraction report.
        with gr.Column(scale=1):
            output_file = gr.File(label="📦 Download ZIP with all stems")
            info_output = gr.Markdown(value="*Upload a file to get started...*")

    extract_btn.click(
        fn=extract_stems,
        inputs=[audio_input, output_format, normalize],
        outputs=[output_file, info_output]
    )

    # The generated demo file is loaded into the upload widget.
    demo_btn.click(
        fn=create_demo_51,
        outputs=[audio_input]
    )

if __name__ == "__main__":
    demo.launch()