# NOTE: stray HF Spaces build-status banner ("Spaces: / Build error") captured
# with this file during extraction — it is not part of the source.
| """Melody Expander - Gradio web application. | |
| Separates audio into stems, transcribes melodies, and applies embellishments. | |
| Deployed on HuggingFace Spaces with ZeroGPU support. | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import traceback | |
# Work around a gradio_client bug: its JSON-schema introspection crashes when a
# schema node is a bare boolean (e.g. additionalProperties=true). Wrap the two
# affected helpers so boolean schemas map to "Any" instead of raising.
import gradio_client.utils as _gc_utils

_orig_get_type = _gc_utils.get_type
_orig_json_schema_to_python_type = _gc_utils._json_schema_to_python_type


def _patched_get_type(schema):
    """Return "Any" for boolean schema nodes; defer to the original otherwise."""
    return "Any" if isinstance(schema, bool) else _orig_get_type(schema)


def _patched_json_schema_to_python_type(schema, defs=None):
    """Return "Any" for boolean schema nodes; defer to the original otherwise."""
    if isinstance(schema, bool):
        return "Any"
    return _orig_json_schema_to_python_type(schema, defs)


_gc_utils.get_type = _patched_get_type
_gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
| import gradio as gr | |
# Detect whether we are running inside a HuggingFace Space (the platform sets
# SPACE_ID in the environment).
IS_SPACES = "SPACE_ID" in os.environ

if IS_SPACES:
    # The `spaces` package (ZeroGPU decorator) is only available on HF Spaces.
    import spaces
| from pipeline.orchestrator import run_separation, run_transcription_and_format | |
| from pipeline.transcriber import NoteEvent, estimate_tempo | |
| from pipeline.formatter import export_all_formats | |
| from embellishments.registry import ( | |
| get_style, | |
| list_styles, | |
| get_display_name, | |
| STYLE_DISPLAY_NAMES, | |
| ) | |
| from utils.audio_io import validate_audio_file, AudioValidationError | |
| from utils.file_manager import ( | |
| create_session_dir, | |
| cleanup_session, | |
| collect_all_files, | |
| package_zip, | |
| ) | |
| from utils.music_theory import detect_key, midi_to_note_name | |
| from utils.youtube import download_audio, is_youtube_url, YouTubeError | |
| # --------------------------------------------------------------------------- | |
| # GPU-decorated separation (only active on HF Spaces) | |
| # --------------------------------------------------------------------------- | |
def _separate_gpu(audio_path: str, output_dir: str):
    """Stem separation — wrapped with @spaces.GPU when on Spaces.

    Args:
        audio_path: Path to the input audio file.
        output_dir: Directory where separated stems are written.

    Returns:
        The result of ``run_separation`` (stem paths consumed by the
        transcription phase).
    """
    return run_separation(audio_path, output_dir)


if IS_SPACES:
    # Rebind with the ZeroGPU decorator (120 s GPU budget per call) so the
    # separation gets a GPU slot when deployed on HF Spaces; locally the
    # plain function is used unchanged.
    _separate_gpu = spaces.GPU(duration=120)(_separate_gpu)
| # --------------------------------------------------------------------------- | |
| # State helpers — use JSON string to avoid Gradio schema bugs with dicts | |
| # --------------------------------------------------------------------------- | |
| def _encode_state(data): | |
| return json.dumps(data) | |
| def _decode_state(state_str): | |
| if not state_str: | |
| return {} | |
| try: | |
| return json.loads(state_str) | |
| except (json.JSONDecodeError, TypeError): | |
| return {} | |
| # --------------------------------------------------------------------------- | |
| # Tab 1: Upload & Separate | |
| # --------------------------------------------------------------------------- | |
def process_audio(audio_file, youtube_url):
    """Main processing pipeline: separate stems, transcribe, format.

    Returns a 7-tuple matching the Gradio outputs: four stem audio paths
    (vocals, drums, bass, other), an info markdown string, a list of
    downloadable files, and the JSON-encoded pipeline state string.

    Fix vs. previous version: every error path now calls ``cleanup_session``
    so failed runs no longer leak per-session temp directories (the helper
    was imported but never used).
    """
    session_dir = create_session_dir()

    def _fail(msg):
        # Remove the session workspace before returning an error tuple so
        # aborted runs don't accumulate temp dirs on disk.
        cleanup_session(session_dir)
        return _error_result(msg)

    try:
        # --- Resolve the input audio (YouTube URL takes precedence) ---
        if youtube_url and youtube_url.strip():
            url = youtube_url.strip()
            print(f"[melody-expander] Downloading from YouTube: {url}")
            try:
                yt_dir = os.path.join(session_dir, "youtube")
                audio_path = download_audio(url, yt_dir)
                print(f"[melody-expander] Downloaded to: {audio_path}")
            except YouTubeError as e:
                print(f"[melody-expander] YouTube error: {e}")
                return _fail(f"YouTube error: {e}")
        elif audio_file is not None:
            audio_path = audio_file
            print(f"[melody-expander] Using uploaded file: {audio_path}")
        else:
            return _fail("Please upload an audio file or paste a YouTube URL.")

        # --- Validate duration/size/format before spending GPU time ---
        try:
            metadata = validate_audio_file(audio_path)
        except AudioValidationError as e:
            print(f"[melody-expander] Validation error: {e}")
            return _fail(str(e))

        info_text = (
            f"**Input:** {metadata['duration']:.1f}s, "
            f"{metadata['sample_rate']}Hz, "
            f"{metadata['channels']}ch, {metadata['format']}"
        )

        # Phase 1: Separation (GPU on Spaces, CPU locally)
        print("[melody-expander] Starting stem separation...")
        stems_dir = os.path.join(session_dir, "stems")
        stem_paths = _separate_gpu(audio_path, stems_dir)

        # Phase 2: Transcription + Formatting (CPU)
        print("[melody-expander] Starting transcription...")
        output_dir = os.path.join(session_dir, "output")
        results = run_transcription_and_format(stem_paths, output_dir)

        # --- Build outputs for the UI ---
        print("[melody-expander] Packaging results...")
        stem_audio_outputs = []
        stem_info_parts = []
        all_download_files = []
        for stem_name in ["vocals", "drums", "bass", "other"]:
            stem_data = results["stems"].get(stem_name)
            if stem_data:
                stem_audio_outputs.append(stem_data["audio_path"])
                note_count = stem_data["note_count"]
                tempo = stem_data["tempo_bpm"]
                if stem_data["notes"]:
                    pcs = [n.pitch_midi % 12 for n in stem_data["notes"]]
                    key_root, key_mode = detect_key(pcs)
                    # +60 maps the pitch class into octave 4; [:-1] strips the
                    # octave digit so only the note name remains.
                    key_str = f"{midi_to_note_name(key_root + 60)[:-1]} {key_mode}"
                else:
                    key_str = "N/A"
                stem_info_parts.append(
                    f"**{stem_name.title()}:** {note_count} notes, "
                    f"~{tempo} BPM, key: {key_str}"
                )
                # Only the file paths matter here; formats are the dict keys.
                all_download_files.extend(stem_data["files"].values())
            else:
                stem_audio_outputs.append(None)
                stem_info_parts.append(f"**{stem_name.title()}:** No output")

        # Bundle everything into one ZIP for a single-click download.
        zip_path = os.path.join(session_dir, "all_stems.zip")
        all_file_dict = collect_all_files(results)
        package_zip(all_file_dict, zip_path)
        all_download_files.append(zip_path)

        stem_info = info_text + "\n\n" + "\n\n".join(stem_info_parts)

        # Serialize state as a JSON string (avoids Gradio schema bugs with dicts).
        state_data = {"session_dir": session_dir, "stems": {}}
        for stem_name, stem_data in results["stems"].items():
            state_data["stems"][stem_name] = {
                "notes": [n.to_dict() for n in stem_data["notes"]],
                "tempo_bpm": stem_data["tempo_bpm"],
                "audio_path": stem_data["audio_path"],
                "files": stem_data["files"],
            }

        return (
            stem_audio_outputs[0],
            stem_audio_outputs[1],
            stem_audio_outputs[2],
            stem_audio_outputs[3],
            stem_info,
            all_download_files,
            _encode_state(state_data),
        )
    except Exception as e:
        print(f"[melody-expander] EXCEPTION: {e}")
        traceback.print_exc()
        return _fail(f"Processing failed: {e}")
| def _error_result(msg): | |
| return (None, None, None, None, f"**Error:** {msg}", [], "") | |
| # --------------------------------------------------------------------------- | |
| # Tab 2: Embellish | |
| # --------------------------------------------------------------------------- | |
def apply_embellishments(state_str, stem_choice, style_choices):
    """Apply the selected embellishment styles, in order, to one stem.

    Returns (markdown summary, list of exported file paths) — the two
    Gradio outputs of the Embellish tab.
    """
    state_data = _decode_state(state_str)

    # Guard clauses: every precondition failure yields an error message and
    # an empty file list.
    if not state_data or "stems" not in state_data:
        return "**Error:** No stems loaded. Process audio first (Tab 1).", []
    if not stem_choice:
        return "**Error:** Select a stem.", []
    if not style_choices:
        return "**Error:** Select at least one embellishment style.", []

    stem_data = state_data["stems"].get(stem_choice)
    if not stem_data:
        return f"**Error:** Stem '{stem_choice}' not found.", []

    notes = [NoteEvent.from_dict(d) for d in stem_data["notes"]]
    tempo_bpm = stem_data["tempo_bpm"]
    if not notes:
        return f"**{stem_choice}** has no pitched content to embellish.", []

    # Detect the key from the pitch classes of the original melody.
    key_root, key_mode = detect_key([n.pitch_midi % 12 for n in notes])

    print(f"[melody-expander] Applying embellishments: {style_choices}")
    embellished = notes
    display_names = []
    for style_name in style_choices:
        # Styles compose: each one transforms the previous result.
        embellished = get_style(style_name).apply(
            embellished, tempo_bpm, key_root, key_mode
        )
        display_names.append(get_display_name(style_name))

    print("[melody-expander] Exporting embellished files...")
    session_dir = state_data.get("session_dir", create_session_dir())
    emb_dir = os.path.join(session_dir, "embellished", stem_choice)
    file_paths = export_all_formats(
        embellished,
        emb_dir,
        f"{stem_choice}_{'_'.join(style_choices)}",
        tempo_bpm,
    )

    info = (
        f"**Embellished {stem_choice.title()}**\n\n"
        f"Styles applied: {', '.join(display_names)}\n\n"
        f"Original notes: {len(notes)} -> Embellished notes: {len(embellished)}\n\n"
        f"Key: {midi_to_note_name(key_root + 60)[:-1]} {key_mode}, Tempo: {tempo_bpm} BPM"
    )
    return info, list(file_paths.values())
def get_available_stems(state_str):
    """Refresh the stem dropdown with stems that produced at least one note."""
    state_data = _decode_state(state_str)
    if not state_data or "stems" not in state_data:
        # Nothing processed yet — present an empty dropdown.
        return gr.Dropdown(choices=[], value=None)
    stems = [name for name, data in state_data["stems"].items() if data.get("notes")]
    default = stems[0] if stems else None
    return gr.Dropdown(choices=stems, value=default)
| # --------------------------------------------------------------------------- | |
| # UI | |
| # --------------------------------------------------------------------------- | |
# Markdown body rendered in the "About" tab (Tab 3).
ABOUT_TEXT = """
# Melody Expander
**Separate, transcribe, and embellish melodies from any audio.**
## How It Works
1. **Upload** an MP3/WAV file (or paste a YouTube URL) up to 5 minutes long
2. **Separate** into 4 stems: vocals, drums, bass, other (using Demucs v4)
3. **Transcribe** each stem to notes (using Basic Pitch)
4. **Download** as MIDI, MusicXML, or JSON
5. **Embellish** with jazz swing, parallel harmonies, and more
## Technical Details
- **Stem Separation:** Demucs v4 (htdemucs) — hybrid transformer model
- **Transcription:** Basic Pitch by Spotify — lightweight neural MIDI transcription
- **Output Formats:** MIDI (for DAWs), MusicXML (for notation software), JSON (for code)
- **Embellishments:** Rule-based transformations on note events
## Limitations
- Max 5 minutes, 50MB file size
- Transcription quality depends on audio clarity
- Drum transcription shows pitched components only
- YouTube downloads require yt-dlp to be installed
## Credits
Built with [Demucs](https://github.com/facebookresearch/demucs),
[Basic Pitch](https://github.com/spotify/basic-pitch),
[music21](https://web.mit.edu/music21/),
and [Gradio](https://gradio.app/).
"""
def build_ui():
    """Construct and return the Gradio Blocks app.

    Layout: three tabs — "Upload & Separate" (run the pipeline),
    "Embellish" (transform a transcribed stem), and "About". State is
    threaded between tabs through a hidden textbox holding a JSON string.
    """
    with gr.Blocks(
        title="Melody Expander",
        theme=gr.themes.Soft(),
    ) as app:
        gr.Markdown("# Melody Expander\nSeparate stems, transcribe melodies, apply embellishments.")
        # Hidden textbox for state (avoids Gradio schema introspection bugs
        # with gr.State); written by process_audio, read by Tab 2 handlers.
        pipeline_state = gr.Textbox(visible=False, elem_id="pipeline_state")
        with gr.Tabs():
            # ------ Tab 1: Upload & Separate ------
            with gr.Tab("Upload & Separate"):
                with gr.Row():
                    with gr.Column(scale=1):
                        audio_input = gr.Audio(
                            label="Upload Audio (MP3/WAV, max 5 min)",
                            type="filepath",
                        )
                        # Disabled on Spaces (interactive=not IS_SPACES).
                        youtube_input = gr.Textbox(
                            label="Or paste a YouTube URL (local only, not available on Spaces)",
                            placeholder="https://www.youtube.com/watch?v=...",
                            interactive=not IS_SPACES,
                        )
                        process_btn = gr.Button("Process", variant="primary", size="lg")
                    with gr.Column(scale=2):
                        info_output = gr.Markdown(label="Info")
                gr.Markdown("### Separated Stems")
                # Four stem players in a 2x2 grid, playback only.
                with gr.Row():
                    vocals_audio = gr.Audio(label="Vocals", interactive=False)
                    drums_audio = gr.Audio(label="Drums", interactive=False)
                with gr.Row():
                    bass_audio = gr.Audio(label="Bass", interactive=False)
                    other_audio = gr.Audio(label="Other", interactive=False)
                download_files = gr.File(
                    label="Download Files (MIDI, MusicXML, JSON, ZIP)",
                    file_count="multiple",
                    interactive=False,
                )
                # Outputs must match the order of process_audio's return tuple.
                process_btn.click(
                    fn=process_audio,
                    inputs=[audio_input, youtube_input],
                    outputs=[
                        vocals_audio, drums_audio, bass_audio, other_audio,
                        info_output, download_files, pipeline_state,
                    ],
                )
            # ------ Tab 2: Embellish ------
            with gr.Tab("Embellish"):
                gr.Markdown(
                    "Select a stem and embellishment style(s) to transform the melody. "
                    "Process audio in Tab 1 first."
                )
                with gr.Row():
                    # Choices are populated via the Refresh button below.
                    stem_dropdown = gr.Dropdown(
                        label="Stem",
                        choices=[],
                        interactive=True,
                    )
                    refresh_btn = gr.Button("Refresh Stems", size="sm")
                style_checkboxes = gr.CheckboxGroup(
                    label="Embellishment Styles",
                    choices=list_styles(),
                )
                apply_btn = gr.Button("Apply Embellishments", variant="primary")
                emb_info = gr.Markdown()
                emb_files = gr.File(
                    label="Download Embellished Files",
                    file_count="multiple",
                    interactive=False,
                )
                # Re-reads pipeline_state so stems processed in Tab 1 appear.
                refresh_btn.click(
                    fn=get_available_stems,
                    inputs=[pipeline_state],
                    outputs=[stem_dropdown],
                )
                apply_btn.click(
                    fn=apply_embellishments,
                    inputs=[pipeline_state, stem_dropdown, style_checkboxes],
                    outputs=[emb_info, emb_files],
                )
            # ------ Tab 3: About ------
            with gr.Tab("About"):
                gr.Markdown(ABOUT_TEXT)
    return app
# Build the app at import time so HF Spaces (which imports this module and
# looks for `demo`) can serve it without running the __main__ guard.
demo = build_ui()

if __name__ == "__main__":
    # Bind all interfaces on port 7860 (the HF Spaces convention);
    # ssr_mode=False disables Gradio's server-side rendering path.
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)