Spaces:

junghoonson
/

melody-expander

Build error

App Files Files Community

melody-expander / app.py

junghoonson

Disable YouTube input on HF Spaces (outbound blocked)

70319eb 2 months ago

raw

history blame contribute delete

15 kB

	"""Melody Expander - Gradio web application.

	Separates audio into stems, transcribes melodies, and applies embellishments.
	Deployed on HuggingFace Spaces with ZeroGPU support.
	"""

	import os
	import sys
	import json
	import traceback

	# Patch gradio_client bug: crashes when JSON schema has additionalProperties=true (bool)
	import gradio_client.utils as _gc_utils

	# Keep a handle on the stock implementation so the patch can delegate to it.
	_orig_get_type = _gc_utils.get_type


	def _patched_get_type(schema):
	    """Resolve a schema's type name, tolerating bare boolean schemas.

	    A boolean schema is reported as ``"Any"``; every other schema is handed
	    to the original ``gradio_client.utils.get_type``.
	    """
	    is_bool_schema = isinstance(schema, bool)
	    return "Any" if is_bool_schema else _orig_get_type(schema)


	_gc_utils.get_type = _patched_get_type

	# Saved so the replacement below can fall through to the library's own logic.
	_orig_json_schema_to_python_type = _gc_utils._json_schema_to_python_type


	def _patched_json_schema_to_python_type(schema, defs=None):
	    """Convert a JSON schema to a Python type string, accepting bool schemas.

	    Non-boolean schemas are forwarded unchanged (together with ``defs``) to
	    the original converter; a boolean schema yields ``"Any"``.
	    """
	    if not isinstance(schema, bool):
	        return _orig_json_schema_to_python_type(schema, defs)
	    return "Any"


	_gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type

	import gradio as gr

	# Detect HuggingFace Spaces environment
	# True when the SPACE_ID environment variable is set (any value).
	IS_SPACES = "SPACE_ID" in os.environ

	# The `spaces` package is only imported when running on Spaces.
	if IS_SPACES:
	    import spaces

	from pipeline.orchestrator import run_separation, run_transcription_and_format
	from pipeline.transcriber import NoteEvent, estimate_tempo
	from pipeline.formatter import export_all_formats
	from embellishments.registry import (
	get_style,
	list_styles,
	get_display_name,
	STYLE_DISPLAY_NAMES,
	)
	from utils.audio_io import validate_audio_file, AudioValidationError
	from utils.file_manager import (
	create_session_dir,
	cleanup_session,
	collect_all_files,
	package_zip,
	)
	from utils.music_theory import detect_key, midi_to_note_name
	from utils.youtube import download_audio, is_youtube_url, YouTubeError


	# ---------------------------------------------------------------------------
	# GPU-decorated separation (only active on HF Spaces)
	# ---------------------------------------------------------------------------

	def _separate_gpu(audio_path: str, output_dir: str):
	    """Run stem separation for ``audio_path``, writing into ``output_dir``.

	    Thin pass-through to ``run_separation``. It is a separate function so
	    that the name can be rebound to a ``spaces.GPU``-wrapped version below.
	    """
	    separated = run_separation(audio_path, output_dir)
	    return separated


	# On Spaces, replace the plain function with its spaces.GPU(duration=120) wrapper.
	if IS_SPACES:
	    _separate_gpu = spaces.GPU(duration=120)(_separate_gpu)


	# ---------------------------------------------------------------------------
	# State helpers — use JSON string to avoid Gradio schema bugs with dicts
	# ---------------------------------------------------------------------------

	def _encode_state(data):
	return json.dumps(data)

	def _decode_state(state_str):
	if not state_str:
	return {}
	try:
	return json.loads(state_str)
	except (json.JSONDecodeError, TypeError):
	return {}


	# ---------------------------------------------------------------------------
	# Tab 1: Upload & Separate
	# ---------------------------------------------------------------------------

	def _format_stem_summary(stem_name, stem_data):
	    """Build the "<Stem>: N notes, ~T BPM, key: K" summary line for one stem."""
	    notes = stem_data["notes"]
	    if notes:
	        pitch_classes = [n.pitch_midi % 12 for n in notes]
	        key_root, key_mode = detect_key(pitch_classes)
	        # [:-1] drops the last character of the note name (presumably the
	        # octave digit; confirm against midi_to_note_name).
	        key_str = f"{midi_to_note_name(key_root + 60)[:-1]} {key_mode}"
	    else:
	        key_str = "N/A"
	    return (
	        f"{stem_name.title()}: {stem_data['note_count']} notes, "
	        f"~{stem_data['tempo_bpm']} BPM, key: {key_str}"
	    )


	def process_audio(audio_file, youtube_url):
	    """Main processing pipeline: separate stems, transcribe, format.

	    Args:
	        audio_file: Path of the uploaded audio file, or ``None``.
	        youtube_url: Optional URL string; when non-blank it takes precedence
	            over ``audio_file``.

	    Returns:
	        A 7-tuple in the order the UI expects: the vocals, drums, bass and
	        other audio paths (``None`` for a stem with no output), the info
	        text, the list of downloadable file paths, and the JSON-encoded
	        pipeline state. On any failure the tuple from ``_error_result`` is
	        returned instead; exceptions are logged, not raised.
	    """
	    session_dir = create_session_dir()
	    # NOTE(review): session_dir is left on disk when processing fails.
	    # cleanup_session is imported by this module but its signature is not
	    # visible here; confirm it before wiring it into the error paths.

	    try:
	        url = youtube_url.strip() if youtube_url else ""
	        if url:
	            print(f"[melody-expander] Downloading from YouTube: {url}")
	            try:
	                yt_dir = os.path.join(session_dir, "youtube")
	                audio_path = download_audio(url, yt_dir)
	                print(f"[melody-expander] Downloaded to: {audio_path}")
	            except YouTubeError as e:
	                print(f"[melody-expander] YouTube error: {e}")
	                return _error_result(f"YouTube error: {e}")
	        elif audio_file is not None:
	            audio_path = audio_file
	            print(f"[melody-expander] Using uploaded file: {audio_path}")
	        else:
	            return _error_result("Please upload an audio file or paste a YouTube URL.")

	        # Validate
	        try:
	            metadata = validate_audio_file(audio_path)
	        except AudioValidationError as e:
	            print(f"[melody-expander] Validation error: {e}")
	            return _error_result(str(e))

	        info_text = (
	            f"Input: {metadata['duration']:.1f}s, "
	            f"{metadata['sample_rate']}Hz, "
	            f"{metadata['channels']}ch, {metadata['format']}"
	        )

	        # Phase 1: Separation (GPU)
	        print("[melody-expander] Starting stem separation...")
	        stems_dir = os.path.join(session_dir, "stems")
	        stem_paths = _separate_gpu(audio_path, stems_dir)

	        # Phase 2: Transcription + Formatting (CPU)
	        print("[melody-expander] Starting transcription...")
	        output_dir = os.path.join(session_dir, "output")
	        results = run_transcription_and_format(stem_paths, output_dir)

	        # Build outputs for UI
	        print("[melody-expander] Packaging results...")

	        stem_audio_outputs = []
	        stem_info_parts = []
	        all_download_files = []

	        # Fixed order: it must line up with the four audio widgets in the UI.
	        for stem_name in ["vocals", "drums", "bass", "other"]:
	            stem_data = results["stems"].get(stem_name)
	            if stem_data:
	                stem_audio_outputs.append(stem_data["audio_path"])
	                stem_info_parts.append(_format_stem_summary(stem_name, stem_data))
	                all_download_files.extend(stem_data["files"].values())
	            else:
	                stem_audio_outputs.append(None)
	                stem_info_parts.append(f"{stem_name.title()}: No output")

	        # Create ZIP of everything
	        zip_path = os.path.join(session_dir, "all_stems.zip")
	        all_file_dict = collect_all_files(results)
	        package_zip(all_file_dict, zip_path)
	        all_download_files.append(zip_path)

	        stem_info = info_text + "\n\n" + "\n\n".join(stem_info_parts)

	        # Serialize state as JSON string
	        state_data = {"session_dir": session_dir, "stems": {}}
	        for stem_name, stem_data in results["stems"].items():
	            if not stem_data:
	                # Same tolerance as the UI loop above: an empty stem entry
	                # is skipped rather than aborting the whole run.
	                continue
	            state_data["stems"][stem_name] = {
	                "notes": [n.to_dict() for n in stem_data["notes"]],
	                "tempo_bpm": stem_data["tempo_bpm"],
	                "audio_path": stem_data["audio_path"],
	                "files": stem_data["files"],
	            }

	        return (
	            *stem_audio_outputs,
	            stem_info,
	            all_download_files,
	            _encode_state(state_data),
	        )

	    except Exception as e:
	        # Top-level boundary for the UI handler: log the traceback and show
	        # the message in the info panel instead of raising.
	        print(f"[melody-expander] EXCEPTION: {e}")
	        traceback.print_exc()
	        return _error_result(f"Processing failed: {e}")


	def _error_result(msg):
	return (None, None, None, None, f"Error: {msg}", [], "")


	# ---------------------------------------------------------------------------
	# Tab 2: Embellish
	# ---------------------------------------------------------------------------

	def apply_embellishments(state_str, stem_choice, style_choices):
	    """Apply selected embellishments to a stem's notes.

	    Args:
	        state_str: JSON-encoded pipeline state (decoded via ``_decode_state``).
	        stem_choice: Name of the stem to embellish; must be a key of the
	            state's ``"stems"`` mapping.
	        style_choices: Style names, applied in the given order, each one
	            operating on the previous style's output.

	    Returns:
	        ``(info, files)``: a summary (or error) message and the list of
	        exported file paths, which is empty when nothing was exported.
	    """
	    state_data = _decode_state(state_str)

	    if not state_data or "stems" not in state_data:
	        return "Error: No stems loaded. Process audio first (Tab 1).", []

	    if not stem_choice:
	        return "Error: Select a stem.", []

	    if not style_choices:
	        return "Error: Select at least one embellishment style.", []

	    stem_data = state_data["stems"].get(stem_choice)
	    if not stem_data:
	        return f"Error: Stem '{stem_choice}' not found.", []

	    notes = [NoteEvent.from_dict(d) for d in stem_data["notes"]]
	    tempo_bpm = stem_data["tempo_bpm"]

	    if not notes:
	        return f"{stem_choice} has no pitched content to embellish.", []

	    pcs = [n.pitch_midi % 12 for n in notes]
	    key_root, key_mode = detect_key(pcs)

	    print(f"[melody-expander] Applying embellishments: {style_choices}")

	    result_notes = notes
	    applied_names = []
	    for style_name in style_choices:
	        style = get_style(style_name)
	        result_notes = style.apply(result_notes, tempo_bpm, key_root, key_mode)
	        applied_names.append(get_display_name(style_name))

	    print("[melody-expander] Exporting embellished files...")

	    # Create a fresh session directory only when the state carries none.
	    # Passing create_session_dir() as the dict.get() default would run it
	    # (and create a directory) on every call, even when it is not needed.
	    # NOTE(review): session_dir and stem_choice come from state that
	    # round-trips through a hidden textbox; consider validating that the
	    # resulting path stays inside the expected session root.
	    session_dir = state_data.get("session_dir") or create_session_dir()
	    emb_dir = os.path.join(session_dir, "embellished", stem_choice)
	    suffix = "_".join(style_choices)
	    file_paths = export_all_formats(
	        result_notes, emb_dir, f"{stem_choice}_{suffix}", tempo_bpm
	    )

	    info = (
	        f"Embellished {stem_choice.title()}\n\n"
	        f"Styles applied: {', '.join(applied_names)}\n\n"
	        f"Original notes: {len(notes)} -> Embellished notes: {len(result_notes)}\n\n"
	        f"Key: {midi_to_note_name(key_root + 60)[:-1]} {key_mode}, Tempo: {tempo_bpm} BPM"
	    )

	    return info, list(file_paths.values())


	def get_available_stems(state_str):
	    """Build the stem dropdown update from the encoded pipeline state.

	    Only stems whose entry has a non-empty ``"notes"`` value are offered;
	    the first such stem is preselected. With no usable state the dropdown
	    is emptied.
	    """
	    state_data = _decode_state(state_str)
	    if not (state_data and "stems" in state_data):
	        return gr.Dropdown(choices=[], value=None)

	    stems_with_notes = []
	    for name, entry in state_data["stems"].items():
	        if entry.get("notes"):
	            stems_with_notes.append(name)

	    if stems_with_notes:
	        preselected = stems_with_notes[0]
	    else:
	        preselected = None
	    return gr.Dropdown(choices=stems_with_notes, value=preselected)


	# ---------------------------------------------------------------------------
	# UI
	# ---------------------------------------------------------------------------

	# Markdown source rendered by the "About" tab in build_ui().
	ABOUT_TEXT = """
	# Melody Expander

	Separate, transcribe, and embellish melodies from any audio.

	## How It Works

	1. Upload an MP3/WAV file (or paste a YouTube URL) up to 5 minutes long
	2. Separate into 4 stems: vocals, drums, bass, other (using Demucs v4)
	3. Transcribe each stem to notes (using Basic Pitch)
	4. Download as MIDI, MusicXML, or JSON
	5. Embellish with jazz swing, parallel harmonies, and more

	## Technical Details

	- Stem Separation: Demucs v4 (htdemucs) — hybrid transformer model
	- Transcription: Basic Pitch by Spotify — lightweight neural MIDI transcription
	- Output Formats: MIDI (for DAWs), MusicXML (for notation software), JSON (for code)
	- Embellishments: Rule-based transformations on note events

	## Limitations

	- Max 5 minutes, 50MB file size
	- Transcription quality depends on audio clarity
	- Drum transcription shows pitched components only
	- YouTube downloads require yt-dlp to be installed

	## Credits

	Built with [Demucs](https://github.com/facebookresearch/demucs),
	[Basic Pitch](https://github.com/spotify/basic-pitch),
	[music21](https://web.mit.edu/music21/),
	and [Gradio](https://gradio.app/).
	"""


	def build_ui():
	"""Construct the Gradio Blocks app and wire its event handlers.

	Creates three tabs ("Upload & Separate", "Embellish", "About"), connects
	the Process, Refresh Stems and Apply Embellishments buttons to
	process_audio, get_available_stems and apply_embellishments, and returns
	the Blocks object.
	"""
	with gr.Blocks(
	title="Melody Expander",
	theme=gr.themes.Soft(),
	) as app:
	gr.Markdown("# Melody Expander\nSeparate stems, transcribe melodies, apply embellishments.")

	# Hidden textbox for state (avoids Gradio schema introspection bugs with gr.State)
	pipeline_state = gr.Textbox(visible=False, elem_id="pipeline_state")

	with gr.Tabs():
	# ------ Tab 1: Upload & Separate ------
	with gr.Tab("Upload & Separate"):
	with gr.Row():
	with gr.Column(scale=1):
	audio_input = gr.Audio(
	label="Upload Audio (MP3/WAV, max 5 min)",
	type="filepath",
	)
	# The URL box is read-only whenever IS_SPACES is true.
	youtube_input = gr.Textbox(
	label="Or paste a YouTube URL (local only, not available on Spaces)",
	placeholder="https://www.youtube.com/watch?v=...",
	interactive=not IS_SPACES,
	)
	process_btn = gr.Button("Process", variant="primary", size="lg")

	with gr.Column(scale=2):
	info_output = gr.Markdown(label="Info")

	gr.Markdown("### Separated Stems")
	with gr.Row():
	vocals_audio = gr.Audio(label="Vocals", interactive=False)
	drums_audio = gr.Audio(label="Drums", interactive=False)
	with gr.Row():
	bass_audio = gr.Audio(label="Bass", interactive=False)
	other_audio = gr.Audio(label="Other", interactive=False)

	download_files = gr.File(
	label="Download Files (MIDI, MusicXML, JSON, ZIP)",
	file_count="multiple",
	interactive=False,
	)

	# The outputs list follows the order of the 7-tuple returned by
	# process_audio: four stem audios, info text, files, encoded state.
	process_btn.click(
	fn=process_audio,
	inputs=[audio_input, youtube_input],
	outputs=[
	vocals_audio, drums_audio, bass_audio, other_audio,
	info_output, download_files, pipeline_state,
	],
	)

	# ------ Tab 2: Embellish ------
	with gr.Tab("Embellish"):
	gr.Markdown(
	"Select a stem and embellishment style(s) to transform the melody. "
	"Process audio in Tab 1 first."
	)

	with gr.Row():
	stem_dropdown = gr.Dropdown(
	label="Stem",
	choices=[],
	interactive=True,
	)
	refresh_btn = gr.Button("Refresh Stems", size="sm")

	style_checkboxes = gr.CheckboxGroup(
	label="Embellishment Styles",
	choices=list_styles(),
	)

	apply_btn = gr.Button("Apply Embellishments", variant="primary")

	emb_info = gr.Markdown()
	emb_files = gr.File(
	label="Download Embellished Files",
	file_count="multiple",
	interactive=False,
	)

	# Within this function, this click is the only event that updates
	# stem_dropdown; processing audio does not repopulate it.
	refresh_btn.click(
	fn=get_available_stems,
	inputs=[pipeline_state],
	outputs=[stem_dropdown],
	)

	# Outputs follow apply_embellishments' (info, files) return pair.
	apply_btn.click(
	fn=apply_embellishments,
	inputs=[pipeline_state, stem_dropdown, style_checkboxes],
	outputs=[emb_info, emb_files],
	)

	# ------ Tab 3: About ------
	with gr.Tab("About"):
	gr.Markdown(ABOUT_TEXT)

	return app


	# Built at import time and exposed as the module-level name `demo`.
	demo = build_ui()

	# Direct execution: serve on all interfaces, port 7860, with SSR turned off.
	if __name__ == "__main__":
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	        ssr_mode=False,
	    )