calebhan committed on
Commit
0dfd298
·
1 Parent(s): 2b51b9c

updated source separation

Browse files
backend/app_config.py CHANGED
@@ -31,12 +31,13 @@ class Settings(BaseSettings):
31
  gpu_enabled: bool = True
32
  max_video_duration: int = 900 # 15 minutes
33
 
34
- # Transcription Configuration (basic-pitch)
35
- onset_threshold: float = 0.3 # Note onset confidence (0-1). Lower = more notes detected
36
- frame_threshold: float = 0.3 # Frame activation threshold (0-1). Basic-pitch default
37
- minimum_note_length: int = 58 # Minimum note samples (~58ms at 44.1kHz). Basic-pitch default
38
- minimum_frequency_hz: float = 65.0 # C2 (65 Hz) - filter low-frequency noise like F1
39
- maximum_frequency_hz: float | None = None # No upper limit for piano range
 
40
 
41
  # Tempo Detection Configuration
42
  tempo_detection_duration: int = 60 # Seconds of audio to analyze
@@ -66,7 +67,7 @@ class Settings(BaseSettings):
66
 
67
  # Feature Flags
68
  enable_envelope_analysis: bool = True
69
- enable_tie_notation: bool = True
70
 
71
  # Phase 2: Zero-Tradeoff Solutions
72
  # Python compatibility: madmom runtime patch enables Python 3.10+ support
@@ -74,11 +75,17 @@ class Settings(BaseSettings):
74
  use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
75
 
76
  # Transcription Service Configuration
77
- use_yourmt3_transcription: bool = True # YourMT3+ for 80-85% accuracy (default, falls back to basic-pitch)
78
  transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
79
  transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
80
  yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
81
 
 
 
 
 
 
 
82
  # Grand Staff Configuration
83
  enable_grand_staff: bool = True # Split piano into treble + bass clefs
84
  middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
 
31
  gpu_enabled: bool = True
32
  max_video_duration: int = 900 # 15 minutes
33
 
34
+ # Transcription Configuration (deprecated - kept for API compatibility)
35
+ # These were used by basic-pitch, which has been removed in favor of YourMT3+
36
+ onset_threshold: float = 0.3 # Deprecated
37
+ frame_threshold: float = 0.3 # Deprecated
38
+ minimum_note_length: int = 58 # Deprecated
39
+ minimum_frequency_hz: float = 65.0 # Deprecated
40
+ maximum_frequency_hz: float | None = None # Deprecated
41
 
42
  # Tempo Detection Configuration
43
  tempo_detection_duration: int = 60 # Seconds of audio to analyze
 
67
 
68
  # Feature Flags
69
  enable_envelope_analysis: bool = True
70
+ enable_tie_notation: bool = True # Deprecated (was only used by old generate_musicxml)
71
 
72
  # Phase 2: Zero-Tradeoff Solutions
73
  # Python compatibility: madmom runtime patch enables Python 3.10+ support
 
75
  use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
76
 
77
  # Transcription Service Configuration
78
+ use_yourmt3_transcription: bool = True # Deprecated (always True now - YourMT3+ is only transcriber)
79
  transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
80
  transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
81
  yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
82
 
83
+ # Source Separation Configuration
84
+ use_two_stage_separation: bool = True # Use BS-RoFormer + Demucs for better quality (vs Demucs only)
85
+ transcribe_vocals: bool = True # Transcribe vocal melody as violin
86
+ vocal_instrument: int = 40 # MIDI program number for vocals (40=Violin, 73=Flute, 65=Alto Sax)
87
+ use_6stem_demucs: bool = True # Use 6-stem Demucs (piano, guitar, drums, bass, other) vs 4-stem
88
+
89
  # Grand Staff Configuration
90
  enable_grand_staff: bool = True # Split piano into treble + bass clefs
91
  middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
backend/audio_separator_wrapper.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Audio Separator Wrapper
3
+
4
+ Provides a clean interface to audio-separator library for 2-stage source separation:
5
+ 1. BS-RoFormer: Remove vocals (SOTA vocal/instrumental separation)
6
+ 2. Demucs: Separate instrumental into piano/guitar/bass/drums/other
7
+
8
+ Based on: https://github.com/nomadkaraoke/python-audio-separator
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import Dict, Optional
13
+ import subprocess
14
+ import shutil
15
+ import sys
16
+
17
+
18
class AudioSeparator:
    """
    Wrapper for audio-separator with support for multiple separation strategies.

    Separation strategies:
    1. Two-stage (vocal removal + instrument separation)
    2. Direct piano isolation (Demucs 6-stem)
    3. Legacy Demucs 4-stem (backwards compatibility)
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize audio separator.

        Args:
            model_dir: Directory to store downloaded models (default: ~/.audio-separator/)
        """
        self.model_dir = model_dir or Path.home() / ".audio-separator"
        self.model_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _find_cli(name: str) -> str:
        """
        Locate a CLI executable, preferring the copy installed next to the
        current Python interpreter (i.e. the active virtualenv's bin dir).

        Falls back to a PATH lookup, and finally to the bare name so that a
        missing tool still produces a clear error from subprocess.

        Args:
            name: Executable name, e.g. "audio-separator" or "demucs"

        Returns:
            Path string (or bare name) suitable for subprocess.
        """
        candidate = Path(sys.executable).parent / name
        if candidate.exists():
            return str(candidate)
        return shutil.which(name) or name

    def separate_vocals(
        self,
        audio_path: Path,
        output_dir: Path,
        model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
    ) -> Dict[str, Path]:
        """
        Separate vocals from instrumental using BS-RoFormer (SOTA).

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            model: BS-RoFormer model to use (default: best quality)

        Returns:
            Dict with keys: 'vocals', 'instrumental'

        Raises:
            RuntimeError: If the CLI fails or expected output stems are missing.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # self.model_dir is always set in __init__, so pass it unconditionally
        # (the original conditional guard was dead code).
        cmd = [
            self._find_cli("audio-separator"),
            str(audio_path.resolve()),
            "-m", model,
            "--output_dir", str(output_dir.resolve()),
            "--output_format", "WAV",
            "--model_file_dir", str(self.model_dir),
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        # Debug: print stdout/stderr to see what happened
        print(f" [DEBUG] audio-separator return code: {result.returncode}")
        if result.stdout:
            print(f" [DEBUG] stdout: {result.stdout[-1000:]}")
        if result.stderr:
            print(f" [DEBUG] stderr: {result.stderr[-1000:]}")

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"BS-RoFormer vocal separation failed: {error_msg}")

        # audio-separator appends the model name to output filenames:
        # filename_(Vocals)_modelname.wav or filename_(Vocals).wav, so find
        # the stems by pattern matching rather than by an exact name.
        if not output_dir.exists():
            raise RuntimeError(f"Output directory {output_dir} does not exist")

        actual_files = list(output_dir.glob("*.wav"))
        print(f" [DEBUG] Files created in {output_dir}: {[f.name for f in actual_files]}")

        vocals_files = [f for f in actual_files if "Vocal" in f.name]
        instrumental_files = [f for f in actual_files if "Instrumental" in f.name]

        if not (vocals_files and instrumental_files):
            raise RuntimeError(f"Could not find output files. Found: {[f.name for f in actual_files]}")

        vocals_path = vocals_files[0]
        instrumental_path = instrumental_files[0]
        print(f" ✓ Found vocals: {vocals_path.name}")
        print(f" ✓ Found instrumental: {instrumental_path.name}")

        return {
            'vocals': vocals_path,
            'instrumental': instrumental_path
        }

    def separate_instruments_demucs(
        self,
        audio_path: Path,
        output_dir: Path,
        stems: int = 6
    ) -> Dict[str, Path]:
        """
        Separate instrumental audio into individual instruments using Demucs.

        Args:
            audio_path: Input audio file (should be instrumental, vocals already removed)
            output_dir: Directory for output stems
            stems: Number of stems (4 or 6)
                4-stem: vocals, drums, bass, other
                6-stem: vocals, drums, bass, guitar, piano, other

        Returns:
            Dict with stem names as keys and paths as values

        Raises:
            RuntimeError: If Demucs fails or any expected stem file is missing.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Use Demucs directly for instrument separation
        model = "htdemucs_6s" if stems == 6 else "htdemucs"

        cmd = [
            self._find_cli("demucs"),
            "-n", model,
            "-o", str(output_dir.resolve()),
            str(audio_path.resolve())
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"Demucs instrument separation failed: {error_msg}")

        # Demucs creates: output_dir/model_name/audio_stem/*.wav
        demucs_output = output_dir / model / audio_path.stem

        if stems == 6:
            stem_names = ['vocals', 'drums', 'bass', 'guitar', 'piano', 'other']
        else:
            stem_names = ['vocals', 'drums', 'bass', 'other']
        stem_files = {name: demucs_output / f"{name}.wav" for name in stem_names}

        # Verify all expected stems exist
        missing = [name for name, path in stem_files.items() if not path.exists()]
        if missing:
            raise RuntimeError(f"Missing expected stems: {missing}")

        return stem_files

    def two_stage_separation(
        self,
        audio_path: Path,
        output_dir: Path,
        instrument_stems: int = 6
    ) -> Dict[str, Path]:
        """
        Two-stage separation for optimal quality:
        1. Remove vocals with BS-RoFormer (SOTA vocal separation)
        2. Separate clean instrumental with Demucs 6-stem (piano, guitar, drums, bass, other)

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            instrument_stems: Number of instrument stems (4 or 6)

        Returns:
            Dict with all stems: vocals, piano, guitar, drums, bass, other
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Stage 1: Remove vocals with BS-RoFormer
        print(" Stage 1: Separating vocals with BS-RoFormer...")
        vocal_dir = output_dir / "stage1_vocals"
        vocal_stems = self.separate_vocals(audio_path, vocal_dir)

        # Stage 2: Separate instrumental with Demucs
        print(f" Stage 2: Separating instruments with Demucs {instrument_stems}-stem...")
        instrument_dir = output_dir / "stage2_instruments"
        instrument_stems_dict = self.separate_instruments_demucs(
            vocal_stems['instrumental'],
            instrument_dir,
            stems=instrument_stems
        )

        # Combine results: vocals from stage 1 (BS-RoFormer is higher quality),
        # instruments from stage 2, skipping Demucs' duplicate vocals stem.
        all_stems = {
            'vocals': vocal_stems['vocals'],
        }
        for name, path in instrument_stems_dict.items():
            if name != 'vocals':
                all_stems[name] = path

        print(f" ✓ 2-stage separation complete: {list(all_stems.keys())}")

        return all_stems
250
if __name__ == "__main__":
    # Manual test harness for the separator.
    import argparse

    arg_parser = argparse.ArgumentParser(description="Test Audio Separator")
    arg_parser.add_argument("audio_file", type=str, help="Path to audio file")
    arg_parser.add_argument("--output", type=str, default="./output_stems",
                            help="Output directory for stems")
    arg_parser.add_argument("--mode", type=str, default="two-stage",
                            choices=["vocals", "instruments", "two-stage"],
                            help="Separation mode")
    cli_args = arg_parser.parse_args()

    sep = AudioSeparator()
    src_path = Path(cli_args.audio_file)
    dest_dir = Path(cli_args.output)

    # Run the requested mode, then report the produced stems uniformly.
    if cli_args.mode == "vocals":
        produced = sep.separate_vocals(src_path, dest_dir)
        header = "Vocal separation complete:"
    elif cli_args.mode == "instruments":
        produced = sep.separate_instruments_demucs(src_path, dest_dir, stems=6)
        header = "Instrument separation complete:"
    else:  # two-stage
        produced = sep.two_stage_separation(src_path, dest_dir, instrument_stems=6)
        header = "2-stage separation complete:"

    print(header)
    for stem_name, stem_path in produced.items():
        print(f" {stem_name}: {stem_path}")
backend/evaluation/benchmark.py CHANGED
@@ -10,6 +10,12 @@ from dataclasses import dataclass, asdict
10
  from pathlib import Path
11
  from typing import List, Dict, Optional
12
  import pandas as pd
 
 
 
 
 
 
13
 
14
  from evaluation.metrics import calculate_metrics, TranscriptionMetrics
15
 
@@ -19,7 +25,7 @@ class TestCase:
19
  """Represents a single test case for benchmarking."""
20
  name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
21
  audio_path: Path # Path to audio file (WAV/MP3)
22
- ground_truth_midi: Path # Path to ground truth MIDI file
23
  genre: str = "classical" # Genre: classical, pop, jazz, simple
24
  difficulty: str = "medium" # Difficulty: easy, medium, hard
25
  duration: Optional[float] = None # Duration in seconds
@@ -29,7 +35,7 @@ class TestCase:
29
  return {
30
  'name': self.name,
31
  'audio_path': str(self.audio_path),
32
- 'ground_truth_midi': str(self.ground_truth_midi),
33
  'genre': self.genre,
34
  'difficulty': self.difficulty,
35
  'duration': self.duration
@@ -38,10 +44,11 @@ class TestCase:
38
  @classmethod
39
  def from_dict(cls, data: dict) -> 'TestCase':
40
  """Create TestCase from dictionary."""
 
41
  return cls(
42
  name=data['name'],
43
  audio_path=Path(data['audio_path']),
44
- ground_truth_midi=Path(data['ground_truth_midi']),
45
  genre=data.get('genre', 'classical'),
46
  difficulty=data.get('difficulty', 'medium'),
47
  duration=data.get('duration')
@@ -138,18 +145,28 @@ class TranscriptionBenchmark:
138
 
139
  print(f"✅ Transcription completed in {processing_time:.1f}s")
140
 
141
- # Calculate metrics
142
- metrics = calculate_metrics(
143
- predicted_midi,
144
- test_case.ground_truth_midi,
145
- onset_tolerance=self.onset_tolerance
146
- )
147
-
148
- print(f"\n📊 Results:")
149
- print(f" F1 Score: {metrics.f1_score:.3f}")
150
- print(f" Precision: {metrics.precision:.3f}")
151
- print(f" Recall: {metrics.recall:.3f}")
152
- print(f" Onset MAE: {metrics.onset_mae*1000:.1f}ms")
 
 
 
 
 
 
 
 
 
 
153
 
154
  return BenchmarkResult(
155
  test_case_name=test_case.name,
 
10
  from pathlib import Path
11
  from typing import List, Dict, Optional
12
  import pandas as pd
13
+ import sys
14
+
15
+ # Add backend directory to path for imports
16
+ backend_dir = Path(__file__).parent.parent
17
+ if str(backend_dir) not in sys.path:
18
+ sys.path.insert(0, str(backend_dir))
19
 
20
  from evaluation.metrics import calculate_metrics, TranscriptionMetrics
21
 
 
25
  """Represents a single test case for benchmarking."""
26
  name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
27
  audio_path: Path # Path to audio file (WAV/MP3)
28
+ ground_truth_midi: Optional[Path] = None # Path to ground truth MIDI file (None for manual review)
29
  genre: str = "classical" # Genre: classical, pop, jazz, simple
30
  difficulty: str = "medium" # Difficulty: easy, medium, hard
31
  duration: Optional[float] = None # Duration in seconds
 
35
  return {
36
  'name': self.name,
37
  'audio_path': str(self.audio_path),
38
+ 'ground_truth_midi': str(self.ground_truth_midi) if self.ground_truth_midi else None,
39
  'genre': self.genre,
40
  'difficulty': self.difficulty,
41
  'duration': self.duration
 
44
  @classmethod
45
  def from_dict(cls, data: dict) -> 'TestCase':
46
  """Create TestCase from dictionary."""
47
+ ground_truth = data.get('ground_truth_midi')
48
  return cls(
49
  name=data['name'],
50
  audio_path=Path(data['audio_path']),
51
+ ground_truth_midi=Path(ground_truth) if ground_truth else None,
52
  genre=data.get('genre', 'classical'),
53
  difficulty=data.get('difficulty', 'medium'),
54
  duration=data.get('duration')
 
145
 
146
  print(f"✅ Transcription completed in {processing_time:.1f}s")
147
 
148
+ # Calculate metrics only if ground truth is available
149
+ if test_case.ground_truth_midi:
150
+ metrics = calculate_metrics(
151
+ predicted_midi,
152
+ test_case.ground_truth_midi,
153
+ onset_tolerance=self.onset_tolerance
154
+ )
155
+
156
+ print(f"\n📊 Results:")
157
+ print(f" F1 Score: {metrics.f1_score:.3f}")
158
+ print(f" Precision: {metrics.precision:.3f}")
159
+ print(f" Recall: {metrics.recall:.3f}")
160
+ print(f" Onset MAE: {metrics.onset_mae*1000:.1f}ms")
161
+ else:
162
+ # No ground truth - create placeholder metrics for manual review
163
+ print(f"\n📝 No ground truth available - MIDI saved for manual review")
164
+ print(f" Output: {predicted_midi}")
165
+ metrics = TranscriptionMetrics(
166
+ precision=0.0, recall=0.0, f1_score=0.0,
167
+ onset_mae=0.0, pitch_accuracy=0.0,
168
+ true_positives=0, false_positives=0, false_negatives=0
169
+ )
170
 
171
  return BenchmarkResult(
172
  test_case_name=test_case.name,
backend/evaluation/results/yourmt3_midi/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.mid DELETED
Binary file (56.9 kB)
 
backend/evaluation/results/yourmt3_results.csv DELETED
@@ -1,2 +0,0 @@
1
- test_case,genre,difficulty,f1_score,precision,recall,onset_mae,pitch_accuracy,true_positives,false_positives,false_negatives,processing_time,success,error
2
- MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav,classical,easy,0.9831072434092655,0.9932764416860616,0.9731441601216113,0.005870731363360242,1.0,7682,52,212,114.05283236503601,True,
 
 
 
backend/evaluation/results/yourmt3_results.json DELETED
@@ -1,18 +0,0 @@
1
- [
2
- {
3
- "test_case": "MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav",
4
- "genre": "classical",
5
- "difficulty": "easy",
6
- "f1_score": 0.9831072434092655,
7
- "precision": 0.9932764416860616,
8
- "recall": 0.9731441601216113,
9
- "onset_mae": 0.005870731363360242,
10
- "pitch_accuracy": 1.0,
11
- "true_positives": 7682,
12
- "false_positives": 52,
13
- "false_negatives": 212,
14
- "processing_time": 114.05283236503601,
15
- "success": true,
16
- "error": null
17
- }
18
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/pipeline.py CHANGED
@@ -11,8 +11,7 @@ from typing import Optional
11
  import mido
12
  import librosa
13
  import numpy as np
14
- from basic_pitch.inference import predict_and_save
15
- from basic_pitch import ICASSP_2022_MODEL_PATH
16
  from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
17
 
18
  # Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
@@ -91,13 +90,9 @@ class TranscriptionPipeline:
91
  self.final_midi_path = midi_path
92
 
93
  self.progress(90, "musicxml", "Generating MusicXML")
94
- # Use minimal generator for YourMT3+, full generator for basic-pitch
95
- if self.config.use_yourmt3_transcription:
96
- print(f" Using minimal MusicXML generation (YourMT3+)")
97
- musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
98
- else:
99
- print(f" Using full MusicXML generation (basic-pitch)")
100
- musicxml_path = self.generate_musicxml(midi_path)
101
 
102
  self.progress(100, "complete", "Transcription complete")
103
  return musicxml_path
@@ -179,75 +174,23 @@ class TranscriptionPipeline:
179
  minimum_note_length: int = None
180
  ) -> Path:
181
  """
182
- Transcribe audio to MIDI using basic-pitch.
183
 
184
  Args:
185
  audio_path: Path to audio file (should be 'other' stem for piano)
186
- onset_threshold: Note onset confidence (0-1). Higher = fewer false positives
187
- frame_threshold: Frame activation threshold (0-1)
188
- minimum_note_length: Minimum note duration in samples (~58ms at 44.1kHz)
189
 
190
  Returns:
191
  Path to generated MIDI file
192
  """
193
- # Use config defaults if not specified
194
- if onset_threshold is None:
195
- onset_threshold = self.config.onset_threshold
196
- if frame_threshold is None:
197
- frame_threshold = self.config.frame_threshold
198
- if minimum_note_length is None:
199
- minimum_note_length = self.config.minimum_note_length
200
-
201
  output_dir = self.temp_dir
202
 
203
- # === STEP 1: Try YourMT3+ first (primary transcriber) ===
204
- use_yourmt3 = self.config.use_yourmt3_transcription
205
- midi_path = None
206
-
207
- if use_yourmt3:
208
- try:
209
- print(f" Transcribing with YourMT3+ (primary transcriber)...")
210
- midi_path = self.transcribe_with_yourmt3(audio_path)
211
- print(f" ✓ YourMT3+ transcription complete")
212
- except Exception as e:
213
- import traceback
214
- print(f" ⚠ YourMT3+ failed: {e}")
215
- print(f" Full error: {traceback.format_exc()}")
216
- print(f" → Falling back to basic-pitch")
217
- midi_path = None
218
-
219
- # === STEP 2: Fallback to basic-pitch if YourMT3+ failed or disabled ===
220
- if midi_path is None:
221
- print(f" Transcribing with basic-pitch (onset={onset_threshold}, frame={frame_threshold})...")
222
-
223
- # Run basic-pitch inference
224
- # predict_and_save creates output files in the output directory
225
- predict_and_save(
226
- audio_path_list=[str(audio_path)],
227
- output_directory=str(output_dir),
228
- save_midi=True,
229
- sonify_midi=False, # Don't create audio
230
- save_model_outputs=False, # Don't save raw outputs
231
- save_notes=False, # Don't save CSV
232
- model_or_model_path=ICASSP_2022_MODEL_PATH,
233
- onset_threshold=onset_threshold,
234
- frame_threshold=frame_threshold,
235
- minimum_note_length=minimum_note_length,
236
- minimum_frequency=self.config.minimum_frequency_hz, # Filter low-frequency noise (F1)
237
- maximum_frequency=self.config.maximum_frequency_hz, # No upper limit
238
- multiple_pitch_bends=False,
239
- melodia_trick=True, # Improves monophonic melody
240
- debug_file=None
241
- )
242
-
243
- # basic-pitch saves as {audio_stem}_basic_pitch.mid
244
- generated_bp_midi = output_dir / f"{audio_path.stem}_basic_pitch.mid"
245
-
246
- if not generated_bp_midi.exists():
247
- raise RuntimeError("basic-pitch did not create MIDI file")
248
-
249
- midi_path = generated_bp_midi
250
- print(f" ✓ basic-pitch transcription complete")
251
 
252
  # Rename final MIDI to standard name for post-processing
253
  final_midi_path = output_dir / "piano.mid"
@@ -1094,163 +1037,6 @@ class TranscriptionPipeline:
1094
 
1095
  return midi_path
1096
 
1097
- def generate_musicxml(self, midi_path: Path) -> Path:
1098
- """
1099
- Convert MIDI to MusicXML with intelligent metadata detection and normalization.
1100
-
1101
- New pipeline order (optimized):
1102
- 1. Detect metadata from audio (tempo, time signature)
1103
- 2. Parse MIDI
1104
- 3. Detect key (ensemble)
1105
- 4. Insert metadata
1106
- 5. Deduplicate overlapping notes
1107
- 6. Add clef
1108
- 7. makeMeasures()
1109
- 8. Normalize measure durations
1110
- 9. Validate measures
1111
- 10. Export MusicXML
1112
-
1113
- Args:
1114
- midi_path: Path to input MIDI file
1115
-
1116
- Returns:
1117
- Path to output MusicXML file
1118
- """
1119
- self.progress(92, "musicxml", "Detecting metadata from audio")
1120
-
1121
- # Step 1: Detect metadata from audio BEFORE parsing MIDI
1122
- audio_path = self.temp_dir / "audio.wav"
1123
-
1124
- if audio_path.exists():
1125
- # Detect tempo
1126
- detected_tempo, tempo_confidence = self.detect_tempo_from_audio(audio_path)
1127
-
1128
- # Detect time signature (needs tempo)
1129
- time_sig_num, time_sig_denom, ts_confidence = self.detect_time_signature(
1130
- audio_path, detected_tempo
1131
- )
1132
- else:
1133
- # Fallback if audio not available
1134
- print(" WARNING: Audio file not found, using defaults")
1135
- detected_tempo, tempo_confidence = 120.0, 0.0
1136
- time_sig_num, time_sig_denom, ts_confidence = 4, 4, 0.0
1137
-
1138
- self.progress(93, "musicxml", "Parsing MIDI")
1139
-
1140
- # Step 2: Parse MIDI
1141
- score = converter.parse(midi_path)
1142
-
1143
- self.progress(94, "musicxml", "Detecting key signature")
1144
-
1145
- # Step 3: Detect key using ensemble methods
1146
- detected_key, key_confidence = self.detect_key_ensemble(score, audio_path)
1147
-
1148
- self.progress(95, "musicxml", "Deduplicating overlapping notes")
1149
-
1150
- # Step 4: Deduplicate overlapping notes (prevent polyphony issues)
1151
- score = self._deduplicate_overlapping_notes(score)
1152
-
1153
- # Step 4.5: Merge sequential notes at music21 level (fixes Issue #8 - tiny rests)
1154
- # This fixes tiny rests from MIDI→music21 precision loss
1155
- # Increased from 0.02 to 0.08 to catch gaps created by quantization (125ms at 120 BPM)
1156
- self.progress(95, "musicxml", "Merging sequential notes")
1157
- score = self._merge_music21_notes(score, gap_threshold_qn=0.08)
1158
-
1159
- # Step 5: Clean up any very short durations BEFORE makeMeasures
1160
- # This prevents music21 from creating impossible tuplets
1161
- for part in score.parts:
1162
- for element in part.flatten().notesAndRests:
1163
- if element.quarterLength < 0.0625: # Shorter than 64th note
1164
- element.quarterLength = 0.0625 # Round up to 64th note
1165
-
1166
- self.progress(96, "musicxml", "Creating measures")
1167
-
1168
- # Step 6: Create measures FIRST (required before grand staff split)
1169
- score = score.makeMeasures()
1170
-
1171
- # Step 7: Split into grand staff (treble + bass clefs) if enabled
1172
- if self.config.enable_grand_staff:
1173
- print(f" Splitting into grand staff (split at MIDI note {self.config.middle_c_split})...")
1174
- score = self._split_into_grand_staff(score)
1175
- print(f" Created {len(score.parts)} staves (treble + bass)")
1176
-
1177
- # Insert metadata into each part (grand staff creates new parts without metadata)
1178
- for part in score.parts:
1179
- # Get the first measure
1180
- measures = part.getElementsByClass('Measure')
1181
- if measures:
1182
- first_measure = measures[0]
1183
- # Insert key, time signature, and tempo into first measure
1184
- first_measure.insert(0, tempo.MetronomeMark(number=detected_tempo))
1185
- first_measure.insert(0, detected_key)
1186
- first_measure.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
1187
- else:
1188
- # Single staff: add treble clef and metadata
1189
- for part in score.parts:
1190
- part.insert(0, clef.TrebleClef())
1191
- part.insert(0, detected_key)
1192
- part.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
1193
- part.insert(0, tempo.MetronomeMark(number=detected_tempo))
1194
- part.partName = "Piano"
1195
-
1196
- # Step 7.5: Add tie notation for sustained notes across measure boundaries
1197
- if self.config.enable_tie_notation:
1198
- print(" Adding ties for sustained notes...")
1199
- score = self._add_ties_to_score(score)
1200
-
1201
- self.progress(97, "musicxml", "Normalizing measure durations")
1202
-
1203
- # Step 8: Remove impossible durations that makeMeasures created
1204
- score = self._remove_impossible_durations(score)
1205
-
1206
- # Step 9: Fix tuplets with impossible durations
1207
- score = self._fix_tuplet_durations(score)
1208
-
1209
- # Step 10: Normalize measure durations
1210
- score = self._normalize_measure_durations(score, time_sig_num, time_sig_denom)
1211
-
1212
- # Step 10.5: Fix any NEW impossible tuplets created during normalization
1213
- # Normalization might add rests that music21 assigns tuplets to
1214
- score = self._fix_tuplet_durations(score)
1215
-
1216
- # Step 11: Validate measures (logging only)
1217
- self._validate_measures(score)
1218
-
1219
- self.progress(98, "musicxml", "Writing MusicXML file")
1220
-
1221
- # Write MusicXML with proper error handling
1222
- output_path = self.temp_dir / f"{self.job_id}.musicxml"
1223
-
1224
- try:
1225
- # Use makeNotation=False to prevent music21 from auto-generating tuplets
1226
- score.write('musicxml', fp=str(output_path), makeNotation=False)
1227
- except Exception as e:
1228
- error_msg = str(e)
1229
-
1230
- # If still getting 2048th note errors after our normalization,
1231
- # it means music21 is creating them during export (not our fault)
1232
- if 'Cannot convert "2048th" duration to MusicXML' in error_msg or \
1233
- 'Cannot convert "4096th" duration to MusicXML' in error_msg:
1234
-
1235
- print(f" ERROR: music21 generated impossible duration during export: {error_msg}")
1236
- print(f" This is a music21 bug. Try re-running with different tempo/time signature.")
1237
-
1238
- # Last resort: try exporting as MIDI instead
1239
- midi_fallback = self.temp_dir / f"{self.job_id}_fallback.mid"
1240
- score.write('midi', fp=str(midi_fallback))
1241
- print(f" Created fallback MIDI export: {midi_fallback}")
1242
-
1243
- raise RuntimeError(
1244
- f"MusicXML export failed due to music21 bug. "
1245
- f"MIDI fallback created at {midi_fallback}. "
1246
- f"Original error: {error_msg}"
1247
- )
1248
- else:
1249
- # Different error, re-raise
1250
- raise
1251
-
1252
- return output_path
1253
-
1254
  def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
1255
  """
1256
  Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
@@ -1387,298 +1173,9 @@ class TranscriptionPipeline:
1387
  print(f" ✓ MusicXML generation complete")
1388
  return output_path
1389
 
1390
- def _deduplicate_overlapping_notes(self, score) -> stream.Score:
1391
- """
1392
- Deduplicate overlapping notes from basic-pitch to prevent MusicXML corruption.
1393
-
1394
- Problem: basic-pitch outputs multiple notes at the same timestamp for polyphonic detection.
1395
- When music21's makeMeasures() processes these, it creates measures with >4.0 beats.
1396
-
1397
- Solution: Group simultaneous notes (within 10ms) into chords, merge duplicate pitches.
1398
-
1399
- Args:
1400
- score: music21 Score object before makeMeasures()
1401
-
1402
- Returns:
1403
- Cleaned score with deduplicated notes
1404
- """
1405
- from music21 import stream, note, chord as m21_chord
1406
- from collections import defaultdict
1407
-
1408
- # Process each part
1409
- for part in score.parts:
1410
- # Collect all notes with their absolute offsets
1411
- notes_by_time = defaultdict(list) # bucket -> [notes]
1412
-
1413
- for element in part.flatten().notesAndRests:
1414
- if isinstance(element, note.Rest):
1415
- continue # Skip rests for deduplication
1416
-
1417
- # Get absolute offset in quarter notes
1418
- offset_qn = element.offset
1419
-
1420
- # Bucket notes that are within 0.005 quarter notes of each other (~5ms at 120 BPM)
1421
- # Finer resolution prevents chord notes from splitting into separate buckets
1422
- bucket = round(offset_qn / 0.005) * 0.005
1423
-
1424
- if isinstance(element, note.Note):
1425
- notes_by_time[bucket].append(element)
1426
- elif isinstance(element, m21_chord.Chord):
1427
- # Explode chords into individual notes for deduplication
1428
- for pitch in element.pitches:
1429
- n = note.Note(pitch)
1430
- n.quarterLength = element.quarterLength
1431
- n.offset = element.offset
1432
- notes_by_time[bucket].append(n)
1433
-
1434
- # Rebuild part with deduplicated notes
1435
- new_part = stream.Part()
1436
-
1437
- # Copy metadata (key, tempo, time signature will be added later)
1438
- new_part.id = part.id
1439
- new_part.partName = part.partName
1440
-
1441
- for bucket_qn in sorted(notes_by_time.keys()):
1442
- bucket_notes = notes_by_time[bucket_qn]
1443
-
1444
- if not bucket_notes:
1445
- continue
1446
-
1447
- # Group by pitch to remove duplicates
1448
- pitch_groups = defaultdict(list)
1449
- for n in bucket_notes:
1450
- pitch_groups[n.pitch.midi].append(n)
1451
-
1452
- # For each unique pitch, keep the note with longest duration
1453
- unique_notes = []
1454
- for midi_pitch, pitch_notes in pitch_groups.items():
1455
- # Sort by duration (longest first)
1456
- # Get velocity as integer for comparison (handle None values)
1457
- def get_velocity(note):
1458
- if hasattr(note, 'volume') and hasattr(note.volume, 'velocity'):
1459
- vel = note.volume.velocity
1460
- return vel if vel is not None else 64
1461
- return 64
1462
-
1463
- pitch_notes.sort(key=lambda x: (x.quarterLength, get_velocity(x)), reverse=True)
1464
- best_note = pitch_notes[0]
1465
-
1466
- # Filter out extremely short notes (< 64th note = 0.0625 quarter notes)
1467
- # MusicXML can't handle notes shorter than 1024th
1468
- if best_note.quarterLength >= 0.0625:
1469
- unique_notes.append(best_note)
1470
-
1471
- if not unique_notes:
1472
- continue # Skip if all notes were too short
1473
-
1474
- # Use bucket quarter note offset directly
1475
- offset_qn = bucket_qn
1476
-
1477
- if len(unique_notes) == 1:
1478
- # Single note - snap duration to avoid impossible tuplets
1479
- n = note.Note(unique_notes[0].pitch)
1480
- n.quarterLength = self._snap_duration(unique_notes[0].quarterLength)
1481
- new_part.insert(offset_qn, n)
1482
- elif len(unique_notes) > 1:
1483
- # Multiple notes at same time -> create chord
1484
- # Use the shortest duration to avoid overlaps, then snap
1485
- min_duration = min(n.quarterLength for n in unique_notes)
1486
-
1487
- c = m21_chord.Chord([n.pitch for n in unique_notes])
1488
- c.quarterLength = self._snap_duration(min_duration)
1489
- new_part.insert(offset_qn, c)
1490
-
1491
- # Replace old part with new part
1492
- score.replace(part, new_part)
1493
-
1494
- return score
1495
-
1496
- def _merge_music21_notes(self, score, gap_threshold_qn: float = 0.02) -> stream.Score:
1497
- """
1498
- Merge sequential notes of same pitch with small gaps at music21 level.
1499
-
1500
- Fixes tiny rests created by makeMeasures() from MIDI→music21 precision loss.
1501
- MUST run AFTER deduplication but BEFORE makeMeasures.
1502
-
1503
- Args:
1504
- score: music21 Score (before makeMeasures)
1505
- gap_threshold_qn: Max gap to merge (0.02 QN ≈ 20ms @ 120 BPM)
1506
-
1507
- Returns:
1508
- Score with merged sequential notes
1509
- """
1510
- from music21 import stream, note, chord as m21_chord
1511
- from collections import defaultdict
1512
-
1513
- for part in score.parts:
1514
- # Collect all notes with timing
1515
- elements_with_offsets = []
1516
-
1517
- for element in part.flatten().notesAndRests:
1518
- if isinstance(element, note.Rest):
1519
- continue
1520
-
1521
- offset_qn = element.offset
1522
- duration_qn = element.quarterLength
1523
-
1524
- if isinstance(element, note.Note):
1525
- elements_with_offsets.append({
1526
- 'offset': offset_qn,
1527
- 'end': offset_qn + duration_qn,
1528
- 'pitch': element.pitch.midi,
1529
- 'element': element
1530
- })
1531
- elif isinstance(element, m21_chord.Chord):
1532
- # Track each chord pitch separately
1533
- for pitch in element.pitches:
1534
- elements_with_offsets.append({
1535
- 'offset': offset_qn,
1536
- 'end': offset_qn + duration_qn,
1537
- 'pitch': pitch.midi,
1538
- 'element': element,
1539
- 'chord_id': id(element) # Prevent merging same-chord notes
1540
- })
1541
-
1542
- # Group by pitch and sort
1543
- notes_by_pitch = defaultdict(list)
1544
- for elem in elements_with_offsets:
1545
- notes_by_pitch[elem['pitch']].append(elem)
1546
-
1547
- for pitch in notes_by_pitch:
1548
- notes_by_pitch[pitch].sort(key=lambda x: x['offset'])
1549
-
1550
- # Track modifications
1551
- elements_to_remove = set()
1552
- duration_updates = {}
1553
-
1554
- # Merge within each pitch group
1555
- for pitch, note_list in notes_by_pitch.items():
1556
- i = 0
1557
- while i < len(note_list):
1558
- current = note_list[i]
1559
-
1560
- # Look ahead for mergeable notes
1561
- j = i + 1
1562
- while j < len(note_list):
1563
- next_note = note_list[j]
1564
- gap = next_note['offset'] - current['end']
1565
-
1566
- if gap <= gap_threshold_qn:
1567
- # Don't merge notes from SAME chord
1568
- if ('chord_id' in current and 'chord_id' in next_note and
1569
- current['chord_id'] == next_note['chord_id']):
1570
- break
1571
-
1572
- # Extend current to cover gap + next note
1573
- new_end = next_note['end']
1574
- new_duration = new_end - current['offset']
1575
-
1576
- duration_updates[id(current['element'])] = new_duration
1577
- current['end'] = new_end
1578
-
1579
- elements_to_remove.add(id(next_note['element']))
1580
- j += 1
1581
- else:
1582
- break
1583
-
1584
- i = j if j > i + 1 else i + 1
1585
-
1586
- # Rebuild part with modifications
1587
- new_part = stream.Part()
1588
- new_part.id = part.id
1589
- new_part.partName = part.partName
1590
-
1591
- for element in part.flatten().notesAndRests:
1592
- elem_id = id(element)
1593
-
1594
- if elem_id in elements_to_remove:
1595
- continue
1596
-
1597
- if elem_id in duration_updates:
1598
- element.quarterLength = duration_updates[elem_id]
1599
-
1600
- new_part.insert(element.offset, element)
1601
-
1602
- score.replace(part, new_part)
1603
-
1604
- return score
1605
-
1606
- def _add_ties_to_score(self, score) -> stream.Score:
1607
- """
1608
- Add tie notation to notes that span measure boundaries.
1609
-
1610
- Uses music21's tie.Tie class:
1611
- - 'start': Beginning of tied note
1612
- - 'stop': End of tied note
1613
-
1614
- Args:
1615
- score: music21 Score object
1616
-
1617
- Returns:
1618
- Score with tie notation added
1619
- """
1620
- from music21 import tie
1621
-
1622
- for part in score.parts:
1623
- measures = list(part.getElementsByClass('Measure'))
1624
-
1625
- # Get time signature to determine expected measure length
1626
- ts = part.getElementsByClass('TimeSignature')
1627
- expected_measure_length = ts[0].barDuration.quarterLength if ts else 4.0
1628
-
1629
- for measure_idx, measure in enumerate(measures):
1630
- # Get the time signature for this measure if it changed
1631
- measure_ts = measure.getElementsByClass('TimeSignature')
1632
- if measure_ts:
1633
- expected_measure_length = measure_ts[0].barDuration.quarterLength
1634
-
1635
- for element in measure.notesAndRests:
1636
- if not isinstance(element, note.Note):
1637
- continue
1638
-
1639
- # Check if note extends beyond measure boundary
1640
- # Use expected_measure_length from time signature, not barDuration
1641
- # which may have been auto-expanded by music21
1642
- element_end = element.offset + element.quarterLength
1643
-
1644
- if element_end > expected_measure_length + 0.01: # Tolerance for floating point
1645
- # Note crosses boundary - add 'start' tie
1646
- element.tie = tie.Tie('start')
1647
-
1648
- # Find continuation in next measure and add 'stop' tie
1649
- if measure_idx + 1 < len(measures):
1650
- next_measure = measures[measure_idx + 1]
1651
- for next_elem in next_measure.notesAndRests:
1652
- if (isinstance(next_elem, note.Note) and
1653
- next_elem.pitch.midi == element.pitch.midi and
1654
- next_elem.offset < 0.1): # At start of measure
1655
- next_elem.tie = tie.Tie('stop')
1656
- break
1657
-
1658
- return score
1659
-
1660
- def _snap_duration(self, duration) -> float:
1661
- """
1662
- Snap duration to nearest MusicXML-valid note value to avoid impossible tuplets.
1663
 
1664
- Valid durations: whole (4.0), half (2.0), quarter (1.0), eighth (0.5),
1665
- sixteenth (0.25), thirty-second (0.125), sixty-fourth (0.0625)
1666
 
1667
- Args:
1668
- duration: Quarter length as float or Fraction
1669
-
1670
- Returns:
1671
- Snapped quarter length
1672
- """
1673
- valid_durations = [4.0, 2.0, 1.0, 0.5, 0.25, 0.125, 0.0625]
1674
-
1675
- # Convert to float for comparison
1676
- dur_float = float(duration)
1677
 
1678
- # Find nearest valid duration
1679
- nearest = min(valid_durations, key=lambda x: abs(x - dur_float))
1680
-
1681
- return nearest
1682
 
1683
  def _snap_to_valid_duration(self, duration: float) -> float:
1684
  """
@@ -1708,73 +1205,6 @@ class TranscriptionPipeline:
1708
 
1709
  return nearest
1710
 
1711
- def _normalize_measure_durations(self, score, time_sig_numerator: int = 4, time_sig_denominator: int = 4) -> stream.Score:
1712
- """
1713
- Normalize note durations to fit measures, using detected time signature.
1714
-
1715
- Instead of removing notes, adjust durations to fill measures correctly.
1716
-
1717
- Args:
1718
- score: music21 Score with measures
1719
- time_sig_numerator: Detected time signature numerator
1720
- time_sig_denominator: Detected time signature denominator
1721
-
1722
- Returns:
1723
- Normalized score
1724
- """
1725
- expected_duration = (time_sig_numerator / time_sig_denominator) * 4.0 # Quarter notes
1726
-
1727
- for part in score.parts:
1728
- for measure in part.getElementsByClass('Measure'):
1729
- # Get all notes and chords
1730
- elements = list(measure.notesAndRests)
1731
-
1732
- if not elements:
1733
- continue
1734
-
1735
- # Calculate actual duration
1736
- actual_duration = sum(e.quarterLength for e in elements)
1737
-
1738
- # Increased tolerance from 0.05 to 0.15 QN (150ms at 120 BPM)
1739
- # Prevents normalizing "good enough" measures that get made worse by rounding
1740
- if abs(actual_duration - expected_duration) < 0.15:
1741
- continue # Already correct (allow tolerance for quantization errors)
1742
-
1743
- # Normalize durations proportionally
1744
- scale_factor = expected_duration / actual_duration if actual_duration > 0 else 1.0
1745
-
1746
- for element in elements:
1747
- # Scale duration
1748
- new_duration = element.quarterLength * scale_factor
1749
-
1750
- # Snap to valid music21 duration
1751
- element.quarterLength = self._snap_to_valid_duration(new_duration)
1752
-
1753
- # Verify total duration after normalization
1754
- new_total = sum(e.quarterLength for e in measure.notesAndRests)
1755
-
1756
- if abs(new_total - expected_duration) > 0.1:
1757
- gap = expected_duration - new_total
1758
-
1759
- if gap > 0.01:
1760
- # Underfull - add rest to fill
1761
- rest = note.Rest(quarterLength=gap)
1762
- measure.append(rest)
1763
- elif gap < -0.01:
1764
- # Overfull - proportionally adjust all elements (ZERO data loss)
1765
- # This is better than removing notes
1766
- overage = -gap
1767
- elements = list(measure.notesAndRests)
1768
-
1769
- print(f" WARNING: Measure overfull by {overage:.3f} QN, adjusting durations proportionally")
1770
-
1771
- # Proportionally reduce all durations
1772
- reduction_factor = expected_duration / new_total
1773
-
1774
- for elem in elements:
1775
- elem.quarterLength = self._snap_to_valid_duration(elem.quarterLength * reduction_factor)
1776
-
1777
- return score
1778
 
1779
  def _validate_and_adjust_metadata(
1780
  self,
@@ -2055,93 +1485,7 @@ class TranscriptionPipeline:
2055
 
2056
  return score
2057
 
2058
- def _fix_tuplet_durations(self, score) -> stream.Score:
2059
- """
2060
- Simplify all durations to prevent music21 from creating impossible tuplets during export.
2061
-
2062
- music21 creates tuplets on-the-fly during MusicXML export when durations don't
2063
- fit standard values. By rounding all durations to simple fractions, we prevent
2064
- the export logic from generating 2048th note tuplets.
2065
-
2066
- Args:
2067
- score: music21 Score with measures
2068
-
2069
- Returns:
2070
- Cleaned score with simplified durations
2071
- """
2072
- from music21 import note, chord, stream, duration
2073
-
2074
- simplified_count = 0
2075
-
2076
- # Simple durations that don't trigger tuplet creation (in quarter notes)
2077
- SIMPLE_DURATIONS = [
2078
- 4.0, # Whole note
2079
- 3.0, # Dotted half
2080
- 2.0, # Half note
2081
- 1.5, # Dotted quarter
2082
- 1.0, # Quarter note
2083
- 0.75, # Dotted eighth
2084
- 0.5, # Eighth note
2085
- 0.375, # Dotted 16th
2086
- 0.25, # 16th note
2087
- 0.125, # 32nd note (CRITICAL: was missing, caused durations to double!)
2088
- 0.0625, # 64th note
2089
- 0.03125, # 128th note
2090
- ]
2091
-
2092
- for part in score.parts:
2093
- for measure in part.getElementsByClass('Measure'):
2094
- # Process all notes, rests, and chords
2095
- for element in measure.notesAndRests:
2096
- original_duration = element.quarterLength
2097
-
2098
- # Round to nearest simple duration
2099
- nearest_duration = min(SIMPLE_DURATIONS, key=lambda x: abs(x - original_duration))
2100
-
2101
- if abs(original_duration - nearest_duration) > 0.01:
2102
- element.quarterLength = nearest_duration
2103
- simplified_count += 1
2104
-
2105
- # Strip any tuplets that might exist
2106
- if element.duration.tuplets:
2107
- element.duration.tuplets = ()
2108
-
2109
- # For chords, also process each note within
2110
- if isinstance(element, chord.Chord):
2111
- for n in element.notes:
2112
- if n.duration.tuplets:
2113
- n.duration.tuplets = ()
2114
-
2115
- if simplified_count > 0:
2116
- print(f" Simplified {simplified_count} durations to prevent tuplet creation during export")
2117
-
2118
- return score
2119
-
2120
- def _validate_measures(self, score) -> None:
2121
- """
2122
- Validate that all measures have correct durations matching their time signature.
2123
-
2124
- Logs warnings for any measures that are overfull or underfull.
2125
-
2126
- Args:
2127
- score: music21 Score with measures already created
2128
- """
2129
- for part_idx, part in enumerate(score.parts):
2130
- for measure_idx, measure in enumerate(part.getElementsByClass('Measure')):
2131
- # Get time signature for this measure
2132
- ts = measure.timeSignature or measure.getContextByClass('TimeSignature')
2133
- if not ts:
2134
- continue # Skip if no time signature
2135
-
2136
- expected_duration = ts.barDuration.quarterLength
2137
- actual_duration = measure.duration.quarterLength
2138
-
2139
- # Allow small floating-point tolerance (0.01 quarter notes = ~10ms at 120 BPM)
2140
- tolerance = 0.01
2141
 
2142
- if abs(actual_duration - expected_duration) > tolerance:
2143
- print(f"WARNING: Measure {measure_idx + 1} in part {part_idx} has duration {float(actual_duration):.2f} "
2144
- f"(expected {float(expected_duration):.2f} for {ts.ratioString} time)")
2145
 
2146
  def _split_into_grand_staff(self, score) -> stream.Score:
2147
  """
@@ -2740,7 +2084,10 @@ def remove_short_notes(midi_path: Path, min_duration: int = 60) -> Path:
2740
  def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
2741
  """Generate MusicXML from MIDI (module-level wrapper)."""
2742
  pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
2743
- return pipeline.generate_musicxml(midi_path)
 
 
 
2744
 
2745
 
2746
  def detect_key_signature(midi_path: Path) -> dict:
 
11
  import mido
12
  import librosa
13
  import numpy as np
14
+ # basic-pitch removed - using YourMT3+ only
 
15
  from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
16
 
17
  # Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
 
90
  self.final_midi_path = midi_path
91
 
92
  self.progress(90, "musicxml", "Generating MusicXML")
93
+ # Use minimal MusicXML generation (YourMT3+ optimized)
94
+ print(f" Using minimal MusicXML generation (YourMT3+)")
95
+ musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
 
 
 
 
96
 
97
  self.progress(100, "complete", "Transcription complete")
98
  return musicxml_path
 
174
  minimum_note_length: int = None
175
  ) -> Path:
176
  """
177
+ Transcribe audio to MIDI using YourMT3+.
178
 
179
  Args:
180
  audio_path: Path to audio file (should be 'other' stem for piano)
181
+ onset_threshold: Deprecated (kept for API compatibility)
182
+ frame_threshold: Deprecated (kept for API compatibility)
183
+ minimum_note_length: Deprecated (kept for API compatibility)
184
 
185
  Returns:
186
  Path to generated MIDI file
187
  """
 
 
 
 
 
 
 
 
188
  output_dir = self.temp_dir
189
 
190
+ # Transcribe with YourMT3+ (only transcription method)
191
+ print(f" Transcribing with YourMT3+...")
192
+ midi_path = self.transcribe_with_yourmt3(audio_path)
193
+ print(f" ✓ YourMT3+ transcription complete")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  # Rename final MIDI to standard name for post-processing
196
  final_midi_path = output_dir / "piano.mid"
 
1037
 
1038
  return midi_path
1039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
  def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
1041
  """
1042
  Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
 
1173
  print(f" ✓ MusicXML generation complete")
1174
  return output_path
1175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1176
 
 
 
1177
 
 
 
 
 
 
 
 
 
 
 
1178
 
 
 
 
 
1179
 
1180
  def _snap_to_valid_duration(self, duration: float) -> float:
1181
  """
 
1205
 
1206
  return nearest
1207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1208
 
1209
  def _validate_and_adjust_metadata(
1210
  self,
 
1485
 
1486
  return score
1487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1488
 
 
 
 
1489
 
1490
  def _split_into_grand_staff(self, score) -> stream.Score:
1491
  """
 
2084
  def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
2085
  """Generate MusicXML from MIDI (module-level wrapper)."""
2086
  pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
2087
+ # Use minimal pipeline (YourMT3+ optimized)
2088
+ # Note: source_audio path may not exist for module-level calls, but minimal pipeline can handle it
2089
+ audio_path = storage_path / "temp" / "compat_job" / "audio.wav"
2090
+ return pipeline.generate_musicxml_minimal(midi_path, audio_path)
2091
 
2092
 
2093
  def detect_key_signature(midi_path: Path) -> dict:
backend/requirements.txt CHANGED
@@ -18,11 +18,9 @@ scipy
18
  torch>=2.0.0
19
  torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
20
  demucs>=3.0.6
 
21
 
22
- # Pitch detection (macOS default runtime is CoreML)
23
- basic-pitch==0.4.0 # Fallback transcriber when YourMT3+ service unavailable
24
-
25
- # YourMT3+ Transcription (integrated into main service)
26
  lightning>=2.2.1
27
  transformers==4.45.1
28
  einops>=0.7.0
 
18
  torch>=2.0.0
19
  torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
20
  demucs>=3.0.6
21
+ audio-separator>=0.40.0 # BS-RoFormer and UVR models for better vocal separation
22
 
23
+ # YourMT3+ Transcription (primary transcriber)
 
 
 
24
  lightning>=2.2.1
25
  transformers==4.45.1
26
  einops>=0.7.0
frontend/src/components/InstrumentSelector.css ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .instrument-selector {
2
+ margin-bottom: 2rem;
3
+ }
4
+
5
+ .selector-label {
6
+ display: block;
7
+ margin-bottom: 1rem;
8
+ font-weight: bold;
9
+ font-size: 1.1rem;
10
+ color: #333;
11
+ }
12
+
13
+ .instrument-grid {
14
+ display: grid;
15
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
16
+ gap: 1rem;
17
+ margin-bottom: 0.5rem;
18
+ }
19
+
20
+ .instrument-button {
21
+ display: flex;
22
+ flex-direction: column;
23
+ align-items: center;
24
+ justify-content: center;
25
+ padding: 1rem;
26
+ border: 2px solid #ddd;
27
+ border-radius: 8px;
28
+ background-color: #fff;
29
+ cursor: pointer;
30
+ transition: all 0.2s ease;
31
+ min-height: 100px;
32
+ }
33
+
34
+ .instrument-button:hover {
35
+ border-color: #007bff;
36
+ background-color: #f8f9fa;
37
+ transform: translateY(-2px);
38
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
39
+ }
40
+
41
+ .instrument-button.selected {
42
+ border-color: #007bff;
43
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
44
+ color: white;
45
+ box-shadow: 0 4px 12px rgba(0, 123, 255, 0.3);
46
+ }
47
+
48
+ .instrument-button.selected:hover {
49
+ background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
50
+ }
51
+
52
+ .instrument-icon {
53
+ font-size: 2rem;
54
+ margin-bottom: 0.5rem;
55
+ display: block;
56
+ }
57
+
58
+ .instrument-label {
59
+ font-size: 0.9rem;
60
+ font-weight: 500;
61
+ text-align: center;
62
+ }
63
+
64
+ .selector-hint {
65
+ color: #888;
66
+ font-size: 0.85rem;
67
+ margin-top: 0.5rem;
68
+ text-align: center;
69
+ font-style: italic;
70
+ }
71
+
72
+ /* Responsive adjustments */
73
+ @media (max-width: 600px) {
74
+ .instrument-grid {
75
+ grid-template-columns: repeat(2, 1fr);
76
+ }
77
+ }
frontend/src/components/InstrumentSelector.tsx ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Multi-instrument selector for choosing which instruments to transcribe.
3
+ */
4
+ import { useState } from 'react';
5
+ import './InstrumentSelector.css';
6
+
7
+ export interface Instrument {
8
+ id: string;
9
+ label: string;
10
+ icon: string;
11
+ }
12
+
13
+ const INSTRUMENTS: Instrument[] = [
14
+ { id: 'piano', label: 'Piano', icon: '🎹' },
15
+ { id: 'vocals', label: 'Vocals (Violin)', icon: '🎤' },
16
+ { id: 'drums', label: 'Drums', icon: '🥁' },
17
+ { id: 'bass', label: 'Bass', icon: '🎸' },
18
+ { id: 'guitar', label: 'Guitar', icon: '🎸' },
19
+ { id: 'other', label: 'Other Instruments', icon: '🎵' }
20
+ ];
21
+
22
+ interface InstrumentSelectorProps {
23
+ selectedInstruments: string[];
24
+ onChange: (instruments: string[]) => void;
25
+ }
26
+
27
+ export function InstrumentSelector({ selectedInstruments, onChange }: InstrumentSelectorProps) {
28
+ const handleToggle = (instrumentId: string) => {
29
+ const isSelected = selectedInstruments.includes(instrumentId);
30
+
31
+ if (isSelected) {
32
+ // Don't allow deselecting if it's the only selected instrument
33
+ if (selectedInstruments.length === 1) {
34
+ return;
35
+ }
36
+ onChange(selectedInstruments.filter(id => id !== instrumentId));
37
+ } else {
38
+ onChange([...selectedInstruments, instrumentId]);
39
+ }
40
+ };
41
+
42
+ return (
43
+ <div className="instrument-selector">
44
+ <label className="selector-label">Select Instruments:</label>
45
+ <div className="instrument-grid">
46
+ {INSTRUMENTS.map(instrument => (
47
+ <button
48
+ key={instrument.id}
49
+ type="button"
50
+ className={`instrument-button ${selectedInstruments.includes(instrument.id) ? 'selected' : ''}`}
51
+ onClick={() => handleToggle(instrument.id)}
52
+ aria-pressed={selectedInstruments.includes(instrument.id)}
53
+ >
54
+ <span className="instrument-icon">{instrument.icon}</span>
55
+ <span className="instrument-label">{instrument.label}</span>
56
+ </button>
57
+ ))}
58
+ </div>
59
+ <p className="selector-hint">
60
+ Select at least one instrument to transcribe
61
+ </p>
62
+ </div>
63
+ );
64
+ }
65
+
66
+ export default InstrumentSelector;
frontend/src/components/InstrumentTabs.css ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .instrument-tabs {
2
+ display: flex;
3
+ gap: 0.5rem;
4
+ margin-bottom: 1.5rem;
5
+ padding: 0.5rem;
6
+ background-color: #f8f9fa;
7
+ border-radius: 8px;
8
+ overflow-x: auto;
9
+ }
10
+
11
+ .instrument-tabs.single {
12
+ justify-content: center;
13
+ }
14
+
15
+ .instrument-badge {
16
+ display: flex;
17
+ align-items: center;
18
+ gap: 0.5rem;
19
+ padding: 0.75rem 1.5rem;
20
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
21
+ color: white;
22
+ border-radius: 6px;
23
+ font-weight: 500;
24
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
25
+ }
26
+
27
+ .instrument-tab {
28
+ display: flex;
29
+ align-items: center;
30
+ gap: 0.5rem;
31
+ padding: 0.75rem 1.25rem;
32
+ border: 2px solid #dee2e6;
33
+ border-radius: 6px;
34
+ background-color: white;
35
+ cursor: pointer;
36
+ transition: all 0.2s ease;
37
+ font-size: 0.95rem;
38
+ font-weight: 500;
39
+ white-space: nowrap;
40
+ }
41
+
42
+ .instrument-tab:hover {
43
+ border-color: #007bff;
44
+ background-color: #f8f9fa;
45
+ transform: translateY(-1px);
46
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
47
+ }
48
+
49
+ .instrument-tab.active {
50
+ border-color: #007bff;
51
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
52
+ color: white;
53
+ box-shadow: 0 3px 8px rgba(0, 123, 255, 0.3);
54
+ }
55
+
56
+ .instrument-tab.active:hover {
57
+ background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
58
+ transform: translateY(-1px);
59
+ }
60
+
61
+ .instrument-tab .instrument-icon,
62
+ .instrument-badge .instrument-icon {
63
+ font-size: 1.25rem;
64
+ line-height: 1;
65
+ }
66
+
67
+ .instrument-tab .instrument-label,
68
+ .instrument-badge .instrument-label {
69
+ font-size: 0.95rem;
70
+ }
71
+
72
+ /* Responsive */
73
+ @media (max-width: 600px) {
74
+ .instrument-tabs {
75
+ gap: 0.25rem;
76
+ padding: 0.25rem;
77
+ }
78
+
79
+ .instrument-tab {
80
+ padding: 0.5rem 0.75rem;
81
+ font-size: 0.85rem;
82
+ }
83
+
84
+ .instrument-tab .instrument-icon {
85
+ font-size: 1.1rem;
86
+ }
87
+
88
+ .instrument-tab .instrument-label {
89
+ display: none; /* Hide labels on mobile, show icons only */
90
+ }
91
+ }
frontend/src/components/InstrumentTabs.tsx ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Instrument tabs for switching between transcribed instruments.
3
+ */
4
+ import './InstrumentTabs.css';
5
+
6
+ interface InstrumentInfo {
7
+ id: string;
8
+ label: string;
9
+ icon: string;
10
+ }
11
+
12
+ const INSTRUMENT_INFO: Record<string, InstrumentInfo> = {
13
+ piano: { id: 'piano', label: 'Piano', icon: '🎹' },
14
+ vocals: { id: 'vocals', label: 'Vocals', icon: '🎤' },
15
+ drums: { id: 'drums', label: 'Drums', icon: '🥁' },
16
+ bass: { id: 'bass', label: 'Bass', icon: '🎸' },
17
+ guitar: { id: 'guitar', label: 'Guitar', icon: '🎸' },
18
+ other: { id: 'other', label: 'Other', icon: '🎵' },
19
+ };
20
+
21
+ interface InstrumentTabsProps {
22
+ instruments: string[];
23
+ activeInstrument: string;
24
+ onInstrumentChange: (instrument: string) => void;
25
+ }
26
+
27
+ export function InstrumentTabs({ instruments, activeInstrument, onInstrumentChange }: InstrumentTabsProps) {
28
+ if (instruments.length === 0) {
29
+ return null;
30
+ }
31
+
32
+ // If only one instrument, show it as a badge instead of tabs
33
+ if (instruments.length === 1) {
34
+ const instrument = instruments[0];
35
+ const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
36
+ return (
37
+ <div className="instrument-tabs single">
38
+ <div className="instrument-badge">
39
+ <span className="instrument-icon">{info.icon}</span>
40
+ <span className="instrument-label">{info.label}</span>
41
+ </div>
42
+ </div>
43
+ );
44
+ }
45
+
46
+ return (
47
+ <div className="instrument-tabs">
48
+ {instruments.map((instrument) => {
49
+ const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
50
+ const isActive = instrument === activeInstrument;
51
+
52
+ return (
53
+ <button
54
+ key={instrument}
55
+ className={`instrument-tab ${isActive ? 'active' : ''}`}
56
+ onClick={() => onInstrumentChange(instrument)}
57
+ aria-pressed={isActive}
58
+ >
59
+ <span className="instrument-icon">{info.icon}</span>
60
+ <span className="instrument-label">{info.label}</span>
61
+ </button>
62
+ );
63
+ })}
64
+ </div>
65
+ );
66
+ }
67
+
68
+ export default InstrumentTabs;
frontend/src/components/JobSubmission.css CHANGED
@@ -102,3 +102,12 @@ button:hover {
102
  background-color: #f8d7da;
103
  color: #721c24;
104
  }
 
 
 
 
 
 
 
 
 
 
102
  background-color: #f8d7da;
103
  color: #721c24;
104
  }
105
+
106
+ .error-alert {
107
+ background-color: #f8d7da;
108
+ color: #721c24;
109
+ padding: 0.75rem 1rem;
110
+ border-radius: 4px;
111
+ margin-top: 1rem;
112
+ border: 1px solid #f5c6cb;
113
+ }
frontend/src/components/JobSubmission.tsx CHANGED
@@ -4,6 +4,7 @@
4
  import { useState, useRef, useEffect } from 'react';
5
  import { api } from '../api/client';
6
  import type { ProgressUpdate } from '../api/client';
 
7
  import './JobSubmission.css';
8
 
9
  interface JobSubmissionProps {
@@ -13,6 +14,7 @@ interface JobSubmissionProps {
13
 
14
  export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
15
  const [youtubeUrl, setYoutubeUrl] = useState('');
 
16
  const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
17
  const [error, setError] = useState<string | null>(null);
18
  const [progress, setProgress] = useState(0);
@@ -43,15 +45,24 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
43
  const handleSubmit = async (e: React.FormEvent) => {
44
  e.preventDefault();
45
  setError(null);
 
 
46
  const validation = validateUrl(youtubeUrl);
47
  if (validation) {
48
  setError(validation);
49
  return;
50
  }
 
 
 
 
 
 
 
51
  setStatus('submitting');
52
 
53
  try {
54
- const response = await api.submitJob(youtubeUrl, { instruments: ['piano'] });
55
  setYoutubeUrl('');
56
  if (onJobSubmitted) onJobSubmitted(response);
57
 
@@ -150,6 +161,11 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
150
 
151
  {(status === 'idle' || status === 'submitting') && (
152
  <form onSubmit={handleSubmit}>
 
 
 
 
 
153
  <div className="form-group">
154
  <label htmlFor="youtube-url">YouTube URL:</label>
155
  <input
@@ -167,7 +183,7 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
167
  </div>
168
  <button type="submit" disabled={status === 'submitting'}>Transcribe</button>
169
  {status === 'submitting' && <div>Submitting...</div>}
170
- {error && <div role="alert">{error}</div>}
171
  </form>
172
  )}
173
 
 
4
  import { useState, useRef, useEffect } from 'react';
5
  import { api } from '../api/client';
6
  import type { ProgressUpdate } from '../api/client';
7
+ import { InstrumentSelector } from './InstrumentSelector';
8
  import './JobSubmission.css';
9
 
10
  interface JobSubmissionProps {
 
14
 
15
  export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
16
  const [youtubeUrl, setYoutubeUrl] = useState('');
17
+ const [selectedInstruments, setSelectedInstruments] = useState<string[]>(['piano']);
18
  const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
19
  const [error, setError] = useState<string | null>(null);
20
  const [progress, setProgress] = useState(0);
 
45
  const handleSubmit = async (e: React.FormEvent) => {
46
  e.preventDefault();
47
  setError(null);
48
+
49
+ // Validate URL
50
  const validation = validateUrl(youtubeUrl);
51
  if (validation) {
52
  setError(validation);
53
  return;
54
  }
55
+
56
+ // Validate at least one instrument is selected
57
+ if (selectedInstruments.length === 0) {
58
+ setError('Please select at least one instrument');
59
+ return;
60
+ }
61
+
62
  setStatus('submitting');
63
 
64
  try {
65
+ const response = await api.submitJob(youtubeUrl, { instruments: selectedInstruments });
66
  setYoutubeUrl('');
67
  if (onJobSubmitted) onJobSubmitted(response);
68
 
 
161
 
162
  {(status === 'idle' || status === 'submitting') && (
163
  <form onSubmit={handleSubmit}>
164
+ <InstrumentSelector
165
+ selectedInstruments={selectedInstruments}
166
+ onChange={setSelectedInstruments}
167
+ />
168
+
169
  <div className="form-group">
170
  <label htmlFor="youtube-url">YouTube URL:</label>
171
  <input
 
183
  </div>
184
  <button type="submit" disabled={status === 'submitting'}>Transcribe</button>
185
  {status === 'submitting' && <div>Submitting...</div>}
186
+ {error && <div role="alert" className="error-alert">{error}</div>}
187
  </form>
188
  )}
189
 
frontend/src/components/PlaybackControls.css CHANGED
@@ -2,36 +2,60 @@
2
  display: flex;
3
  align-items: center;
4
  gap: 1rem;
5
- padding: 1rem;
6
- background: #f5f5f5;
7
- border-radius: 8px;
8
- margin: 1rem 0;
9
  flex-wrap: wrap;
 
 
10
  }
11
 
12
  .playback-controls button {
13
- padding: 0.5rem 1rem;
14
- font-size: 1rem;
15
- border: 1px solid #ccc;
 
16
  background: white;
17
- border-radius: 4px;
18
  cursor: pointer;
19
- transition: all 0.2s;
 
20
  }
21
 
22
  .playback-controls button:hover:not(:disabled) {
23
- background: #e0e0e0;
 
 
 
 
 
 
 
 
 
24
  }
25
 
26
  .playback-controls button:disabled {
27
- opacity: 0.5;
28
  cursor: not-allowed;
 
 
 
 
 
 
 
29
  }
30
 
31
  .tempo-control {
32
  display: flex;
33
  align-items: center;
34
- gap: 0.5rem;
 
 
 
 
35
  }
36
 
37
  .tempo-control label {
@@ -39,14 +63,110 @@
39
  align-items: center;
40
  gap: 0.5rem;
41
  font-size: 0.9rem;
 
 
42
  }
43
 
44
  .tempo-control input[type="range"] {
45
  width: 150px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
- .position-indicator {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  font-size: 0.9rem;
50
- color: #666;
51
- font-family: monospace;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
 
2
  display: flex;
3
  align-items: center;
4
  gap: 1rem;
5
+ padding: 1.25rem;
6
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
7
+ border-radius: 12px;
8
+ margin: 1.5rem 0;
9
  flex-wrap: wrap;
10
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
11
+ border: 1px solid #dee2e6;
12
  }
13
 
14
  .playback-controls button {
15
+ padding: 0.6rem 1.25rem;
16
+ font-size: 0.95rem;
17
+ font-weight: 500;
18
+ border: 2px solid #dee2e6;
19
  background: white;
20
+ border-radius: 8px;
21
  cursor: pointer;
22
+ transition: all 0.2s ease;
23
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
24
  }
25
 
26
  .playback-controls button:hover:not(:disabled) {
27
+ background: #007bff;
28
+ border-color: #007bff;
29
+ color: white;
30
+ transform: translateY(-1px);
31
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.2);
32
+ }
33
+
34
+ .playback-controls button:active:not(:disabled) {
35
+ transform: translateY(0);
36
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
37
  }
38
 
39
  .playback-controls button:disabled {
40
+ opacity: 0.4;
41
  cursor: not-allowed;
42
+ background: #f8f9fa;
43
+ }
44
+
45
+ .playback-controls button.active {
46
+ background: #007bff;
47
+ border-color: #007bff;
48
+ color: white;
49
  }
50
 
51
  .tempo-control {
52
  display: flex;
53
  align-items: center;
54
+ gap: 0.75rem;
55
+ background: white;
56
+ padding: 0.5rem 0.75rem;
57
+ border-radius: 8px;
58
+ border: 1px solid #dee2e6;
59
  }
60
 
61
  .tempo-control label {
 
63
  align-items: center;
64
  gap: 0.5rem;
65
  font-size: 0.9rem;
66
+ font-weight: 500;
67
+ color: #495057;
68
  }
69
 
70
  .tempo-control input[type="range"] {
71
  width: 150px;
72
+ height: 6px;
73
+ border-radius: 3px;
74
+ background: #dee2e6;
75
+ outline: none;
76
+ -webkit-appearance: none;
77
+ }
78
+
79
+ .tempo-control input[type="range"]::-webkit-slider-thumb {
80
+ -webkit-appearance: none;
81
+ appearance: none;
82
+ width: 16px;
83
+ height: 16px;
84
+ border-radius: 50%;
85
+ background: #007bff;
86
+ cursor: pointer;
87
+ box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
88
+ transition: all 0.2s ease;
89
+ }
90
+
91
+ .tempo-control input[type="range"]::-webkit-slider-thumb:hover {
92
+ background: #0056b3;
93
+ transform: scale(1.1);
94
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
95
+ }
96
+
97
+ .tempo-control input[type="range"]::-moz-range-thumb {
98
+ width: 16px;
99
+ height: 16px;
100
+ border-radius: 50%;
101
+ background: #007bff;
102
+ cursor: pointer;
103
+ border: none;
104
+ box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
105
+ transition: all 0.2s ease;
106
  }
107
 
108
+ .tempo-control input[type="range"]::-moz-range-thumb:hover {
109
+ background: #0056b3;
110
+ transform: scale(1.1);
111
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
112
+ }
113
+
114
+ .tempo-control span {
115
+ font-weight: 600;
116
+ color: #007bff;
117
+ min-width: 40px;
118
+ text-align: center;
119
+ }
120
+
121
+ .time-display {
122
+ font-size: 0.95rem;
123
+ color: #495057;
124
+ font-family: 'Courier New', monospace;
125
+ font-weight: 500;
126
+ background: white;
127
+ padding: 0.5rem 0.75rem;
128
+ border-radius: 6px;
129
+ border: 1px solid #dee2e6;
130
+ }
131
+
132
+ .volume-control {
133
+ display: flex;
134
+ align-items: center;
135
+ gap: 0.5rem;
136
+ }
137
+
138
+ .volume-control label {
139
+ display: flex;
140
+ align-items: center;
141
+ gap: 0.5rem;
142
  font-size: 0.9rem;
143
+ color: #495057;
144
+ }
145
+
146
+ .volume-control input[type="range"] {
147
+ width: 100px;
148
+ }
149
+
150
+ /* Responsive adjustments */
151
+ @media (max-width: 768px) {
152
+ .playback-controls {
153
+ justify-content: center;
154
+ gap: 0.75rem;
155
+ padding: 1rem;
156
+ }
157
+
158
+ .playback-controls button {
159
+ padding: 0.5rem 1rem;
160
+ font-size: 0.9rem;
161
+ }
162
+
163
+ .tempo-control {
164
+ width: 100%;
165
+ justify-content: space-between;
166
+ }
167
+
168
+ .time-display {
169
+ width: 100%;
170
+ text-align: center;
171
+ }
172
  }
frontend/src/components/PlaybackControls.tsx CHANGED
@@ -10,7 +10,7 @@ import { useState, useRef, useEffect } from 'react';
10
  import * as Tone from 'tone';
11
  // useNotationStore is optional for tests; guard its usage
12
  import { useNotationStore } from '../store/notation';
13
- import { durationToSeconds } from '../utils/musicxml-parser';
14
  import type { Note } from '../store/notation';
15
  import './PlaybackControls.css';
16
 
 
10
  import * as Tone from 'tone';
11
  // useNotationStore is optional for tests; guard its usage
12
  import { useNotationStore } from '../store/notation';
13
+ import { durationToSeconds } from '../utils/duration';
14
  import type { Note } from '../store/notation';
15
  import './PlaybackControls.css';
16
 
frontend/src/components/ScoreEditor.tsx CHANGED
@@ -1,11 +1,13 @@
1
  /**
2
  * Main score editor component integrating notation, playback, and export.
 
3
  */
4
  import { useState, useEffect } from 'react';
5
- import { getMidiFile, getMetadata } from '../api/client';
6
  import { useNotationStore } from '../store/notation';
7
  import { NotationCanvas } from './NotationCanvas';
8
  import { PlaybackControls } from './PlaybackControls';
 
9
  import './ScoreEditor.css';
10
 
11
  interface ScoreEditorProps {
@@ -15,7 +17,11 @@ interface ScoreEditorProps {
15
  export function ScoreEditor({ jobId }: ScoreEditorProps) {
16
  const [loading, setLoading] = useState(true);
17
  const [error, setError] = useState<string | null>(null);
 
 
18
  const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
 
 
19
 
20
  useEffect(() => {
21
  loadScore();
@@ -26,18 +32,34 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
26
  setLoading(true);
27
  setError(null);
28
 
29
- // Fetch MIDI file and metadata in parallel
30
- const [midiData, metadata] = await Promise.all([
31
- getMidiFile(jobId),
32
- getMetadata(jobId),
33
- ]);
 
 
 
 
 
34
 
35
- // Load MIDI into notation store
36
- await loadFromMidi(midiData, {
37
- tempo: metadata.tempo,
38
- keySignature: metadata.key_signature,
39
- timeSignature: metadata.time_signature,
40
- });
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  setLoading(false);
43
  } catch (err) {
@@ -88,11 +110,16 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
88
  <div className="editor-toolbar">
89
  <h2>Score Editor</h2>
90
  <div className="toolbar-actions">
91
- <button onClick={handleExportMusicXML}>Export MusicXML</button>
92
  <button onClick={handleExportMIDI}>Export MIDI</button>
93
  </div>
94
  </div>
95
 
 
 
 
 
 
 
96
  <PlaybackControls />
97
 
98
  <NotationCanvas />
 
1
  /**
2
  * Main score editor component integrating notation, playback, and export.
3
+ * Supports multi-instrument transcription.
4
  */
5
  import { useState, useEffect } from 'react';
6
+ import { getMidiFile, getMetadata, getJobStatus } from '../api/client';
7
  import { useNotationStore } from '../store/notation';
8
  import { NotationCanvas } from './NotationCanvas';
9
  import { PlaybackControls } from './PlaybackControls';
10
+ import { InstrumentTabs } from './InstrumentTabs';
11
  import './ScoreEditor.css';
12
 
13
  interface ScoreEditorProps {
 
17
  export function ScoreEditor({ jobId }: ScoreEditorProps) {
18
  const [loading, setLoading] = useState(true);
19
  const [error, setError] = useState<string | null>(null);
20
+ const [instruments, setInstruments] = useState<string[]>([]);
21
+
22
  const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
23
+ const activeInstrument = useNotationStore((state) => state.activeInstrument);
24
+ const setActiveInstrument = useNotationStore((state) => state.setActiveInstrument);
25
 
26
  useEffect(() => {
27
  loadScore();
 
32
  setLoading(true);
33
  setError(null);
34
 
35
+ // Get job status to find which instruments were transcribed
36
+ const jobStatus = await getJobStatus(jobId);
37
+
38
+ // For now, assume piano is the default instrument (backend doesn't yet return instruments list)
39
+ // TODO: Update when backend API returns instruments list in job status
40
+ const transcribedInstruments = ['piano'];
41
+ setInstruments(transcribedInstruments);
42
+
43
+ // Fetch metadata once (shared across all instruments)
44
+ const metadata = await getMetadata(jobId);
45
 
46
+ // Load MIDI files for each instrument
47
+ for (const instrument of transcribedInstruments) {
48
+ // For MVP, backend only supports piano (single stem)
49
+ // In the future, this will fetch per-instrument MIDI: `/api/v1/scores/${jobId}/midi/${instrument}`
50
+ const midiData = await getMidiFile(jobId);
51
+
52
+ await loadFromMidi(instrument, midiData, {
53
+ tempo: metadata.tempo,
54
+ keySignature: metadata.key_signature,
55
+ timeSignature: metadata.time_signature,
56
+ });
57
+ }
58
+
59
+ // Set first instrument as active
60
+ if (transcribedInstruments.length > 0) {
61
+ setActiveInstrument(transcribedInstruments[0]);
62
+ }
63
 
64
  setLoading(false);
65
  } catch (err) {
 
110
  <div className="editor-toolbar">
111
  <h2>Score Editor</h2>
112
  <div className="toolbar-actions">
 
113
  <button onClick={handleExportMIDI}>Export MIDI</button>
114
  </div>
115
  </div>
116
 
117
+ <InstrumentTabs
118
+ instruments={instruments}
119
+ activeInstrument={activeInstrument}
120
+ onInstrumentChange={setActiveInstrument}
121
+ />
122
+
123
  <PlaybackControls />
124
 
125
  <NotationCanvas />
frontend/src/store/notation.ts CHANGED
@@ -1,8 +1,8 @@
1
  /**
2
  * Zustand store for notation state management.
 
3
  */
4
  import { create } from 'zustand';
5
- import { parseMusicXML } from '../utils/musicxml-parser';
6
  import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
7
 
8
  export interface Note {
@@ -42,15 +42,22 @@ export interface Score {
42
  }
43
 
44
  interface NotationState {
 
 
 
 
 
 
45
  score: Score | null;
 
46
  selectedNoteIds: string[];
47
  currentTool: 'select' | 'add' | 'delete';
48
  currentDuration: string;
49
  playingNoteIds: string[]; // Notes currently being played (for visual feedback)
50
 
51
  // Actions
52
- loadFromMusicXML: (xml: string) => void;
53
  loadFromMidi: (
 
54
  midiData: ArrayBuffer,
55
  metadata?: {
56
  tempo?: number;
@@ -58,7 +65,7 @@ interface NotationState {
58
  timeSignature?: { numerator: number; denominator: number };
59
  }
60
  ) => Promise<void>;
61
- exportToMusicXML: () => string;
62
  addNote: (measureId: string, note: Note) => void;
63
  deleteNote: (noteId: string) => void;
64
  updateNote: (noteId: string, changes: Partial<Note>) => void;
@@ -69,72 +76,90 @@ interface NotationState {
69
  setPlayingNoteIds: (noteIds: string[]) => void;
70
  }
71
 
72
- export const useNotationStore = create<NotationState>((set, _get) => ({
 
 
 
 
 
 
73
  score: null,
 
74
  selectedNoteIds: [],
75
  currentTool: 'select',
76
  currentDuration: 'quarter',
77
  playingNoteIds: [],
78
 
79
- loadFromMusicXML: (xml: string) => {
80
- try {
81
- const score = parseMusicXML(xml);
82
- set({ score });
83
- } catch (error) {
84
- console.error('Failed to parse MusicXML:', error);
85
- // Fallback to empty score
86
- set({
87
- score: {
88
- id: 'score-1',
89
- title: 'Transcribed Score',
90
- composer: 'Unknown',
91
- key: 'C',
92
- timeSignature: '4/4',
93
- tempo: 120,
94
- parts: [],
95
- measures: [],
96
- },
97
- });
98
- }
99
- },
100
-
101
- loadFromMidi: async (midiData, metadata) => {
102
  try {
103
  let score = await parseMidiFile(midiData, {
104
  tempo: metadata?.tempo,
105
  timeSignature: metadata?.timeSignature,
106
  keySignature: metadata?.keySignature,
107
- splitAtMiddleC: true,
108
  middleCNote: 60,
109
  });
110
 
111
  // Assign chord IDs to simultaneous notes
112
  score = assignChordIds(score);
113
 
114
- set({ score });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  } catch (error) {
116
  console.error('Failed to parse MIDI:', error);
117
- // Fallback to empty score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  set({
119
- score: {
120
- id: 'score-1',
121
- title: 'Transcribed Score',
122
- composer: 'YourMT3+',
123
- key: metadata?.keySignature || 'C',
124
- timeSignature: metadata?.timeSignature
125
- ? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
126
- : '4/4',
127
- tempo: metadata?.tempo || 120,
128
- parts: [],
129
- measures: [],
130
- },
131
  });
132
  }
133
  },
134
 
135
- exportToMusicXML: () => {
136
- // TODO: Implement MusicXML generation
137
- return '<?xml version="1.0"?><score-partwise></score-partwise>';
 
 
 
 
 
 
138
  },
139
 
140
  addNote: (measureId, note) =>
 
1
  /**
2
  * Zustand store for notation state management.
3
+ * Supports multi-instrument transcription.
4
  */
5
  import { create } from 'zustand';
 
6
  import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
7
 
8
  export interface Note {
 
42
  }
43
 
44
  interface NotationState {
45
+ // Multi-instrument support
46
+ scores: Map<string, Score>; // instrument -> Score
47
+ activeInstrument: string; // Currently viewing instrument (e.g., 'piano', 'vocals')
48
+ availableInstruments: string[]; // All transcribed instruments
49
+
50
+ // Legacy single-score access (for backward compatibility)
51
  score: Score | null;
52
+
53
  selectedNoteIds: string[];
54
  currentTool: 'select' | 'add' | 'delete';
55
  currentDuration: string;
56
  playingNoteIds: string[]; // Notes currently being played (for visual feedback)
57
 
58
  // Actions
 
59
  loadFromMidi: (
60
+ instrument: string,
61
  midiData: ArrayBuffer,
62
  metadata?: {
63
  tempo?: number;
 
65
  timeSignature?: { numerator: number; denominator: number };
66
  }
67
  ) => Promise<void>;
68
+ setActiveInstrument: (instrument: string) => void;
69
  addNote: (measureId: string, note: Note) => void;
70
  deleteNote: (noteId: string) => void;
71
  updateNote: (noteId: string, changes: Partial<Note>) => void;
 
76
  setPlayingNoteIds: (noteIds: string[]) => void;
77
  }
78
 
79
+ export const useNotationStore = create<NotationState>((set, get) => ({
80
+ // Multi-instrument state
81
+ scores: new Map(),
82
+ activeInstrument: 'piano',
83
+ availableInstruments: [],
84
+
85
+ // Legacy single-score (points to active instrument's score)
86
  score: null,
87
+
88
  selectedNoteIds: [],
89
  currentTool: 'select',
90
  currentDuration: 'quarter',
91
  playingNoteIds: [],
92
 
93
+ loadFromMidi: async (instrument, midiData, metadata) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  try {
95
  let score = await parseMidiFile(midiData, {
96
  tempo: metadata?.tempo,
97
  timeSignature: metadata?.timeSignature,
98
  keySignature: metadata?.keySignature,
99
+ splitAtMiddleC: instrument === 'piano', // Only split piano into grand staff
100
  middleCNote: 60,
101
  });
102
 
103
  // Assign chord IDs to simultaneous notes
104
  score = assignChordIds(score);
105
 
106
+ // Update scores map
107
+ const state = get();
108
+ const newScores = new Map(state.scores);
109
+ newScores.set(instrument, score);
110
+
111
+ // Update available instruments if this is a new one
112
+ const newAvailableInstruments = state.availableInstruments.includes(instrument)
113
+ ? state.availableInstruments
114
+ : [...state.availableInstruments, instrument];
115
+
116
+ set({
117
+ scores: newScores,
118
+ availableInstruments: newAvailableInstruments,
119
+ // Update legacy score if this is the active instrument
120
+ score: state.activeInstrument === instrument ? score : state.score,
121
+ });
122
  } catch (error) {
123
  console.error('Failed to parse MIDI:', error);
124
+ // Create fallback empty score
125
+ const emptyScore: Score = {
126
+ id: `score-${instrument}`,
127
+ title: 'Transcribed Score',
128
+ composer: 'YourMT3+',
129
+ key: metadata?.keySignature || 'C',
130
+ timeSignature: metadata?.timeSignature
131
+ ? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
132
+ : '4/4',
133
+ tempo: metadata?.tempo || 120,
134
+ parts: [],
135
+ measures: [],
136
+ };
137
+
138
+ const state = get();
139
+ const newScores = new Map(state.scores);
140
+ newScores.set(instrument, emptyScore);
141
+
142
+ const newAvailableInstruments = state.availableInstruments.includes(instrument)
143
+ ? state.availableInstruments
144
+ : [...state.availableInstruments, instrument];
145
+
146
  set({
147
+ scores: newScores,
148
+ availableInstruments: newAvailableInstruments,
149
+ score: state.activeInstrument === instrument ? emptyScore : state.score,
 
 
 
 
 
 
 
 
 
150
  });
151
  }
152
  },
153
 
154
+ setActiveInstrument: (instrument) => {
155
+ const state = get();
156
+ const instrumentScore = state.scores.get(instrument);
157
+
158
+ set({
159
+ activeInstrument: instrument,
160
+ score: instrumentScore || null,
161
+ selectedNoteIds: [], // Clear selection when switching instruments
162
+ });
163
  },
164
 
165
  addNote: (measureId, note) =>
frontend/src/utils/duration.ts ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Duration conversion utilities for music notation.
3
+ */
4
+
5
+ /**
6
+ * Convert note duration to seconds based on tempo.
7
+ *
8
+ * @param duration - Note duration type (whole, half, quarter, eighth, 16th, 32nd)
9
+ * @param tempo - Tempo in BPM
10
+ * @param dotted - Whether the note is dotted (increases duration by 50%)
11
+ * @returns Duration in seconds
12
+ */
13
+ export function durationToSeconds(
14
+ duration: string,
15
+ tempo: number,
16
+ dotted: boolean = false
17
+ ): number {
18
+ // Quarter note duration at given tempo
19
+ const quarterNoteDuration = 60 / tempo;
20
+
21
+ // Map durations to quarter note multipliers
22
+ const durationMap: Record<string, number> = {
23
+ 'whole': 4,
24
+ 'half': 2,
25
+ 'quarter': 1,
26
+ 'eighth': 0.5,
27
+ '16th': 0.25,
28
+ '32nd': 0.125,
29
+ };
30
+
31
+ const baseDuration = durationMap[duration] || 1;
32
+ const multiplier = dotted ? 1.5 : 1;
33
+
34
+ return quarterNoteDuration * baseDuration * multiplier;
35
+ }
frontend/src/utils/musicxml-parser.ts DELETED
@@ -1,275 +0,0 @@
1
- /**
2
- * Lightweight MusicXML parser for extracting notes and metadata.
3
- *
4
- * Supports grand staff with multiple parts (treble + bass for piano).
5
- */
6
- import type { Note, Score, Measure, Part } from '../store/notation';
7
-
8
- interface ParsedNote {
9
- pitch: string;
10
- octave: number;
11
- duration: number; // in divisions
12
- type: string; // whole, half, quarter, etc.
13
- accidental?: string;
14
- dotted: boolean;
15
- isRest: boolean;
16
- }
17
-
18
- export function parseMusicXML(xml: string): Score {
19
- const parser = new DOMParser();
20
- const doc = parser.parseFromString(xml, 'text/xml');
21
-
22
- // Extract metadata
23
- const title = doc.querySelector('movement-title')?.textContent ||
24
- doc.querySelector('work-title')?.textContent ||
25
- 'Untitled';
26
- const composer = doc.querySelector('creator[type="composer"]')?.textContent || 'Unknown';
27
-
28
- // Extract key signature
29
- const fifths = doc.querySelector('key fifths')?.textContent;
30
- const keyMap: Record<string, string> = {
31
- '-7': 'Cb', '-6': 'Gb', '-5': 'Db', '-4': 'Ab', '-3': 'Eb', '-2': 'Bb', '-1': 'F',
32
- '0': 'C', '1': 'G', '2': 'D', '3': 'A', '4': 'E', '5': 'B', '6': 'F#', '7': 'C#'
33
- };
34
- const key = fifths ? keyMap[fifths] || 'C' : 'C';
35
-
36
- // Extract time signature
37
- const beats = doc.querySelector('time beats')?.textContent || '4';
38
- const beatType = doc.querySelector('time beat-type')?.textContent || '4';
39
- const timeSignature = `${beats}/${beatType}`;
40
-
41
- // Extract tempo
42
- let tempo = 120;
43
- const tempoElement = doc.querySelector('sound[tempo]');
44
- if (tempoElement) {
45
- const tempoAttr = tempoElement.getAttribute('tempo');
46
- if (tempoAttr) {
47
- tempo = parseInt(tempoAttr);
48
- }
49
- }
50
-
51
- // Parse all parts (for grand staff: treble + bass)
52
- const partElements = doc.querySelectorAll('score-partwise > part');
53
- const parts: Part[] = [];
54
- let allMeasures: Measure[] = []; // For backward compatibility
55
-
56
- partElements.forEach((partEl, partIdx) => {
57
- const partId = partEl.getAttribute('id') || `part-${partIdx}`;
58
-
59
- // Get part name and clef
60
- const partName = doc.querySelector(`score-part[id="${partId}"] part-name`)?.textContent || `Part ${partIdx + 1}`;
61
-
62
- // Determine clef from first measure
63
- const firstClefSign = partEl.querySelector('measure clef sign')?.textContent || 'G';
64
- const clef: 'treble' | 'bass' = firstClefSign === 'F' ? 'bass' : 'treble';
65
-
66
- const measureElements = partEl.querySelectorAll('measure');
67
- const measures: Measure[] = [];
68
-
69
- measureElements.forEach((measureEl, idx) => {
70
- const measureNumber = parseInt(measureEl.getAttribute('number') || String(idx + 1));
71
- const notes: Note[] = [];
72
-
73
- const noteElements = measureEl.querySelectorAll('note');
74
- let currentChord: Note[] = [];
75
- let currentChordId: string | null = null;
76
-
77
- noteElements.forEach((noteEl, noteIdx) => {
78
- const parsedNote = parseNoteElement(noteEl);
79
- if (!parsedNote) return;
80
-
81
- // Check if this note is part of a chord (simultaneous with previous note)
82
- const isChordMember = noteEl.querySelector('chord') !== null;
83
-
84
- // Assign chord ID for chord grouping
85
- if (!isChordMember) {
86
- // Start new chord group (or single note)
87
- currentChordId = `chord-${measureNumber}-${noteIdx}`;
88
- }
89
-
90
- if (parsedNote.isRest) {
91
- // Flush any pending chord before adding rest
92
- if (currentChord.length > 0) {
93
- notes.push(...currentChord);
94
- currentChord = [];
95
- }
96
-
97
- // Include rests (rests don't have chordId)
98
- notes.push({
99
- id: `note-${measureNumber}-${notes.length}`,
100
- pitch: '',
101
- duration: parsedNote.type,
102
- octave: 0,
103
- startTime: 0,
104
- dotted: parsedNote.dotted,
105
- isRest: true,
106
- chordId: undefined, // Rests are never part of chords
107
- });
108
- } else {
109
- // Build full pitch string for pitched notes
110
- const pitchName = parsedNote.pitch +
111
- (parsedNote.accidental === 'sharp' ? '#' :
112
- parsedNote.accidental === 'flat' ? 'b' : '');
113
- const fullPitch = pitchName + parsedNote.octave;
114
-
115
- const note: Note = {
116
- id: `note-${measureNumber}-${notes.length + currentChord.length}`,
117
- pitch: fullPitch,
118
- duration: parsedNote.type,
119
- octave: parsedNote.octave,
120
- startTime: 0,
121
- dotted: parsedNote.dotted,
122
- accidental: parsedNote.accidental as 'sharp' | 'flat' | 'natural' | undefined,
123
- isRest: false,
124
- chordId: currentChordId || undefined, // Assign chord ID for grouping
125
- };
126
-
127
- if (isChordMember) {
128
- // Add to current chord group
129
- currentChord.push(note);
130
- } else {
131
- // Flush previous chord if any
132
- if (currentChord.length > 0) {
133
- notes.push(...currentChord);
134
- currentChord = [];
135
- }
136
- // Start new chord group (or single note)
137
- currentChord = [note];
138
- }
139
- }
140
- });
141
-
142
- // Flush any remaining chord
143
- if (currentChord.length > 0) {
144
- notes.push(...currentChord);
145
- }
146
-
147
- // Add ALL measures, even if empty (will show as blank measures)
148
- measures.push({
149
- id: `part-${partIdx}-measure-${measureNumber}`,
150
- number: measureNumber,
151
- notes,
152
- });
153
- });
154
-
155
- // Add this part to the parts array
156
- parts.push({
157
- id: partId,
158
- name: partName,
159
- clef,
160
- measures,
161
- });
162
-
163
- // For backward compatibility, use first part's measures
164
- if (partIdx === 0) {
165
- allMeasures = measures;
166
- }
167
- });
168
-
169
- // If no parts found, return empty score
170
- if (parts.length === 0) {
171
- parts.push({
172
- id: 'part-0',
173
- name: 'Piano',
174
- clef: 'treble',
175
- measures: [],
176
- });
177
- }
178
-
179
- return {
180
- id: 'parsed-score',
181
- title,
182
- composer,
183
- key,
184
- timeSignature,
185
- tempo,
186
- parts,
187
- measures: allMeasures, // Legacy field for backward compat
188
- };
189
- }
190
-
191
- function parseNoteElement(noteEl: Element): ParsedNote | null {
192
- const durationEl = noteEl.querySelector('duration');
193
- const typeEl = noteEl.querySelector('type');
194
-
195
- if (!durationEl || !typeEl) return null;
196
-
197
- // Check if this is a rest
198
- const isRest = noteEl.querySelector('rest') !== null;
199
-
200
- if (isRest) {
201
- return {
202
- pitch: '',
203
- octave: 0,
204
- duration: parseInt(durationEl.textContent || '0'),
205
- type: typeEl.textContent || 'quarter',
206
- dotted: noteEl.querySelector('dot') !== null,
207
- isRest: true,
208
- };
209
- }
210
-
211
- // Parse pitched note
212
- const pitchEl = noteEl.querySelector('pitch');
213
- if (!pitchEl) return null;
214
-
215
- const step = pitchEl.querySelector('step')?.textContent;
216
- const octave = pitchEl.querySelector('octave')?.textContent;
217
- const alter = pitchEl.querySelector('alter')?.textContent; // Semantic pitch alteration
218
- const accidentalEl = noteEl.querySelector('accidental'); // Visual accidental display
219
- const dotEl = noteEl.querySelector('dot');
220
-
221
- if (!step || !octave) return null;
222
-
223
- // Parse accidental from both <alter> (semantic) and <accidental> (visual) tags
224
- let accidental: string | undefined;
225
-
226
- // Priority 1: Use <alter> for pitch accuracy (indicates actual pitch)
227
- if (alter) {
228
- const alterValue = parseInt(alter);
229
- if (alterValue === 1) accidental = 'sharp';
230
- else if (alterValue === -1) accidental = 'flat';
231
- else if (alterValue === 0) accidental = 'natural';
232
- }
233
-
234
- // Priority 2: If no <alter>, check <accidental> tag (visual notation)
235
- if (!accidental && accidentalEl) {
236
- const accType = accidentalEl.textContent;
237
- if (accType === 'sharp') accidental = 'sharp';
238
- else if (accType === 'flat') accidental = 'flat';
239
- else if (accType === 'natural') accidental = 'natural';
240
- }
241
-
242
- return {
243
- pitch: step,
244
- octave: parseInt(octave),
245
- duration: parseInt(durationEl.textContent || '0'),
246
- type: typeEl.textContent || 'quarter',
247
- accidental,
248
- dotted: dotEl !== null,
249
- isRest: false,
250
- };
251
- }
252
-
253
- /**
254
- * Convert note duration string to seconds based on tempo.
255
- */
256
- export function durationToSeconds(duration: string, tempo: number, dotted: boolean = false): number {
257
- const quarterNoteDuration = 60 / tempo; // seconds per quarter note
258
-
259
- const durationMap: Record<string, number> = {
260
- 'whole': quarterNoteDuration * 4,
261
- 'half': quarterNoteDuration * 2,
262
- 'quarter': quarterNoteDuration,
263
- 'eighth': quarterNoteDuration / 2,
264
- '16th': quarterNoteDuration / 4,
265
- '32nd': quarterNoteDuration / 8,
266
- };
267
-
268
- let baseDuration = durationMap[duration] || quarterNoteDuration;
269
-
270
- if (dotted) {
271
- baseDuration *= 1.5;
272
- }
273
-
274
- return baseDuration;
275
- }