updated source separation
Browse files- backend/app_config.py +15 -8
- backend/audio_separator_wrapper.py +283 -0
- backend/evaluation/benchmark.py +32 -15
- backend/evaluation/results/yourmt3_midi/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.mid +0 -0
- backend/evaluation/results/yourmt3_results.csv +0 -2
- backend/evaluation/results/yourmt3_results.json +0 -18
- backend/pipeline.py +16 -669
- backend/requirements.txt +2 -4
- frontend/src/components/InstrumentSelector.css +77 -0
- frontend/src/components/InstrumentSelector.tsx +66 -0
- frontend/src/components/InstrumentTabs.css +91 -0
- frontend/src/components/InstrumentTabs.tsx +68 -0
- frontend/src/components/JobSubmission.css +9 -0
- frontend/src/components/JobSubmission.tsx +18 -2
- frontend/src/components/PlaybackControls.css +135 -15
- frontend/src/components/PlaybackControls.tsx +1 -1
- frontend/src/components/ScoreEditor.tsx +40 -13
- frontend/src/store/notation.ts +70 -45
- frontend/src/utils/duration.ts +35 -0
- frontend/src/utils/musicxml-parser.ts +0 -275
backend/app_config.py
CHANGED
|
@@ -31,12 +31,13 @@ class Settings(BaseSettings):
|
|
| 31 |
gpu_enabled: bool = True
|
| 32 |
max_video_duration: int = 900 # 15 minutes
|
| 33 |
|
| 34 |
-
# Transcription Configuration (
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
|
| 41 |
# Tempo Detection Configuration
|
| 42 |
tempo_detection_duration: int = 60 # Seconds of audio to analyze
|
|
@@ -66,7 +67,7 @@ class Settings(BaseSettings):
|
|
| 66 |
|
| 67 |
# Feature Flags
|
| 68 |
enable_envelope_analysis: bool = True
|
| 69 |
-
enable_tie_notation: bool = True
|
| 70 |
|
| 71 |
# Phase 2: Zero-Tradeoff Solutions
|
| 72 |
# Python compatibility: madmom runtime patch enables Python 3.10+ support
|
|
@@ -74,11 +75,17 @@ class Settings(BaseSettings):
|
|
| 74 |
use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
|
| 75 |
|
| 76 |
# Transcription Service Configuration
|
| 77 |
-
use_yourmt3_transcription: bool = True #
|
| 78 |
transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
|
| 79 |
transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
|
| 80 |
yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
# Grand Staff Configuration
|
| 83 |
enable_grand_staff: bool = True # Split piano into treble + bass clefs
|
| 84 |
middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
|
|
|
|
| 31 |
gpu_enabled: bool = True
|
| 32 |
max_video_duration: int = 900 # 15 minutes
|
| 33 |
|
| 34 |
+
# Transcription Configuration (deprecated - kept for API compatibility)
|
| 35 |
+
# These were used by basic-pitch, which has been removed in favor of YourMT3+
|
| 36 |
+
onset_threshold: float = 0.3 # Deprecated
|
| 37 |
+
frame_threshold: float = 0.3 # Deprecated
|
| 38 |
+
minimum_note_length: int = 58 # Deprecated
|
| 39 |
+
minimum_frequency_hz: float = 65.0 # Deprecated
|
| 40 |
+
maximum_frequency_hz: float | None = None # Deprecated
|
| 41 |
|
| 42 |
# Tempo Detection Configuration
|
| 43 |
tempo_detection_duration: int = 60 # Seconds of audio to analyze
|
|
|
|
| 67 |
|
| 68 |
# Feature Flags
|
| 69 |
enable_envelope_analysis: bool = True
|
| 70 |
+
enable_tie_notation: bool = True # Deprecated (was only used by old generate_musicxml)
|
| 71 |
|
| 72 |
# Phase 2: Zero-Tradeoff Solutions
|
| 73 |
# Python compatibility: madmom runtime patch enables Python 3.10+ support
|
|
|
|
| 75 |
use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
|
| 76 |
|
| 77 |
# Transcription Service Configuration
|
| 78 |
+
use_yourmt3_transcription: bool = True # Deprecated (always True now - YourMT3+ is only transcriber)
|
| 79 |
transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
|
| 80 |
transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
|
| 81 |
yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
|
| 82 |
|
| 83 |
+
# Source Separation Configuration
|
| 84 |
+
use_two_stage_separation: bool = True # Use BS-RoFormer + Demucs for better quality (vs Demucs only)
|
| 85 |
+
transcribe_vocals: bool = True # Transcribe vocal melody as violin
|
| 86 |
+
vocal_instrument: int = 40 # MIDI program number for vocals (40=Violin, 73=Flute, 65=Alto Sax)
|
| 87 |
+
use_6stem_demucs: bool = True # Use 6-stem Demucs (piano, guitar, drums, bass, other) vs 4-stem
|
| 88 |
+
|
| 89 |
# Grand Staff Configuration
|
| 90 |
enable_grand_staff: bool = True # Split piano into treble + bass clefs
|
| 91 |
middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
|
backend/audio_separator_wrapper.py
ADDED
|
@@ -0,0 +1,283 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Audio Separator Wrapper
|
| 3 |
+
|
| 4 |
+
Provides a clean interface to audio-separator library for 2-stage source separation:
|
| 5 |
+
1. BS-RoFormer: Remove vocals (SOTA vocal/instrumental separation)
|
| 6 |
+
2. Demucs: Separate instrumental into piano/guitar/bass/drums/other
|
| 7 |
+
|
| 8 |
+
Based on: https://github.com/nomadkaraoke/python-audio-separator
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Dict, Optional
|
| 13 |
+
import subprocess
|
| 14 |
+
import shutil
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class AudioSeparator:
    """
    Wrapper around the ``audio-separator`` and ``demucs`` CLI tools with
    support for multiple separation strategies.

    Separation strategies:
        1. Two-stage (BS-RoFormer vocal removal + Demucs instrument separation)
        2. Direct instrument separation (Demucs 4- or 6-stem)
        3. Vocal/instrumental split only (BS-RoFormer)
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize audio separator.

        Args:
            model_dir: Directory to store downloaded models
                (default: ``~/.audio-separator/``)
        """
        self.model_dir = model_dir or Path.home() / ".audio-separator"
        self.model_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _find_tool(name: str) -> str:
        """
        Locate a CLI tool, preferring the running interpreter's bin directory
        (i.e. the active virtualenv) and falling back to PATH.

        Returns the resolved path, or the bare name so ``subprocess`` searches
        PATH itself as a last resort.
        """
        candidate = Path(sys.executable).parent / name
        if candidate.exists():
            return str(candidate)
        return shutil.which(name) or name

    def separate_vocals(
        self,
        audio_path: Path,
        output_dir: Path,
        model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
    ) -> Dict[str, Path]:
        """
        Separate vocals from instrumental using BS-RoFormer (SOTA).

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            model: BS-RoFormer model to use (default: best quality)

        Returns:
            Dict with keys: 'vocals', 'instrumental'

        Raises:
            RuntimeError: If the CLI fails or the expected stems are not found.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        cmd = [
            self._find_tool("audio-separator"),
            # Absolute paths: the tool may run with a different working directory.
            str(audio_path.resolve()),
            "-m", model,
            "--output_dir", str(output_dir.resolve()),
            "--output_format", "WAV",
        ]
        if self.model_dir:
            cmd.extend(["--model_file_dir", str(self.model_dir)])

        result = subprocess.run(cmd, capture_output=True, text=True)

        # Surface tool output for troubleshooting (model download, device issues).
        print(f"  [DEBUG] audio-separator return code: {result.returncode}")
        if result.stdout:
            print(f"  [DEBUG] stdout: {result.stdout[-1000:]}")
        if result.stderr:
            print(f"  [DEBUG] stderr: {result.stderr[-1000:]}")

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"BS-RoFormer vocal separation failed: {error_msg}")

        if not output_dir.exists():
            raise RuntimeError(f"Output directory {output_dir} does not exist")

        # audio-separator appends the model name to output filenames, e.g.
        # "name_(Vocals)_model.wav" or "name_(Vocals).wav" — match by substring,
        # not by exact filename.
        actual_files = list(output_dir.glob("*.wav"))
        print(f"  [DEBUG] Files created in {output_dir}: {[f.name for f in actual_files]}")

        vocals_files = [f for f in actual_files if "Vocal" in f.name]
        instrumental_files = [f for f in actual_files if "Instrumental" in f.name]

        if not (vocals_files and instrumental_files):
            raise RuntimeError(
                f"Could not find output files. Found: {[f.name for f in actual_files]}"
            )

        vocals_path = vocals_files[0]
        instrumental_path = instrumental_files[0]
        print(f"  ✓ Found vocals: {vocals_path.name}")
        print(f"  ✓ Found instrumental: {instrumental_path.name}")

        return {
            'vocals': vocals_path,
            'instrumental': instrumental_path
        }

    def separate_instruments_demucs(
        self,
        audio_path: Path,
        output_dir: Path,
        stems: int = 6
    ) -> Dict[str, Path]:
        """
        Separate instrumental audio into individual instruments using Demucs.

        Args:
            audio_path: Input audio file (should be instrumental, vocals already removed)
            output_dir: Directory for output stems
            stems: Number of stems (4 or 6)
                4-stem: vocals, drums, bass, other
                6-stem: vocals, drums, bass, guitar, piano, other

        Returns:
            Dict with stem names as keys and paths as values

        Raises:
            ValueError: If ``stems`` is not 4 or 6.
            RuntimeError: If Demucs fails or expected stem files are missing.
        """
        # Fail fast instead of silently running the 4-stem model for e.g. stems=5.
        if stems not in (4, 6):
            raise ValueError(f"stems must be 4 or 6, got {stems}")

        output_dir.mkdir(parents=True, exist_ok=True)

        model = "htdemucs_6s" if stems == 6 else "htdemucs"

        cmd = [
            self._find_tool("demucs"),
            "-n", model,
            "-o", str(output_dir.resolve()),
            str(audio_path.resolve()),
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"Demucs instrument separation failed: {error_msg}")

        # Demucs writes to: output_dir/<model_name>/<input file stem>/<stem>.wav
        demucs_output = output_dir / model / audio_path.stem

        stem_names = ['vocals', 'drums', 'bass', 'guitar', 'piano', 'other'] if stems == 6 \
            else ['vocals', 'drums', 'bass', 'other']
        stem_files = {name: demucs_output / f"{name}.wav" for name in stem_names}

        # Verify all expected stems exist
        missing = [name for name, path in stem_files.items() if not path.exists()]
        if missing:
            raise RuntimeError(f"Missing expected stems: {missing}")

        return stem_files

    def two_stage_separation(
        self,
        audio_path: Path,
        output_dir: Path,
        instrument_stems: int = 6
    ) -> Dict[str, Path]:
        """
        Two-stage separation for optimal quality:
        1. Remove vocals with BS-RoFormer (SOTA vocal separation)
        2. Separate the clean instrumental with Demucs (piano, guitar, drums, bass, other)

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            instrument_stems: Number of instrument stems (4 or 6)

        Returns:
            Dict with all stems: vocals, piano, guitar, drums, bass, other
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Stage 1: Remove vocals with BS-RoFormer
        print("  Stage 1: Separating vocals with BS-RoFormer...")
        vocal_dir = output_dir / "stage1_vocals"
        vocal_stems = self.separate_vocals(audio_path, vocal_dir)

        # Stage 2: Separate instrumental with Demucs
        print(f"  Stage 2: Separating instruments with Demucs {instrument_stems}-stem...")
        instrument_dir = output_dir / "stage2_instruments"
        instrument_stems_dict = self.separate_instruments_demucs(
            vocal_stems['instrumental'],
            instrument_dir,
            stems=instrument_stems
        )

        # Combine results: vocals from stage 1 (cleaner), instruments from stage 2.
        # Skip Demucs's own vocals stem — it is a duplicate of the BS-RoFormer one.
        all_stems = {'vocals': vocal_stems['vocals']}
        for name, path in instrument_stems_dict.items():
            if name != 'vocals':
                all_stems[name] = path

        print(f"  ✓ 2-stage separation complete: {list(all_stems.keys())}")

        return all_stems
| 248 |
+
|
| 249 |
+
|
| 250 |
+
if __name__ == "__main__":
    # Manual smoke test: run one separation strategy from the command line.
    import argparse

    parser = argparse.ArgumentParser(description="Test Audio Separator")
    parser.add_argument("audio_file", type=str, help="Path to audio file")
    parser.add_argument("--output", type=str, default="./output_stems",
                        help="Output directory for stems")
    parser.add_argument("--mode", type=str, default="two-stage",
                        choices=["vocals", "instruments", "two-stage"],
                        help="Separation mode")
    args = parser.parse_args()

    separator = AudioSeparator()
    source_audio = Path(args.audio_file)
    stems_dir = Path(args.output)

    # Dispatch on mode, then report results through one shared loop.
    if args.mode == "vocals":
        result_stems = separator.separate_vocals(source_audio, stems_dir)
        print(f"Vocal separation complete:")
    elif args.mode == "instruments":
        result_stems = separator.separate_instruments_demucs(source_audio, stems_dir, stems=6)
        print(f"Instrument separation complete:")
    else:  # "two-stage" (argparse choices guarantee no other value)
        result_stems = separator.two_stage_separation(source_audio, stems_dir, instrument_stems=6)
        print(f"2-stage separation complete:")

    for stem_name, stem_path in result_stems.items():
        print(f"  {stem_name}: {stem_path}")
|
backend/evaluation/benchmark.py
CHANGED
|
@@ -10,6 +10,12 @@ from dataclasses import dataclass, asdict
|
|
| 10 |
from pathlib import Path
|
| 11 |
from typing import List, Dict, Optional
|
| 12 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
from evaluation.metrics import calculate_metrics, TranscriptionMetrics
|
| 15 |
|
|
@@ -19,7 +25,7 @@ class TestCase:
|
|
| 19 |
"""Represents a single test case for benchmarking."""
|
| 20 |
name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
|
| 21 |
audio_path: Path # Path to audio file (WAV/MP3)
|
| 22 |
-
ground_truth_midi: Path # Path to ground truth MIDI file
|
| 23 |
genre: str = "classical" # Genre: classical, pop, jazz, simple
|
| 24 |
difficulty: str = "medium" # Difficulty: easy, medium, hard
|
| 25 |
duration: Optional[float] = None # Duration in seconds
|
|
@@ -29,7 +35,7 @@ class TestCase:
|
|
| 29 |
return {
|
| 30 |
'name': self.name,
|
| 31 |
'audio_path': str(self.audio_path),
|
| 32 |
-
'ground_truth_midi': str(self.ground_truth_midi),
|
| 33 |
'genre': self.genre,
|
| 34 |
'difficulty': self.difficulty,
|
| 35 |
'duration': self.duration
|
|
@@ -38,10 +44,11 @@ class TestCase:
|
|
| 38 |
@classmethod
|
| 39 |
def from_dict(cls, data: dict) -> 'TestCase':
|
| 40 |
"""Create TestCase from dictionary."""
|
|
|
|
| 41 |
return cls(
|
| 42 |
name=data['name'],
|
| 43 |
audio_path=Path(data['audio_path']),
|
| 44 |
-
ground_truth_midi=Path(
|
| 45 |
genre=data.get('genre', 'classical'),
|
| 46 |
difficulty=data.get('difficulty', 'medium'),
|
| 47 |
duration=data.get('duration')
|
|
@@ -138,18 +145,28 @@ class TranscriptionBenchmark:
|
|
| 138 |
|
| 139 |
print(f"✅ Transcription completed in {processing_time:.1f}s")
|
| 140 |
|
| 141 |
-
# Calculate metrics
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
return BenchmarkResult(
|
| 155 |
test_case_name=test_case.name,
|
|
|
|
| 10 |
from pathlib import Path
|
| 11 |
from typing import List, Dict, Optional
|
| 12 |
import pandas as pd
|
| 13 |
+
import sys
|
| 14 |
+
|
| 15 |
+
# Add backend directory to path for imports
|
| 16 |
+
backend_dir = Path(__file__).parent.parent
|
| 17 |
+
if str(backend_dir) not in sys.path:
|
| 18 |
+
sys.path.insert(0, str(backend_dir))
|
| 19 |
|
| 20 |
from evaluation.metrics import calculate_metrics, TranscriptionMetrics
|
| 21 |
|
|
|
|
| 25 |
"""Represents a single test case for benchmarking."""
|
| 26 |
name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
|
| 27 |
audio_path: Path # Path to audio file (WAV/MP3)
|
| 28 |
+
ground_truth_midi: Optional[Path] = None # Path to ground truth MIDI file (None for manual review)
|
| 29 |
genre: str = "classical" # Genre: classical, pop, jazz, simple
|
| 30 |
difficulty: str = "medium" # Difficulty: easy, medium, hard
|
| 31 |
duration: Optional[float] = None # Duration in seconds
|
|
|
|
| 35 |
return {
|
| 36 |
'name': self.name,
|
| 37 |
'audio_path': str(self.audio_path),
|
| 38 |
+
'ground_truth_midi': str(self.ground_truth_midi) if self.ground_truth_midi else None,
|
| 39 |
'genre': self.genre,
|
| 40 |
'difficulty': self.difficulty,
|
| 41 |
'duration': self.duration
|
|
|
|
| 44 |
@classmethod
|
| 45 |
def from_dict(cls, data: dict) -> 'TestCase':
|
| 46 |
"""Create TestCase from dictionary."""
|
| 47 |
+
ground_truth = data.get('ground_truth_midi')
|
| 48 |
return cls(
|
| 49 |
name=data['name'],
|
| 50 |
audio_path=Path(data['audio_path']),
|
| 51 |
+
ground_truth_midi=Path(ground_truth) if ground_truth else None,
|
| 52 |
genre=data.get('genre', 'classical'),
|
| 53 |
difficulty=data.get('difficulty', 'medium'),
|
| 54 |
duration=data.get('duration')
|
|
|
|
| 145 |
|
| 146 |
print(f"✅ Transcription completed in {processing_time:.1f}s")
|
| 147 |
|
| 148 |
+
# Calculate metrics only if ground truth is available
|
| 149 |
+
if test_case.ground_truth_midi:
|
| 150 |
+
metrics = calculate_metrics(
|
| 151 |
+
predicted_midi,
|
| 152 |
+
test_case.ground_truth_midi,
|
| 153 |
+
onset_tolerance=self.onset_tolerance
|
| 154 |
+
)
|
| 155 |
+
|
| 156 |
+
print(f"\n📊 Results:")
|
| 157 |
+
print(f" F1 Score: {metrics.f1_score:.3f}")
|
| 158 |
+
print(f" Precision: {metrics.precision:.3f}")
|
| 159 |
+
print(f" Recall: {metrics.recall:.3f}")
|
| 160 |
+
print(f" Onset MAE: {metrics.onset_mae*1000:.1f}ms")
|
| 161 |
+
else:
|
| 162 |
+
# No ground truth - create placeholder metrics for manual review
|
| 163 |
+
print(f"\n📝 No ground truth available - MIDI saved for manual review")
|
| 164 |
+
print(f" Output: {predicted_midi}")
|
| 165 |
+
metrics = TranscriptionMetrics(
|
| 166 |
+
precision=0.0, recall=0.0, f1_score=0.0,
|
| 167 |
+
onset_mae=0.0, pitch_accuracy=0.0,
|
| 168 |
+
true_positives=0, false_positives=0, false_negatives=0
|
| 169 |
+
)
|
| 170 |
|
| 171 |
return BenchmarkResult(
|
| 172 |
test_case_name=test_case.name,
|
backend/evaluation/results/yourmt3_midi/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.mid
DELETED
|
Binary file (56.9 kB)
|
|
|
backend/evaluation/results/yourmt3_results.csv
DELETED
|
@@ -1,2 +0,0 @@
|
|
| 1 |
-
test_case,genre,difficulty,f1_score,precision,recall,onset_mae,pitch_accuracy,true_positives,false_positives,false_negatives,processing_time,success,error
|
| 2 |
-
MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav,classical,easy,0.9831072434092655,0.9932764416860616,0.9731441601216113,0.005870731363360242,1.0,7682,52,212,114.05283236503601,True,
|
|
|
|
|
|
|
|
|
backend/evaluation/results/yourmt3_results.json
DELETED
|
@@ -1,18 +0,0 @@
|
|
| 1 |
-
[
|
| 2 |
-
{
|
| 3 |
-
"test_case": "MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav",
|
| 4 |
-
"genre": "classical",
|
| 5 |
-
"difficulty": "easy",
|
| 6 |
-
"f1_score": 0.9831072434092655,
|
| 7 |
-
"precision": 0.9932764416860616,
|
| 8 |
-
"recall": 0.9731441601216113,
|
| 9 |
-
"onset_mae": 0.005870731363360242,
|
| 10 |
-
"pitch_accuracy": 1.0,
|
| 11 |
-
"true_positives": 7682,
|
| 12 |
-
"false_positives": 52,
|
| 13 |
-
"false_negatives": 212,
|
| 14 |
-
"processing_time": 114.05283236503601,
|
| 15 |
-
"success": true,
|
| 16 |
-
"error": null
|
| 17 |
-
}
|
| 18 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
backend/pipeline.py
CHANGED
|
@@ -11,8 +11,7 @@ from typing import Optional
|
|
| 11 |
import mido
|
| 12 |
import librosa
|
| 13 |
import numpy as np
|
| 14 |
-
|
| 15 |
-
from basic_pitch import ICASSP_2022_MODEL_PATH
|
| 16 |
from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
|
| 17 |
|
| 18 |
# Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
|
|
@@ -91,13 +90,9 @@ class TranscriptionPipeline:
|
|
| 91 |
self.final_midi_path = midi_path
|
| 92 |
|
| 93 |
self.progress(90, "musicxml", "Generating MusicXML")
|
| 94 |
-
# Use minimal
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
|
| 98 |
-
else:
|
| 99 |
-
print(f" Using full MusicXML generation (basic-pitch)")
|
| 100 |
-
musicxml_path = self.generate_musicxml(midi_path)
|
| 101 |
|
| 102 |
self.progress(100, "complete", "Transcription complete")
|
| 103 |
return musicxml_path
|
|
@@ -179,75 +174,23 @@ class TranscriptionPipeline:
|
|
| 179 |
minimum_note_length: int = None
|
| 180 |
) -> Path:
|
| 181 |
"""
|
| 182 |
-
Transcribe audio to MIDI using
|
| 183 |
|
| 184 |
Args:
|
| 185 |
audio_path: Path to audio file (should be 'other' stem for piano)
|
| 186 |
-
onset_threshold:
|
| 187 |
-
frame_threshold:
|
| 188 |
-
minimum_note_length:
|
| 189 |
|
| 190 |
Returns:
|
| 191 |
Path to generated MIDI file
|
| 192 |
"""
|
| 193 |
-
# Use config defaults if not specified
|
| 194 |
-
if onset_threshold is None:
|
| 195 |
-
onset_threshold = self.config.onset_threshold
|
| 196 |
-
if frame_threshold is None:
|
| 197 |
-
frame_threshold = self.config.frame_threshold
|
| 198 |
-
if minimum_note_length is None:
|
| 199 |
-
minimum_note_length = self.config.minimum_note_length
|
| 200 |
-
|
| 201 |
output_dir = self.temp_dir
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
|
| 205 |
-
midi_path =
|
| 206 |
-
|
| 207 |
-
if use_yourmt3:
|
| 208 |
-
try:
|
| 209 |
-
print(f" Transcribing with YourMT3+ (primary transcriber)...")
|
| 210 |
-
midi_path = self.transcribe_with_yourmt3(audio_path)
|
| 211 |
-
print(f" ✓ YourMT3+ transcription complete")
|
| 212 |
-
except Exception as e:
|
| 213 |
-
import traceback
|
| 214 |
-
print(f" ⚠ YourMT3+ failed: {e}")
|
| 215 |
-
print(f" Full error: {traceback.format_exc()}")
|
| 216 |
-
print(f" → Falling back to basic-pitch")
|
| 217 |
-
midi_path = None
|
| 218 |
-
|
| 219 |
-
# === STEP 2: Fallback to basic-pitch if YourMT3+ failed or disabled ===
|
| 220 |
-
if midi_path is None:
|
| 221 |
-
print(f" Transcribing with basic-pitch (onset={onset_threshold}, frame={frame_threshold})...")
|
| 222 |
-
|
| 223 |
-
# Run basic-pitch inference
|
| 224 |
-
# predict_and_save creates output files in the output directory
|
| 225 |
-
predict_and_save(
|
| 226 |
-
audio_path_list=[str(audio_path)],
|
| 227 |
-
output_directory=str(output_dir),
|
| 228 |
-
save_midi=True,
|
| 229 |
-
sonify_midi=False, # Don't create audio
|
| 230 |
-
save_model_outputs=False, # Don't save raw outputs
|
| 231 |
-
save_notes=False, # Don't save CSV
|
| 232 |
-
model_or_model_path=ICASSP_2022_MODEL_PATH,
|
| 233 |
-
onset_threshold=onset_threshold,
|
| 234 |
-
frame_threshold=frame_threshold,
|
| 235 |
-
minimum_note_length=minimum_note_length,
|
| 236 |
-
minimum_frequency=self.config.minimum_frequency_hz, # Filter low-frequency noise (F1)
|
| 237 |
-
maximum_frequency=self.config.maximum_frequency_hz, # No upper limit
|
| 238 |
-
multiple_pitch_bends=False,
|
| 239 |
-
melodia_trick=True, # Improves monophonic melody
|
| 240 |
-
debug_file=None
|
| 241 |
-
)
|
| 242 |
-
|
| 243 |
-
# basic-pitch saves as {audio_stem}_basic_pitch.mid
|
| 244 |
-
generated_bp_midi = output_dir / f"{audio_path.stem}_basic_pitch.mid"
|
| 245 |
-
|
| 246 |
-
if not generated_bp_midi.exists():
|
| 247 |
-
raise RuntimeError("basic-pitch did not create MIDI file")
|
| 248 |
-
|
| 249 |
-
midi_path = generated_bp_midi
|
| 250 |
-
print(f" ✓ basic-pitch transcription complete")
|
| 251 |
|
| 252 |
# Rename final MIDI to standard name for post-processing
|
| 253 |
final_midi_path = output_dir / "piano.mid"
|
|
@@ -1094,163 +1037,6 @@ class TranscriptionPipeline:
|
|
| 1094 |
|
| 1095 |
return midi_path
|
| 1096 |
|
| 1097 |
-
def generate_musicxml(self, midi_path: Path) -> Path:
|
| 1098 |
-
"""
|
| 1099 |
-
Convert MIDI to MusicXML with intelligent metadata detection and normalization.
|
| 1100 |
-
|
| 1101 |
-
New pipeline order (optimized):
|
| 1102 |
-
1. Detect metadata from audio (tempo, time signature)
|
| 1103 |
-
2. Parse MIDI
|
| 1104 |
-
3. Detect key (ensemble)
|
| 1105 |
-
4. Insert metadata
|
| 1106 |
-
5. Deduplicate overlapping notes
|
| 1107 |
-
6. Add clef
|
| 1108 |
-
7. makeMeasures()
|
| 1109 |
-
8. Normalize measure durations
|
| 1110 |
-
9. Validate measures
|
| 1111 |
-
10. Export MusicXML
|
| 1112 |
-
|
| 1113 |
-
Args:
|
| 1114 |
-
midi_path: Path to input MIDI file
|
| 1115 |
-
|
| 1116 |
-
Returns:
|
| 1117 |
-
Path to output MusicXML file
|
| 1118 |
-
"""
|
| 1119 |
-
self.progress(92, "musicxml", "Detecting metadata from audio")
|
| 1120 |
-
|
| 1121 |
-
# Step 1: Detect metadata from audio BEFORE parsing MIDI
|
| 1122 |
-
audio_path = self.temp_dir / "audio.wav"
|
| 1123 |
-
|
| 1124 |
-
if audio_path.exists():
|
| 1125 |
-
# Detect tempo
|
| 1126 |
-
detected_tempo, tempo_confidence = self.detect_tempo_from_audio(audio_path)
|
| 1127 |
-
|
| 1128 |
-
# Detect time signature (needs tempo)
|
| 1129 |
-
time_sig_num, time_sig_denom, ts_confidence = self.detect_time_signature(
|
| 1130 |
-
audio_path, detected_tempo
|
| 1131 |
-
)
|
| 1132 |
-
else:
|
| 1133 |
-
# Fallback if audio not available
|
| 1134 |
-
print(" WARNING: Audio file not found, using defaults")
|
| 1135 |
-
detected_tempo, tempo_confidence = 120.0, 0.0
|
| 1136 |
-
time_sig_num, time_sig_denom, ts_confidence = 4, 4, 0.0
|
| 1137 |
-
|
| 1138 |
-
self.progress(93, "musicxml", "Parsing MIDI")
|
| 1139 |
-
|
| 1140 |
-
# Step 2: Parse MIDI
|
| 1141 |
-
score = converter.parse(midi_path)
|
| 1142 |
-
|
| 1143 |
-
self.progress(94, "musicxml", "Detecting key signature")
|
| 1144 |
-
|
| 1145 |
-
# Step 3: Detect key using ensemble methods
|
| 1146 |
-
detected_key, key_confidence = self.detect_key_ensemble(score, audio_path)
|
| 1147 |
-
|
| 1148 |
-
self.progress(95, "musicxml", "Deduplicating overlapping notes")
|
| 1149 |
-
|
| 1150 |
-
# Step 4: Deduplicate overlapping notes (prevent polyphony issues)
|
| 1151 |
-
score = self._deduplicate_overlapping_notes(score)
|
| 1152 |
-
|
| 1153 |
-
# Step 4.5: Merge sequential notes at music21 level (fixes Issue #8 - tiny rests)
|
| 1154 |
-
# This fixes tiny rests from MIDI→music21 precision loss
|
| 1155 |
-
# Increased from 0.02 to 0.08 to catch gaps created by quantization (125ms at 120 BPM)
|
| 1156 |
-
self.progress(95, "musicxml", "Merging sequential notes")
|
| 1157 |
-
score = self._merge_music21_notes(score, gap_threshold_qn=0.08)
|
| 1158 |
-
|
| 1159 |
-
# Step 5: Clean up any very short durations BEFORE makeMeasures
|
| 1160 |
-
# This prevents music21 from creating impossible tuplets
|
| 1161 |
-
for part in score.parts:
|
| 1162 |
-
for element in part.flatten().notesAndRests:
|
| 1163 |
-
if element.quarterLength < 0.0625: # Shorter than 64th note
|
| 1164 |
-
element.quarterLength = 0.0625 # Round up to 64th note
|
| 1165 |
-
|
| 1166 |
-
self.progress(96, "musicxml", "Creating measures")
|
| 1167 |
-
|
| 1168 |
-
# Step 6: Create measures FIRST (required before grand staff split)
|
| 1169 |
-
score = score.makeMeasures()
|
| 1170 |
-
|
| 1171 |
-
# Step 7: Split into grand staff (treble + bass clefs) if enabled
|
| 1172 |
-
if self.config.enable_grand_staff:
|
| 1173 |
-
print(f" Splitting into grand staff (split at MIDI note {self.config.middle_c_split})...")
|
| 1174 |
-
score = self._split_into_grand_staff(score)
|
| 1175 |
-
print(f" Created {len(score.parts)} staves (treble + bass)")
|
| 1176 |
-
|
| 1177 |
-
# Insert metadata into each part (grand staff creates new parts without metadata)
|
| 1178 |
-
for part in score.parts:
|
| 1179 |
-
# Get the first measure
|
| 1180 |
-
measures = part.getElementsByClass('Measure')
|
| 1181 |
-
if measures:
|
| 1182 |
-
first_measure = measures[0]
|
| 1183 |
-
# Insert key, time signature, and tempo into first measure
|
| 1184 |
-
first_measure.insert(0, tempo.MetronomeMark(number=detected_tempo))
|
| 1185 |
-
first_measure.insert(0, detected_key)
|
| 1186 |
-
first_measure.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
|
| 1187 |
-
else:
|
| 1188 |
-
# Single staff: add treble clef and metadata
|
| 1189 |
-
for part in score.parts:
|
| 1190 |
-
part.insert(0, clef.TrebleClef())
|
| 1191 |
-
part.insert(0, detected_key)
|
| 1192 |
-
part.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
|
| 1193 |
-
part.insert(0, tempo.MetronomeMark(number=detected_tempo))
|
| 1194 |
-
part.partName = "Piano"
|
| 1195 |
-
|
| 1196 |
-
# Step 7.5: Add tie notation for sustained notes across measure boundaries
|
| 1197 |
-
if self.config.enable_tie_notation:
|
| 1198 |
-
print(" Adding ties for sustained notes...")
|
| 1199 |
-
score = self._add_ties_to_score(score)
|
| 1200 |
-
|
| 1201 |
-
self.progress(97, "musicxml", "Normalizing measure durations")
|
| 1202 |
-
|
| 1203 |
-
# Step 8: Remove impossible durations that makeMeasures created
|
| 1204 |
-
score = self._remove_impossible_durations(score)
|
| 1205 |
-
|
| 1206 |
-
# Step 9: Fix tuplets with impossible durations
|
| 1207 |
-
score = self._fix_tuplet_durations(score)
|
| 1208 |
-
|
| 1209 |
-
# Step 10: Normalize measure durations
|
| 1210 |
-
score = self._normalize_measure_durations(score, time_sig_num, time_sig_denom)
|
| 1211 |
-
|
| 1212 |
-
# Step 10.5: Fix any NEW impossible tuplets created during normalization
|
| 1213 |
-
# Normalization might add rests that music21 assigns tuplets to
|
| 1214 |
-
score = self._fix_tuplet_durations(score)
|
| 1215 |
-
|
| 1216 |
-
# Step 11: Validate measures (logging only)
|
| 1217 |
-
self._validate_measures(score)
|
| 1218 |
-
|
| 1219 |
-
self.progress(98, "musicxml", "Writing MusicXML file")
|
| 1220 |
-
|
| 1221 |
-
# Write MusicXML with proper error handling
|
| 1222 |
-
output_path = self.temp_dir / f"{self.job_id}.musicxml"
|
| 1223 |
-
|
| 1224 |
-
try:
|
| 1225 |
-
# Use makeNotation=False to prevent music21 from auto-generating tuplets
|
| 1226 |
-
score.write('musicxml', fp=str(output_path), makeNotation=False)
|
| 1227 |
-
except Exception as e:
|
| 1228 |
-
error_msg = str(e)
|
| 1229 |
-
|
| 1230 |
-
# If still getting 2048th note errors after our normalization,
|
| 1231 |
-
# it means music21 is creating them during export (not our fault)
|
| 1232 |
-
if 'Cannot convert "2048th" duration to MusicXML' in error_msg or \
|
| 1233 |
-
'Cannot convert "4096th" duration to MusicXML' in error_msg:
|
| 1234 |
-
|
| 1235 |
-
print(f" ERROR: music21 generated impossible duration during export: {error_msg}")
|
| 1236 |
-
print(f" This is a music21 bug. Try re-running with different tempo/time signature.")
|
| 1237 |
-
|
| 1238 |
-
# Last resort: try exporting as MIDI instead
|
| 1239 |
-
midi_fallback = self.temp_dir / f"{self.job_id}_fallback.mid"
|
| 1240 |
-
score.write('midi', fp=str(midi_fallback))
|
| 1241 |
-
print(f" Created fallback MIDI export: {midi_fallback}")
|
| 1242 |
-
|
| 1243 |
-
raise RuntimeError(
|
| 1244 |
-
f"MusicXML export failed due to music21 bug. "
|
| 1245 |
-
f"MIDI fallback created at {midi_fallback}. "
|
| 1246 |
-
f"Original error: {error_msg}"
|
| 1247 |
-
)
|
| 1248 |
-
else:
|
| 1249 |
-
# Different error, re-raise
|
| 1250 |
-
raise
|
| 1251 |
-
|
| 1252 |
-
return output_path
|
| 1253 |
-
|
| 1254 |
def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
|
| 1255 |
"""
|
| 1256 |
Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
|
|
@@ -1387,298 +1173,9 @@ class TranscriptionPipeline:
|
|
| 1387 |
print(f" ✓ MusicXML generation complete")
|
| 1388 |
return output_path
|
| 1389 |
|
| 1390 |
-
def _deduplicate_overlapping_notes(self, score) -> stream.Score:
|
| 1391 |
-
"""
|
| 1392 |
-
Deduplicate overlapping notes from basic-pitch to prevent MusicXML corruption.
|
| 1393 |
-
|
| 1394 |
-
Problem: basic-pitch outputs multiple notes at the same timestamp for polyphonic detection.
|
| 1395 |
-
When music21's makeMeasures() processes these, it creates measures with >4.0 beats.
|
| 1396 |
-
|
| 1397 |
-
Solution: Group simultaneous notes (within 10ms) into chords, merge duplicate pitches.
|
| 1398 |
-
|
| 1399 |
-
Args:
|
| 1400 |
-
score: music21 Score object before makeMeasures()
|
| 1401 |
-
|
| 1402 |
-
Returns:
|
| 1403 |
-
Cleaned score with deduplicated notes
|
| 1404 |
-
"""
|
| 1405 |
-
from music21 import stream, note, chord as m21_chord
|
| 1406 |
-
from collections import defaultdict
|
| 1407 |
-
|
| 1408 |
-
# Process each part
|
| 1409 |
-
for part in score.parts:
|
| 1410 |
-
# Collect all notes with their absolute offsets
|
| 1411 |
-
notes_by_time = defaultdict(list) # bucket -> [notes]
|
| 1412 |
-
|
| 1413 |
-
for element in part.flatten().notesAndRests:
|
| 1414 |
-
if isinstance(element, note.Rest):
|
| 1415 |
-
continue # Skip rests for deduplication
|
| 1416 |
-
|
| 1417 |
-
# Get absolute offset in quarter notes
|
| 1418 |
-
offset_qn = element.offset
|
| 1419 |
-
|
| 1420 |
-
# Bucket notes that are within 0.005 quarter notes of each other (~5ms at 120 BPM)
|
| 1421 |
-
# Finer resolution prevents chord notes from splitting into separate buckets
|
| 1422 |
-
bucket = round(offset_qn / 0.005) * 0.005
|
| 1423 |
-
|
| 1424 |
-
if isinstance(element, note.Note):
|
| 1425 |
-
notes_by_time[bucket].append(element)
|
| 1426 |
-
elif isinstance(element, m21_chord.Chord):
|
| 1427 |
-
# Explode chords into individual notes for deduplication
|
| 1428 |
-
for pitch in element.pitches:
|
| 1429 |
-
n = note.Note(pitch)
|
| 1430 |
-
n.quarterLength = element.quarterLength
|
| 1431 |
-
n.offset = element.offset
|
| 1432 |
-
notes_by_time[bucket].append(n)
|
| 1433 |
-
|
| 1434 |
-
# Rebuild part with deduplicated notes
|
| 1435 |
-
new_part = stream.Part()
|
| 1436 |
-
|
| 1437 |
-
# Copy metadata (key, tempo, time signature will be added later)
|
| 1438 |
-
new_part.id = part.id
|
| 1439 |
-
new_part.partName = part.partName
|
| 1440 |
-
|
| 1441 |
-
for bucket_qn in sorted(notes_by_time.keys()):
|
| 1442 |
-
bucket_notes = notes_by_time[bucket_qn]
|
| 1443 |
-
|
| 1444 |
-
if not bucket_notes:
|
| 1445 |
-
continue
|
| 1446 |
-
|
| 1447 |
-
# Group by pitch to remove duplicates
|
| 1448 |
-
pitch_groups = defaultdict(list)
|
| 1449 |
-
for n in bucket_notes:
|
| 1450 |
-
pitch_groups[n.pitch.midi].append(n)
|
| 1451 |
-
|
| 1452 |
-
# For each unique pitch, keep the note with longest duration
|
| 1453 |
-
unique_notes = []
|
| 1454 |
-
for midi_pitch, pitch_notes in pitch_groups.items():
|
| 1455 |
-
# Sort by duration (longest first)
|
| 1456 |
-
# Get velocity as integer for comparison (handle None values)
|
| 1457 |
-
def get_velocity(note):
|
| 1458 |
-
if hasattr(note, 'volume') and hasattr(note.volume, 'velocity'):
|
| 1459 |
-
vel = note.volume.velocity
|
| 1460 |
-
return vel if vel is not None else 64
|
| 1461 |
-
return 64
|
| 1462 |
-
|
| 1463 |
-
pitch_notes.sort(key=lambda x: (x.quarterLength, get_velocity(x)), reverse=True)
|
| 1464 |
-
best_note = pitch_notes[0]
|
| 1465 |
-
|
| 1466 |
-
# Filter out extremely short notes (< 64th note = 0.0625 quarter notes)
|
| 1467 |
-
# MusicXML can't handle notes shorter than 1024th
|
| 1468 |
-
if best_note.quarterLength >= 0.0625:
|
| 1469 |
-
unique_notes.append(best_note)
|
| 1470 |
-
|
| 1471 |
-
if not unique_notes:
|
| 1472 |
-
continue # Skip if all notes were too short
|
| 1473 |
-
|
| 1474 |
-
# Use bucket quarter note offset directly
|
| 1475 |
-
offset_qn = bucket_qn
|
| 1476 |
-
|
| 1477 |
-
if len(unique_notes) == 1:
|
| 1478 |
-
# Single note - snap duration to avoid impossible tuplets
|
| 1479 |
-
n = note.Note(unique_notes[0].pitch)
|
| 1480 |
-
n.quarterLength = self._snap_duration(unique_notes[0].quarterLength)
|
| 1481 |
-
new_part.insert(offset_qn, n)
|
| 1482 |
-
elif len(unique_notes) > 1:
|
| 1483 |
-
# Multiple notes at same time -> create chord
|
| 1484 |
-
# Use the shortest duration to avoid overlaps, then snap
|
| 1485 |
-
min_duration = min(n.quarterLength for n in unique_notes)
|
| 1486 |
-
|
| 1487 |
-
c = m21_chord.Chord([n.pitch for n in unique_notes])
|
| 1488 |
-
c.quarterLength = self._snap_duration(min_duration)
|
| 1489 |
-
new_part.insert(offset_qn, c)
|
| 1490 |
-
|
| 1491 |
-
# Replace old part with new part
|
| 1492 |
-
score.replace(part, new_part)
|
| 1493 |
-
|
| 1494 |
-
return score
|
| 1495 |
-
|
| 1496 |
-
def _merge_music21_notes(self, score, gap_threshold_qn: float = 0.02) -> stream.Score:
|
| 1497 |
-
"""
|
| 1498 |
-
Merge sequential notes of same pitch with small gaps at music21 level.
|
| 1499 |
-
|
| 1500 |
-
Fixes tiny rests created by makeMeasures() from MIDI→music21 precision loss.
|
| 1501 |
-
MUST run AFTER deduplication but BEFORE makeMeasures.
|
| 1502 |
-
|
| 1503 |
-
Args:
|
| 1504 |
-
score: music21 Score (before makeMeasures)
|
| 1505 |
-
gap_threshold_qn: Max gap to merge (0.02 QN ≈ 20ms @ 120 BPM)
|
| 1506 |
-
|
| 1507 |
-
Returns:
|
| 1508 |
-
Score with merged sequential notes
|
| 1509 |
-
"""
|
| 1510 |
-
from music21 import stream, note, chord as m21_chord
|
| 1511 |
-
from collections import defaultdict
|
| 1512 |
-
|
| 1513 |
-
for part in score.parts:
|
| 1514 |
-
# Collect all notes with timing
|
| 1515 |
-
elements_with_offsets = []
|
| 1516 |
-
|
| 1517 |
-
for element in part.flatten().notesAndRests:
|
| 1518 |
-
if isinstance(element, note.Rest):
|
| 1519 |
-
continue
|
| 1520 |
-
|
| 1521 |
-
offset_qn = element.offset
|
| 1522 |
-
duration_qn = element.quarterLength
|
| 1523 |
-
|
| 1524 |
-
if isinstance(element, note.Note):
|
| 1525 |
-
elements_with_offsets.append({
|
| 1526 |
-
'offset': offset_qn,
|
| 1527 |
-
'end': offset_qn + duration_qn,
|
| 1528 |
-
'pitch': element.pitch.midi,
|
| 1529 |
-
'element': element
|
| 1530 |
-
})
|
| 1531 |
-
elif isinstance(element, m21_chord.Chord):
|
| 1532 |
-
# Track each chord pitch separately
|
| 1533 |
-
for pitch in element.pitches:
|
| 1534 |
-
elements_with_offsets.append({
|
| 1535 |
-
'offset': offset_qn,
|
| 1536 |
-
'end': offset_qn + duration_qn,
|
| 1537 |
-
'pitch': pitch.midi,
|
| 1538 |
-
'element': element,
|
| 1539 |
-
'chord_id': id(element) # Prevent merging same-chord notes
|
| 1540 |
-
})
|
| 1541 |
-
|
| 1542 |
-
# Group by pitch and sort
|
| 1543 |
-
notes_by_pitch = defaultdict(list)
|
| 1544 |
-
for elem in elements_with_offsets:
|
| 1545 |
-
notes_by_pitch[elem['pitch']].append(elem)
|
| 1546 |
-
|
| 1547 |
-
for pitch in notes_by_pitch:
|
| 1548 |
-
notes_by_pitch[pitch].sort(key=lambda x: x['offset'])
|
| 1549 |
-
|
| 1550 |
-
# Track modifications
|
| 1551 |
-
elements_to_remove = set()
|
| 1552 |
-
duration_updates = {}
|
| 1553 |
-
|
| 1554 |
-
# Merge within each pitch group
|
| 1555 |
-
for pitch, note_list in notes_by_pitch.items():
|
| 1556 |
-
i = 0
|
| 1557 |
-
while i < len(note_list):
|
| 1558 |
-
current = note_list[i]
|
| 1559 |
-
|
| 1560 |
-
# Look ahead for mergeable notes
|
| 1561 |
-
j = i + 1
|
| 1562 |
-
while j < len(note_list):
|
| 1563 |
-
next_note = note_list[j]
|
| 1564 |
-
gap = next_note['offset'] - current['end']
|
| 1565 |
-
|
| 1566 |
-
if gap <= gap_threshold_qn:
|
| 1567 |
-
# Don't merge notes from SAME chord
|
| 1568 |
-
if ('chord_id' in current and 'chord_id' in next_note and
|
| 1569 |
-
current['chord_id'] == next_note['chord_id']):
|
| 1570 |
-
break
|
| 1571 |
-
|
| 1572 |
-
# Extend current to cover gap + next note
|
| 1573 |
-
new_end = next_note['end']
|
| 1574 |
-
new_duration = new_end - current['offset']
|
| 1575 |
-
|
| 1576 |
-
duration_updates[id(current['element'])] = new_duration
|
| 1577 |
-
current['end'] = new_end
|
| 1578 |
-
|
| 1579 |
-
elements_to_remove.add(id(next_note['element']))
|
| 1580 |
-
j += 1
|
| 1581 |
-
else:
|
| 1582 |
-
break
|
| 1583 |
-
|
| 1584 |
-
i = j if j > i + 1 else i + 1
|
| 1585 |
-
|
| 1586 |
-
# Rebuild part with modifications
|
| 1587 |
-
new_part = stream.Part()
|
| 1588 |
-
new_part.id = part.id
|
| 1589 |
-
new_part.partName = part.partName
|
| 1590 |
-
|
| 1591 |
-
for element in part.flatten().notesAndRests:
|
| 1592 |
-
elem_id = id(element)
|
| 1593 |
-
|
| 1594 |
-
if elem_id in elements_to_remove:
|
| 1595 |
-
continue
|
| 1596 |
-
|
| 1597 |
-
if elem_id in duration_updates:
|
| 1598 |
-
element.quarterLength = duration_updates[elem_id]
|
| 1599 |
-
|
| 1600 |
-
new_part.insert(element.offset, element)
|
| 1601 |
-
|
| 1602 |
-
score.replace(part, new_part)
|
| 1603 |
-
|
| 1604 |
-
return score
|
| 1605 |
-
|
| 1606 |
-
def _add_ties_to_score(self, score) -> stream.Score:
|
| 1607 |
-
"""
|
| 1608 |
-
Add tie notation to notes that span measure boundaries.
|
| 1609 |
-
|
| 1610 |
-
Uses music21's tie.Tie class:
|
| 1611 |
-
- 'start': Beginning of tied note
|
| 1612 |
-
- 'stop': End of tied note
|
| 1613 |
-
|
| 1614 |
-
Args:
|
| 1615 |
-
score: music21 Score object
|
| 1616 |
-
|
| 1617 |
-
Returns:
|
| 1618 |
-
Score with tie notation added
|
| 1619 |
-
"""
|
| 1620 |
-
from music21 import tie
|
| 1621 |
-
|
| 1622 |
-
for part in score.parts:
|
| 1623 |
-
measures = list(part.getElementsByClass('Measure'))
|
| 1624 |
-
|
| 1625 |
-
# Get time signature to determine expected measure length
|
| 1626 |
-
ts = part.getElementsByClass('TimeSignature')
|
| 1627 |
-
expected_measure_length = ts[0].barDuration.quarterLength if ts else 4.0
|
| 1628 |
-
|
| 1629 |
-
for measure_idx, measure in enumerate(measures):
|
| 1630 |
-
# Get the time signature for this measure if it changed
|
| 1631 |
-
measure_ts = measure.getElementsByClass('TimeSignature')
|
| 1632 |
-
if measure_ts:
|
| 1633 |
-
expected_measure_length = measure_ts[0].barDuration.quarterLength
|
| 1634 |
-
|
| 1635 |
-
for element in measure.notesAndRests:
|
| 1636 |
-
if not isinstance(element, note.Note):
|
| 1637 |
-
continue
|
| 1638 |
-
|
| 1639 |
-
# Check if note extends beyond measure boundary
|
| 1640 |
-
# Use expected_measure_length from time signature, not barDuration
|
| 1641 |
-
# which may have been auto-expanded by music21
|
| 1642 |
-
element_end = element.offset + element.quarterLength
|
| 1643 |
-
|
| 1644 |
-
if element_end > expected_measure_length + 0.01: # Tolerance for floating point
|
| 1645 |
-
# Note crosses boundary - add 'start' tie
|
| 1646 |
-
element.tie = tie.Tie('start')
|
| 1647 |
-
|
| 1648 |
-
# Find continuation in next measure and add 'stop' tie
|
| 1649 |
-
if measure_idx + 1 < len(measures):
|
| 1650 |
-
next_measure = measures[measure_idx + 1]
|
| 1651 |
-
for next_elem in next_measure.notesAndRests:
|
| 1652 |
-
if (isinstance(next_elem, note.Note) and
|
| 1653 |
-
next_elem.pitch.midi == element.pitch.midi and
|
| 1654 |
-
next_elem.offset < 0.1): # At start of measure
|
| 1655 |
-
next_elem.tie = tie.Tie('stop')
|
| 1656 |
-
break
|
| 1657 |
-
|
| 1658 |
-
return score
|
| 1659 |
-
|
| 1660 |
-
def _snap_duration(self, duration) -> float:
|
| 1661 |
-
"""
|
| 1662 |
-
Snap duration to nearest MusicXML-valid note value to avoid impossible tuplets.
|
| 1663 |
|
| 1664 |
-
Valid durations: whole (4.0), half (2.0), quarter (1.0), eighth (0.5),
|
| 1665 |
-
sixteenth (0.25), thirty-second (0.125), sixty-fourth (0.0625)
|
| 1666 |
|
| 1667 |
-
Args:
|
| 1668 |
-
duration: Quarter length as float or Fraction
|
| 1669 |
-
|
| 1670 |
-
Returns:
|
| 1671 |
-
Snapped quarter length
|
| 1672 |
-
"""
|
| 1673 |
-
valid_durations = [4.0, 2.0, 1.0, 0.5, 0.25, 0.125, 0.0625]
|
| 1674 |
-
|
| 1675 |
-
# Convert to float for comparison
|
| 1676 |
-
dur_float = float(duration)
|
| 1677 |
|
| 1678 |
-
# Find nearest valid duration
|
| 1679 |
-
nearest = min(valid_durations, key=lambda x: abs(x - dur_float))
|
| 1680 |
-
|
| 1681 |
-
return nearest
|
| 1682 |
|
| 1683 |
def _snap_to_valid_duration(self, duration: float) -> float:
|
| 1684 |
"""
|
|
@@ -1708,73 +1205,6 @@ class TranscriptionPipeline:
|
|
| 1708 |
|
| 1709 |
return nearest
|
| 1710 |
|
| 1711 |
-
def _normalize_measure_durations(self, score, time_sig_numerator: int = 4, time_sig_denominator: int = 4) -> stream.Score:
|
| 1712 |
-
"""
|
| 1713 |
-
Normalize note durations to fit measures, using detected time signature.
|
| 1714 |
-
|
| 1715 |
-
Instead of removing notes, adjust durations to fill measures correctly.
|
| 1716 |
-
|
| 1717 |
-
Args:
|
| 1718 |
-
score: music21 Score with measures
|
| 1719 |
-
time_sig_numerator: Detected time signature numerator
|
| 1720 |
-
time_sig_denominator: Detected time signature denominator
|
| 1721 |
-
|
| 1722 |
-
Returns:
|
| 1723 |
-
Normalized score
|
| 1724 |
-
"""
|
| 1725 |
-
expected_duration = (time_sig_numerator / time_sig_denominator) * 4.0 # Quarter notes
|
| 1726 |
-
|
| 1727 |
-
for part in score.parts:
|
| 1728 |
-
for measure in part.getElementsByClass('Measure'):
|
| 1729 |
-
# Get all notes and chords
|
| 1730 |
-
elements = list(measure.notesAndRests)
|
| 1731 |
-
|
| 1732 |
-
if not elements:
|
| 1733 |
-
continue
|
| 1734 |
-
|
| 1735 |
-
# Calculate actual duration
|
| 1736 |
-
actual_duration = sum(e.quarterLength for e in elements)
|
| 1737 |
-
|
| 1738 |
-
# Increased tolerance from 0.05 to 0.15 QN (150ms at 120 BPM)
|
| 1739 |
-
# Prevents normalizing "good enough" measures that get made worse by rounding
|
| 1740 |
-
if abs(actual_duration - expected_duration) < 0.15:
|
| 1741 |
-
continue # Already correct (allow tolerance for quantization errors)
|
| 1742 |
-
|
| 1743 |
-
# Normalize durations proportionally
|
| 1744 |
-
scale_factor = expected_duration / actual_duration if actual_duration > 0 else 1.0
|
| 1745 |
-
|
| 1746 |
-
for element in elements:
|
| 1747 |
-
# Scale duration
|
| 1748 |
-
new_duration = element.quarterLength * scale_factor
|
| 1749 |
-
|
| 1750 |
-
# Snap to valid music21 duration
|
| 1751 |
-
element.quarterLength = self._snap_to_valid_duration(new_duration)
|
| 1752 |
-
|
| 1753 |
-
# Verify total duration after normalization
|
| 1754 |
-
new_total = sum(e.quarterLength for e in measure.notesAndRests)
|
| 1755 |
-
|
| 1756 |
-
if abs(new_total - expected_duration) > 0.1:
|
| 1757 |
-
gap = expected_duration - new_total
|
| 1758 |
-
|
| 1759 |
-
if gap > 0.01:
|
| 1760 |
-
# Underfull - add rest to fill
|
| 1761 |
-
rest = note.Rest(quarterLength=gap)
|
| 1762 |
-
measure.append(rest)
|
| 1763 |
-
elif gap < -0.01:
|
| 1764 |
-
# Overfull - proportionally adjust all elements (ZERO data loss)
|
| 1765 |
-
# This is better than removing notes
|
| 1766 |
-
overage = -gap
|
| 1767 |
-
elements = list(measure.notesAndRests)
|
| 1768 |
-
|
| 1769 |
-
print(f" WARNING: Measure overfull by {overage:.3f} QN, adjusting durations proportionally")
|
| 1770 |
-
|
| 1771 |
-
# Proportionally reduce all durations
|
| 1772 |
-
reduction_factor = expected_duration / new_total
|
| 1773 |
-
|
| 1774 |
-
for elem in elements:
|
| 1775 |
-
elem.quarterLength = self._snap_to_valid_duration(elem.quarterLength * reduction_factor)
|
| 1776 |
-
|
| 1777 |
-
return score
|
| 1778 |
|
| 1779 |
def _validate_and_adjust_metadata(
|
| 1780 |
self,
|
|
@@ -2055,93 +1485,7 @@ class TranscriptionPipeline:
|
|
| 2055 |
|
| 2056 |
return score
|
| 2057 |
|
| 2058 |
-
def _fix_tuplet_durations(self, score) -> stream.Score:
|
| 2059 |
-
"""
|
| 2060 |
-
Simplify all durations to prevent music21 from creating impossible tuplets during export.
|
| 2061 |
-
|
| 2062 |
-
music21 creates tuplets on-the-fly during MusicXML export when durations don't
|
| 2063 |
-
fit standard values. By rounding all durations to simple fractions, we prevent
|
| 2064 |
-
the export logic from generating 2048th note tuplets.
|
| 2065 |
-
|
| 2066 |
-
Args:
|
| 2067 |
-
score: music21 Score with measures
|
| 2068 |
-
|
| 2069 |
-
Returns:
|
| 2070 |
-
Cleaned score with simplified durations
|
| 2071 |
-
"""
|
| 2072 |
-
from music21 import note, chord, stream, duration
|
| 2073 |
-
|
| 2074 |
-
simplified_count = 0
|
| 2075 |
-
|
| 2076 |
-
# Simple durations that don't trigger tuplet creation (in quarter notes)
|
| 2077 |
-
SIMPLE_DURATIONS = [
|
| 2078 |
-
4.0, # Whole note
|
| 2079 |
-
3.0, # Dotted half
|
| 2080 |
-
2.0, # Half note
|
| 2081 |
-
1.5, # Dotted quarter
|
| 2082 |
-
1.0, # Quarter note
|
| 2083 |
-
0.75, # Dotted eighth
|
| 2084 |
-
0.5, # Eighth note
|
| 2085 |
-
0.375, # Dotted 16th
|
| 2086 |
-
0.25, # 16th note
|
| 2087 |
-
0.125, # 32nd note (CRITICAL: was missing, caused durations to double!)
|
| 2088 |
-
0.0625, # 64th note
|
| 2089 |
-
0.03125, # 128th note
|
| 2090 |
-
]
|
| 2091 |
-
|
| 2092 |
-
for part in score.parts:
|
| 2093 |
-
for measure in part.getElementsByClass('Measure'):
|
| 2094 |
-
# Process all notes, rests, and chords
|
| 2095 |
-
for element in measure.notesAndRests:
|
| 2096 |
-
original_duration = element.quarterLength
|
| 2097 |
-
|
| 2098 |
-
# Round to nearest simple duration
|
| 2099 |
-
nearest_duration = min(SIMPLE_DURATIONS, key=lambda x: abs(x - original_duration))
|
| 2100 |
-
|
| 2101 |
-
if abs(original_duration - nearest_duration) > 0.01:
|
| 2102 |
-
element.quarterLength = nearest_duration
|
| 2103 |
-
simplified_count += 1
|
| 2104 |
-
|
| 2105 |
-
# Strip any tuplets that might exist
|
| 2106 |
-
if element.duration.tuplets:
|
| 2107 |
-
element.duration.tuplets = ()
|
| 2108 |
-
|
| 2109 |
-
# For chords, also process each note within
|
| 2110 |
-
if isinstance(element, chord.Chord):
|
| 2111 |
-
for n in element.notes:
|
| 2112 |
-
if n.duration.tuplets:
|
| 2113 |
-
n.duration.tuplets = ()
|
| 2114 |
-
|
| 2115 |
-
if simplified_count > 0:
|
| 2116 |
-
print(f" Simplified {simplified_count} durations to prevent tuplet creation during export")
|
| 2117 |
-
|
| 2118 |
-
return score
|
| 2119 |
-
|
| 2120 |
-
def _validate_measures(self, score) -> None:
|
| 2121 |
-
"""
|
| 2122 |
-
Validate that all measures have correct durations matching their time signature.
|
| 2123 |
-
|
| 2124 |
-
Logs warnings for any measures that are overfull or underfull.
|
| 2125 |
-
|
| 2126 |
-
Args:
|
| 2127 |
-
score: music21 Score with measures already created
|
| 2128 |
-
"""
|
| 2129 |
-
for part_idx, part in enumerate(score.parts):
|
| 2130 |
-
for measure_idx, measure in enumerate(part.getElementsByClass('Measure')):
|
| 2131 |
-
# Get time signature for this measure
|
| 2132 |
-
ts = measure.timeSignature or measure.getContextByClass('TimeSignature')
|
| 2133 |
-
if not ts:
|
| 2134 |
-
continue # Skip if no time signature
|
| 2135 |
-
|
| 2136 |
-
expected_duration = ts.barDuration.quarterLength
|
| 2137 |
-
actual_duration = measure.duration.quarterLength
|
| 2138 |
-
|
| 2139 |
-
# Allow small floating-point tolerance (0.01 quarter notes = ~10ms at 120 BPM)
|
| 2140 |
-
tolerance = 0.01
|
| 2141 |
|
| 2142 |
-
if abs(actual_duration - expected_duration) > tolerance:
|
| 2143 |
-
print(f"WARNING: Measure {measure_idx + 1} in part {part_idx} has duration {float(actual_duration):.2f} "
|
| 2144 |
-
f"(expected {float(expected_duration):.2f} for {ts.ratioString} time)")
|
| 2145 |
|
| 2146 |
def _split_into_grand_staff(self, score) -> stream.Score:
|
| 2147 |
"""
|
|
@@ -2740,7 +2084,10 @@ def remove_short_notes(midi_path: Path, min_duration: int = 60) -> Path:
|
|
| 2740 |
def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
|
| 2741 |
"""Generate MusicXML from MIDI (module-level wrapper)."""
|
| 2742 |
pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
|
| 2743 |
-
|
|
|
|
|
|
|
|
|
|
| 2744 |
|
| 2745 |
|
| 2746 |
def detect_key_signature(midi_path: Path) -> dict:
|
|
|
|
| 11 |
import mido
|
| 12 |
import librosa
|
| 13 |
import numpy as np
|
| 14 |
+
# basic-pitch removed - using YourMT3+ only
|
|
|
|
| 15 |
from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
|
| 16 |
|
| 17 |
# Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
|
|
|
|
| 90 |
self.final_midi_path = midi_path
|
| 91 |
|
| 92 |
self.progress(90, "musicxml", "Generating MusicXML")
|
| 93 |
+
# Use minimal MusicXML generation (YourMT3+ optimized)
|
| 94 |
+
print(f" Using minimal MusicXML generation (YourMT3+)")
|
| 95 |
+
musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
self.progress(100, "complete", "Transcription complete")
|
| 98 |
return musicxml_path
|
|
|
|
| 174 |
minimum_note_length: int = None
|
| 175 |
) -> Path:
|
| 176 |
"""
|
| 177 |
+
Transcribe audio to MIDI using YourMT3+.
|
| 178 |
|
| 179 |
Args:
|
| 180 |
audio_path: Path to audio file (should be 'other' stem for piano)
|
| 181 |
+
onset_threshold: Deprecated (kept for API compatibility)
|
| 182 |
+
frame_threshold: Deprecated (kept for API compatibility)
|
| 183 |
+
minimum_note_length: Deprecated (kept for API compatibility)
|
| 184 |
|
| 185 |
Returns:
|
| 186 |
Path to generated MIDI file
|
| 187 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
output_dir = self.temp_dir
|
| 189 |
|
| 190 |
+
# Transcribe with YourMT3+ (only transcription method)
|
| 191 |
+
print(f" Transcribing with YourMT3+...")
|
| 192 |
+
midi_path = self.transcribe_with_yourmt3(audio_path)
|
| 193 |
+
print(f" ✓ YourMT3+ transcription complete")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# Rename final MIDI to standard name for post-processing
|
| 196 |
final_midi_path = output_dir / "piano.mid"
|
|
|
|
| 1037 |
|
| 1038 |
return midi_path
|
| 1039 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1040 |
def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
|
| 1041 |
"""
|
| 1042 |
Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
|
|
|
|
| 1173 |
print(f" ✓ MusicXML generation complete")
|
| 1174 |
return output_path
|
| 1175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1176 |
|
|
|
|
|
|
|
| 1177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1179 |
|
| 1180 |
def _snap_to_valid_duration(self, duration: float) -> float:
|
| 1181 |
"""
|
|
|
|
| 1205 |
|
| 1206 |
return nearest
|
| 1207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1208 |
|
| 1209 |
def _validate_and_adjust_metadata(
|
| 1210 |
self,
|
|
|
|
| 1485 |
|
| 1486 |
return score
|
| 1487 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1488 |
|
|
|
|
|
|
|
|
|
|
| 1489 |
|
| 1490 |
def _split_into_grand_staff(self, score) -> stream.Score:
|
| 1491 |
"""
|
|
|
|
| 2084 |
def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
|
| 2085 |
"""Generate MusicXML from MIDI (module-level wrapper)."""
|
| 2086 |
pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
|
| 2087 |
+
# Use minimal pipeline (YourMT3+ optimized)
|
| 2088 |
+
# Note: source_audio path may not exist for module-level calls, but minimal pipeline can handle it
|
| 2089 |
+
audio_path = storage_path / "temp" / "compat_job" / "audio.wav"
|
| 2090 |
+
return pipeline.generate_musicxml_minimal(midi_path, audio_path)
|
| 2091 |
|
| 2092 |
|
| 2093 |
def detect_key_signature(midi_path: Path) -> dict:
|
backend/requirements.txt
CHANGED
|
@@ -18,11 +18,9 @@ scipy
|
|
| 18 |
torch>=2.0.0
|
| 19 |
torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
|
| 20 |
demucs>=3.0.6
|
|
|
|
| 21 |
|
| 22 |
-
#
|
| 23 |
-
basic-pitch==0.4.0 # Fallback transcriber when YourMT3+ service unavailable
|
| 24 |
-
|
| 25 |
-
# YourMT3+ Transcription (integrated into main service)
|
| 26 |
lightning>=2.2.1
|
| 27 |
transformers==4.45.1
|
| 28 |
einops>=0.7.0
|
|
|
|
| 18 |
torch>=2.0.0
|
| 19 |
torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
|
| 20 |
demucs>=3.0.6
|
| 21 |
+
audio-separator>=0.40.0 # BS-RoFormer and UVR models for better vocal separation
|
| 22 |
|
| 23 |
+
# YourMT3+ Transcription (primary transcriber)
|
|
|
|
|
|
|
|
|
|
| 24 |
lightning>=2.2.1
|
| 25 |
transformers==4.45.1
|
| 26 |
einops>=0.7.0
|
frontend/src/components/InstrumentSelector.css
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.instrument-selector {
|
| 2 |
+
margin-bottom: 2rem;
|
| 3 |
+
}
|
| 4 |
+
|
| 5 |
+
.selector-label {
|
| 6 |
+
display: block;
|
| 7 |
+
margin-bottom: 1rem;
|
| 8 |
+
font-weight: bold;
|
| 9 |
+
font-size: 1.1rem;
|
| 10 |
+
color: #333;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
.instrument-grid {
|
| 14 |
+
display: grid;
|
| 15 |
+
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
| 16 |
+
gap: 1rem;
|
| 17 |
+
margin-bottom: 0.5rem;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
.instrument-button {
|
| 21 |
+
display: flex;
|
| 22 |
+
flex-direction: column;
|
| 23 |
+
align-items: center;
|
| 24 |
+
justify-content: center;
|
| 25 |
+
padding: 1rem;
|
| 26 |
+
border: 2px solid #ddd;
|
| 27 |
+
border-radius: 8px;
|
| 28 |
+
background-color: #fff;
|
| 29 |
+
cursor: pointer;
|
| 30 |
+
transition: all 0.2s ease;
|
| 31 |
+
min-height: 100px;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.instrument-button:hover {
|
| 35 |
+
border-color: #007bff;
|
| 36 |
+
background-color: #f8f9fa;
|
| 37 |
+
transform: translateY(-2px);
|
| 38 |
+
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
.instrument-button.selected {
|
| 42 |
+
border-color: #007bff;
|
| 43 |
+
background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
|
| 44 |
+
color: white;
|
| 45 |
+
box-shadow: 0 4px 12px rgba(0, 123, 255, 0.3);
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
.instrument-button.selected:hover {
|
| 49 |
+
background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
.instrument-icon {
|
| 53 |
+
font-size: 2rem;
|
| 54 |
+
margin-bottom: 0.5rem;
|
| 55 |
+
display: block;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.instrument-label {
|
| 59 |
+
font-size: 0.9rem;
|
| 60 |
+
font-weight: 500;
|
| 61 |
+
text-align: center;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.selector-hint {
|
| 65 |
+
color: #888;
|
| 66 |
+
font-size: 0.85rem;
|
| 67 |
+
margin-top: 0.5rem;
|
| 68 |
+
text-align: center;
|
| 69 |
+
font-style: italic;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* Responsive adjustments */
|
| 73 |
+
@media (max-width: 600px) {
|
| 74 |
+
.instrument-grid {
|
| 75 |
+
grid-template-columns: repeat(2, 1fr);
|
| 76 |
+
}
|
| 77 |
+
}
|
frontend/src/components/InstrumentSelector.tsx
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Multi-instrument selector for choosing which instruments to transcribe.
|
| 3 |
+
*/
|
| 4 |
+
import { useState } from 'react';
|
| 5 |
+
import './InstrumentSelector.css';
|
| 6 |
+
|
| 7 |
+
export interface Instrument {
|
| 8 |
+
id: string;
|
| 9 |
+
label: string;
|
| 10 |
+
icon: string;
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
const INSTRUMENTS: Instrument[] = [
|
| 14 |
+
{ id: 'piano', label: 'Piano', icon: '🎹' },
|
| 15 |
+
{ id: 'vocals', label: 'Vocals (Violin)', icon: '🎤' },
|
| 16 |
+
{ id: 'drums', label: 'Drums', icon: '🥁' },
|
| 17 |
+
{ id: 'bass', label: 'Bass', icon: '🎸' },
|
| 18 |
+
{ id: 'guitar', label: 'Guitar', icon: '🎸' },
|
| 19 |
+
{ id: 'other', label: 'Other Instruments', icon: '🎵' }
|
| 20 |
+
];
|
| 21 |
+
|
| 22 |
+
interface InstrumentSelectorProps {
|
| 23 |
+
selectedInstruments: string[];
|
| 24 |
+
onChange: (instruments: string[]) => void;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
export function InstrumentSelector({ selectedInstruments, onChange }: InstrumentSelectorProps) {
|
| 28 |
+
const handleToggle = (instrumentId: string) => {
|
| 29 |
+
const isSelected = selectedInstruments.includes(instrumentId);
|
| 30 |
+
|
| 31 |
+
if (isSelected) {
|
| 32 |
+
// Don't allow deselecting if it's the only selected instrument
|
| 33 |
+
if (selectedInstruments.length === 1) {
|
| 34 |
+
return;
|
| 35 |
+
}
|
| 36 |
+
onChange(selectedInstruments.filter(id => id !== instrumentId));
|
| 37 |
+
} else {
|
| 38 |
+
onChange([...selectedInstruments, instrumentId]);
|
| 39 |
+
}
|
| 40 |
+
};
|
| 41 |
+
|
| 42 |
+
return (
|
| 43 |
+
<div className="instrument-selector">
|
| 44 |
+
<label className="selector-label">Select Instruments:</label>
|
| 45 |
+
<div className="instrument-grid">
|
| 46 |
+
{INSTRUMENTS.map(instrument => (
|
| 47 |
+
<button
|
| 48 |
+
key={instrument.id}
|
| 49 |
+
type="button"
|
| 50 |
+
className={`instrument-button ${selectedInstruments.includes(instrument.id) ? 'selected' : ''}`}
|
| 51 |
+
onClick={() => handleToggle(instrument.id)}
|
| 52 |
+
aria-pressed={selectedInstruments.includes(instrument.id)}
|
| 53 |
+
>
|
| 54 |
+
<span className="instrument-icon">{instrument.icon}</span>
|
| 55 |
+
<span className="instrument-label">{instrument.label}</span>
|
| 56 |
+
</button>
|
| 57 |
+
))}
|
| 58 |
+
</div>
|
| 59 |
+
<p className="selector-hint">
|
| 60 |
+
Select at least one instrument to transcribe
|
| 61 |
+
</p>
|
| 62 |
+
</div>
|
| 63 |
+
);
|
| 64 |
+
}
|
| 65 |
+
|
| 66 |
+
export default InstrumentSelector;
|
frontend/src/components/InstrumentTabs.css
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.instrument-tabs {
|
| 2 |
+
display: flex;
|
| 3 |
+
gap: 0.5rem;
|
| 4 |
+
margin-bottom: 1.5rem;
|
| 5 |
+
padding: 0.5rem;
|
| 6 |
+
background-color: #f8f9fa;
|
| 7 |
+
border-radius: 8px;
|
| 8 |
+
overflow-x: auto;
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
.instrument-tabs.single {
|
| 12 |
+
justify-content: center;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.instrument-badge {
|
| 16 |
+
display: flex;
|
| 17 |
+
align-items: center;
|
| 18 |
+
gap: 0.5rem;
|
| 19 |
+
padding: 0.75rem 1.5rem;
|
| 20 |
+
background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
|
| 21 |
+
color: white;
|
| 22 |
+
border-radius: 6px;
|
| 23 |
+
font-weight: 500;
|
| 24 |
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
.instrument-tab {
|
| 28 |
+
display: flex;
|
| 29 |
+
align-items: center;
|
| 30 |
+
gap: 0.5rem;
|
| 31 |
+
padding: 0.75rem 1.25rem;
|
| 32 |
+
border: 2px solid #dee2e6;
|
| 33 |
+
border-radius: 6px;
|
| 34 |
+
background-color: white;
|
| 35 |
+
cursor: pointer;
|
| 36 |
+
transition: all 0.2s ease;
|
| 37 |
+
font-size: 0.95rem;
|
| 38 |
+
font-weight: 500;
|
| 39 |
+
white-space: nowrap;
|
| 40 |
+
}
|
| 41 |
+
|
| 42 |
+
.instrument-tab:hover {
|
| 43 |
+
border-color: #007bff;
|
| 44 |
+
background-color: #f8f9fa;
|
| 45 |
+
transform: translateY(-1px);
|
| 46 |
+
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.instrument-tab.active {
|
| 50 |
+
border-color: #007bff;
|
| 51 |
+
background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
|
| 52 |
+
color: white;
|
| 53 |
+
box-shadow: 0 3px 8px rgba(0, 123, 255, 0.3);
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
.instrument-tab.active:hover {
|
| 57 |
+
background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
|
| 58 |
+
transform: translateY(-1px);
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
.instrument-tab .instrument-icon,
|
| 62 |
+
.instrument-badge .instrument-icon {
|
| 63 |
+
font-size: 1.25rem;
|
| 64 |
+
line-height: 1;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
.instrument-tab .instrument-label,
|
| 68 |
+
.instrument-badge .instrument-label {
|
| 69 |
+
font-size: 0.95rem;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
/* Responsive */
|
| 73 |
+
@media (max-width: 600px) {
|
| 74 |
+
.instrument-tabs {
|
| 75 |
+
gap: 0.25rem;
|
| 76 |
+
padding: 0.25rem;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.instrument-tab {
|
| 80 |
+
padding: 0.5rem 0.75rem;
|
| 81 |
+
font-size: 0.85rem;
|
| 82 |
+
}
|
| 83 |
+
|
| 84 |
+
.instrument-tab .instrument-icon {
|
| 85 |
+
font-size: 1.1rem;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
.instrument-tab .instrument-label {
|
| 89 |
+
display: none; /* Hide labels on mobile, show icons only */
|
| 90 |
+
}
|
| 91 |
+
}
|
frontend/src/components/InstrumentTabs.tsx
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Instrument tabs for switching between transcribed instruments.
|
| 3 |
+
*/
|
| 4 |
+
import './InstrumentTabs.css';
|
| 5 |
+
|
| 6 |
+
interface InstrumentInfo {
|
| 7 |
+
id: string;
|
| 8 |
+
label: string;
|
| 9 |
+
icon: string;
|
| 10 |
+
}
|
| 11 |
+
|
| 12 |
+
const INSTRUMENT_INFO: Record<string, InstrumentInfo> = {
|
| 13 |
+
piano: { id: 'piano', label: 'Piano', icon: '🎹' },
|
| 14 |
+
vocals: { id: 'vocals', label: 'Vocals', icon: '🎤' },
|
| 15 |
+
drums: { id: 'drums', label: 'Drums', icon: '🥁' },
|
| 16 |
+
bass: { id: 'bass', label: 'Bass', icon: '🎸' },
|
| 17 |
+
guitar: { id: 'guitar', label: 'Guitar', icon: '🎸' },
|
| 18 |
+
other: { id: 'other', label: 'Other', icon: '🎵' },
|
| 19 |
+
};
|
| 20 |
+
|
| 21 |
+
interface InstrumentTabsProps {
|
| 22 |
+
instruments: string[];
|
| 23 |
+
activeInstrument: string;
|
| 24 |
+
onInstrumentChange: (instrument: string) => void;
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
export function InstrumentTabs({ instruments, activeInstrument, onInstrumentChange }: InstrumentTabsProps) {
|
| 28 |
+
if (instruments.length === 0) {
|
| 29 |
+
return null;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
// If only one instrument, show it as a badge instead of tabs
|
| 33 |
+
if (instruments.length === 1) {
|
| 34 |
+
const instrument = instruments[0];
|
| 35 |
+
const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
|
| 36 |
+
return (
|
| 37 |
+
<div className="instrument-tabs single">
|
| 38 |
+
<div className="instrument-badge">
|
| 39 |
+
<span className="instrument-icon">{info.icon}</span>
|
| 40 |
+
<span className="instrument-label">{info.label}</span>
|
| 41 |
+
</div>
|
| 42 |
+
</div>
|
| 43 |
+
);
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
return (
|
| 47 |
+
<div className="instrument-tabs">
|
| 48 |
+
{instruments.map((instrument) => {
|
| 49 |
+
const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
|
| 50 |
+
const isActive = instrument === activeInstrument;
|
| 51 |
+
|
| 52 |
+
return (
|
| 53 |
+
<button
|
| 54 |
+
key={instrument}
|
| 55 |
+
className={`instrument-tab ${isActive ? 'active' : ''}`}
|
| 56 |
+
onClick={() => onInstrumentChange(instrument)}
|
| 57 |
+
aria-pressed={isActive}
|
| 58 |
+
>
|
| 59 |
+
<span className="instrument-icon">{info.icon}</span>
|
| 60 |
+
<span className="instrument-label">{info.label}</span>
|
| 61 |
+
</button>
|
| 62 |
+
);
|
| 63 |
+
})}
|
| 64 |
+
</div>
|
| 65 |
+
);
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
export default InstrumentTabs;
|
frontend/src/components/JobSubmission.css
CHANGED
|
@@ -102,3 +102,12 @@ button:hover {
|
|
| 102 |
background-color: #f8d7da;
|
| 103 |
color: #721c24;
|
| 104 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
background-color: #f8d7da;
|
| 103 |
color: #721c24;
|
| 104 |
}
|
| 105 |
+
|
| 106 |
+
.error-alert {
|
| 107 |
+
background-color: #f8d7da;
|
| 108 |
+
color: #721c24;
|
| 109 |
+
padding: 0.75rem 1rem;
|
| 110 |
+
border-radius: 4px;
|
| 111 |
+
margin-top: 1rem;
|
| 112 |
+
border: 1px solid #f5c6cb;
|
| 113 |
+
}
|
frontend/src/components/JobSubmission.tsx
CHANGED
|
@@ -4,6 +4,7 @@
|
|
| 4 |
import { useState, useRef, useEffect } from 'react';
|
| 5 |
import { api } from '../api/client';
|
| 6 |
import type { ProgressUpdate } from '../api/client';
|
|
|
|
| 7 |
import './JobSubmission.css';
|
| 8 |
|
| 9 |
interface JobSubmissionProps {
|
|
@@ -13,6 +14,7 @@ interface JobSubmissionProps {
|
|
| 13 |
|
| 14 |
export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
|
| 15 |
const [youtubeUrl, setYoutubeUrl] = useState('');
|
|
|
|
| 16 |
const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
|
| 17 |
const [error, setError] = useState<string | null>(null);
|
| 18 |
const [progress, setProgress] = useState(0);
|
|
@@ -43,15 +45,24 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
|
|
| 43 |
const handleSubmit = async (e: React.FormEvent) => {
|
| 44 |
e.preventDefault();
|
| 45 |
setError(null);
|
|
|
|
|
|
|
| 46 |
const validation = validateUrl(youtubeUrl);
|
| 47 |
if (validation) {
|
| 48 |
setError(validation);
|
| 49 |
return;
|
| 50 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
setStatus('submitting');
|
| 52 |
|
| 53 |
try {
|
| 54 |
-
const response = await api.submitJob(youtubeUrl, { instruments:
|
| 55 |
setYoutubeUrl('');
|
| 56 |
if (onJobSubmitted) onJobSubmitted(response);
|
| 57 |
|
|
@@ -150,6 +161,11 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
|
|
| 150 |
|
| 151 |
{(status === 'idle' || status === 'submitting') && (
|
| 152 |
<form onSubmit={handleSubmit}>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
<div className="form-group">
|
| 154 |
<label htmlFor="youtube-url">YouTube URL:</label>
|
| 155 |
<input
|
|
@@ -167,7 +183,7 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
|
|
| 167 |
</div>
|
| 168 |
<button type="submit" disabled={status === 'submitting'}>Transcribe</button>
|
| 169 |
{status === 'submitting' && <div>Submitting...</div>}
|
| 170 |
-
{error && <div role="alert">{error}</div>}
|
| 171 |
</form>
|
| 172 |
)}
|
| 173 |
|
|
|
|
| 4 |
import { useState, useRef, useEffect } from 'react';
|
| 5 |
import { api } from '../api/client';
|
| 6 |
import type { ProgressUpdate } from '../api/client';
|
| 7 |
+
import { InstrumentSelector } from './InstrumentSelector';
|
| 8 |
import './JobSubmission.css';
|
| 9 |
|
| 10 |
interface JobSubmissionProps {
|
|
|
|
| 14 |
|
| 15 |
export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
|
| 16 |
const [youtubeUrl, setYoutubeUrl] = useState('');
|
| 17 |
+
const [selectedInstruments, setSelectedInstruments] = useState<string[]>(['piano']);
|
| 18 |
const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
|
| 19 |
const [error, setError] = useState<string | null>(null);
|
| 20 |
const [progress, setProgress] = useState(0);
|
|
|
|
| 45 |
const handleSubmit = async (e: React.FormEvent) => {
|
| 46 |
e.preventDefault();
|
| 47 |
setError(null);
|
| 48 |
+
|
| 49 |
+
// Validate URL
|
| 50 |
const validation = validateUrl(youtubeUrl);
|
| 51 |
if (validation) {
|
| 52 |
setError(validation);
|
| 53 |
return;
|
| 54 |
}
|
| 55 |
+
|
| 56 |
+
// Validate at least one instrument is selected
|
| 57 |
+
if (selectedInstruments.length === 0) {
|
| 58 |
+
setError('Please select at least one instrument');
|
| 59 |
+
return;
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
setStatus('submitting');
|
| 63 |
|
| 64 |
try {
|
| 65 |
+
const response = await api.submitJob(youtubeUrl, { instruments: selectedInstruments });
|
| 66 |
setYoutubeUrl('');
|
| 67 |
if (onJobSubmitted) onJobSubmitted(response);
|
| 68 |
|
|
|
|
| 161 |
|
| 162 |
{(status === 'idle' || status === 'submitting') && (
|
| 163 |
<form onSubmit={handleSubmit}>
|
| 164 |
+
<InstrumentSelector
|
| 165 |
+
selectedInstruments={selectedInstruments}
|
| 166 |
+
onChange={setSelectedInstruments}
|
| 167 |
+
/>
|
| 168 |
+
|
| 169 |
<div className="form-group">
|
| 170 |
<label htmlFor="youtube-url">YouTube URL:</label>
|
| 171 |
<input
|
|
|
|
| 183 |
</div>
|
| 184 |
<button type="submit" disabled={status === 'submitting'}>Transcribe</button>
|
| 185 |
{status === 'submitting' && <div>Submitting...</div>}
|
| 186 |
+
{error && <div role="alert" className="error-alert">{error}</div>}
|
| 187 |
</form>
|
| 188 |
)}
|
| 189 |
|
frontend/src/components/PlaybackControls.css
CHANGED
|
@@ -2,36 +2,60 @@
|
|
| 2 |
display: flex;
|
| 3 |
align-items: center;
|
| 4 |
gap: 1rem;
|
| 5 |
-
padding:
|
| 6 |
-
background: #
|
| 7 |
-
border-radius:
|
| 8 |
-
margin:
|
| 9 |
flex-wrap: wrap;
|
|
|
|
|
|
|
| 10 |
}
|
| 11 |
|
| 12 |
.playback-controls button {
|
| 13 |
-
padding: 0.
|
| 14 |
-
font-size:
|
| 15 |
-
|
|
|
|
| 16 |
background: white;
|
| 17 |
-
border-radius:
|
| 18 |
cursor: pointer;
|
| 19 |
-
transition: all 0.2s;
|
|
|
|
| 20 |
}
|
| 21 |
|
| 22 |
.playback-controls button:hover:not(:disabled) {
|
| 23 |
-
background: #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
}
|
| 25 |
|
| 26 |
.playback-controls button:disabled {
|
| 27 |
-
opacity: 0.
|
| 28 |
cursor: not-allowed;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
}
|
| 30 |
|
| 31 |
.tempo-control {
|
| 32 |
display: flex;
|
| 33 |
align-items: center;
|
| 34 |
-
gap: 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
}
|
| 36 |
|
| 37 |
.tempo-control label {
|
|
@@ -39,14 +63,110 @@
|
|
| 39 |
align-items: center;
|
| 40 |
gap: 0.5rem;
|
| 41 |
font-size: 0.9rem;
|
|
|
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
.tempo-control input[type="range"] {
|
| 45 |
width: 150px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
font-size: 0.9rem;
|
| 50 |
-
color: #
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
|
|
|
| 2 |
display: flex;
|
| 3 |
align-items: center;
|
| 4 |
gap: 1rem;
|
| 5 |
+
padding: 1.25rem;
|
| 6 |
+
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
|
| 7 |
+
border-radius: 12px;
|
| 8 |
+
margin: 1.5rem 0;
|
| 9 |
flex-wrap: wrap;
|
| 10 |
+
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
|
| 11 |
+
border: 1px solid #dee2e6;
|
| 12 |
}
|
| 13 |
|
| 14 |
.playback-controls button {
|
| 15 |
+
padding: 0.6rem 1.25rem;
|
| 16 |
+
font-size: 0.95rem;
|
| 17 |
+
font-weight: 500;
|
| 18 |
+
border: 2px solid #dee2e6;
|
| 19 |
background: white;
|
| 20 |
+
border-radius: 8px;
|
| 21 |
cursor: pointer;
|
| 22 |
+
transition: all 0.2s ease;
|
| 23 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
|
| 24 |
}
|
| 25 |
|
| 26 |
.playback-controls button:hover:not(:disabled) {
|
| 27 |
+
background: #007bff;
|
| 28 |
+
border-color: #007bff;
|
| 29 |
+
color: white;
|
| 30 |
+
transform: translateY(-1px);
|
| 31 |
+
box-shadow: 0 3px 6px rgba(0, 123, 255, 0.2);
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
.playback-controls button:active:not(:disabled) {
|
| 35 |
+
transform: translateY(0);
|
| 36 |
+
box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 37 |
}
|
| 38 |
|
| 39 |
.playback-controls button:disabled {
|
| 40 |
+
opacity: 0.4;
|
| 41 |
cursor: not-allowed;
|
| 42 |
+
background: #f8f9fa;
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
.playback-controls button.active {
|
| 46 |
+
background: #007bff;
|
| 47 |
+
border-color: #007bff;
|
| 48 |
+
color: white;
|
| 49 |
}
|
| 50 |
|
| 51 |
.tempo-control {
|
| 52 |
display: flex;
|
| 53 |
align-items: center;
|
| 54 |
+
gap: 0.75rem;
|
| 55 |
+
background: white;
|
| 56 |
+
padding: 0.5rem 0.75rem;
|
| 57 |
+
border-radius: 8px;
|
| 58 |
+
border: 1px solid #dee2e6;
|
| 59 |
}
|
| 60 |
|
| 61 |
.tempo-control label {
|
|
|
|
| 63 |
align-items: center;
|
| 64 |
gap: 0.5rem;
|
| 65 |
font-size: 0.9rem;
|
| 66 |
+
font-weight: 500;
|
| 67 |
+
color: #495057;
|
| 68 |
}
|
| 69 |
|
| 70 |
.tempo-control input[type="range"] {
|
| 71 |
width: 150px;
|
| 72 |
+
height: 6px;
|
| 73 |
+
border-radius: 3px;
|
| 74 |
+
background: #dee2e6;
|
| 75 |
+
outline: none;
|
| 76 |
+
-webkit-appearance: none;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
.tempo-control input[type="range"]::-webkit-slider-thumb {
|
| 80 |
+
-webkit-appearance: none;
|
| 81 |
+
appearance: none;
|
| 82 |
+
width: 16px;
|
| 83 |
+
height: 16px;
|
| 84 |
+
border-radius: 50%;
|
| 85 |
+
background: #007bff;
|
| 86 |
+
cursor: pointer;
|
| 87 |
+
box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
|
| 88 |
+
transition: all 0.2s ease;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.tempo-control input[type="range"]::-webkit-slider-thumb:hover {
|
| 92 |
+
background: #0056b3;
|
| 93 |
+
transform: scale(1.1);
|
| 94 |
+
box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
.tempo-control input[type="range"]::-moz-range-thumb {
|
| 98 |
+
width: 16px;
|
| 99 |
+
height: 16px;
|
| 100 |
+
border-radius: 50%;
|
| 101 |
+
background: #007bff;
|
| 102 |
+
cursor: pointer;
|
| 103 |
+
border: none;
|
| 104 |
+
box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
|
| 105 |
+
transition: all 0.2s ease;
|
| 106 |
}
|
| 107 |
|
| 108 |
+
.tempo-control input[type="range"]::-moz-range-thumb:hover {
|
| 109 |
+
background: #0056b3;
|
| 110 |
+
transform: scale(1.1);
|
| 111 |
+
box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.tempo-control span {
|
| 115 |
+
font-weight: 600;
|
| 116 |
+
color: #007bff;
|
| 117 |
+
min-width: 40px;
|
| 118 |
+
text-align: center;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.time-display {
|
| 122 |
+
font-size: 0.95rem;
|
| 123 |
+
color: #495057;
|
| 124 |
+
font-family: 'Courier New', monospace;
|
| 125 |
+
font-weight: 500;
|
| 126 |
+
background: white;
|
| 127 |
+
padding: 0.5rem 0.75rem;
|
| 128 |
+
border-radius: 6px;
|
| 129 |
+
border: 1px solid #dee2e6;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.volume-control {
|
| 133 |
+
display: flex;
|
| 134 |
+
align-items: center;
|
| 135 |
+
gap: 0.5rem;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
.volume-control label {
|
| 139 |
+
display: flex;
|
| 140 |
+
align-items: center;
|
| 141 |
+
gap: 0.5rem;
|
| 142 |
font-size: 0.9rem;
|
| 143 |
+
color: #495057;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.volume-control input[type="range"] {
|
| 147 |
+
width: 100px;
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
/* Responsive adjustments */
|
| 151 |
+
@media (max-width: 768px) {
|
| 152 |
+
.playback-controls {
|
| 153 |
+
justify-content: center;
|
| 154 |
+
gap: 0.75rem;
|
| 155 |
+
padding: 1rem;
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
.playback-controls button {
|
| 159 |
+
padding: 0.5rem 1rem;
|
| 160 |
+
font-size: 0.9rem;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.tempo-control {
|
| 164 |
+
width: 100%;
|
| 165 |
+
justify-content: space-between;
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
.time-display {
|
| 169 |
+
width: 100%;
|
| 170 |
+
text-align: center;
|
| 171 |
+
}
|
| 172 |
}
|
frontend/src/components/PlaybackControls.tsx
CHANGED
|
@@ -10,7 +10,7 @@ import { useState, useRef, useEffect } from 'react';
|
|
| 10 |
import * as Tone from 'tone';
|
| 11 |
// useNotationStore is optional for tests; guard its usage
|
| 12 |
import { useNotationStore } from '../store/notation';
|
| 13 |
-
import { durationToSeconds } from '../utils/
|
| 14 |
import type { Note } from '../store/notation';
|
| 15 |
import './PlaybackControls.css';
|
| 16 |
|
|
|
|
| 10 |
import * as Tone from 'tone';
|
| 11 |
// useNotationStore is optional for tests; guard its usage
|
| 12 |
import { useNotationStore } from '../store/notation';
|
| 13 |
+
import { durationToSeconds } from '../utils/duration';
|
| 14 |
import type { Note } from '../store/notation';
|
| 15 |
import './PlaybackControls.css';
|
| 16 |
|
frontend/src/components/ScoreEditor.tsx
CHANGED
|
@@ -1,11 +1,13 @@
|
|
| 1 |
/**
|
| 2 |
* Main score editor component integrating notation, playback, and export.
|
|
|
|
| 3 |
*/
|
| 4 |
import { useState, useEffect } from 'react';
|
| 5 |
-
import { getMidiFile, getMetadata } from '../api/client';
|
| 6 |
import { useNotationStore } from '../store/notation';
|
| 7 |
import { NotationCanvas } from './NotationCanvas';
|
| 8 |
import { PlaybackControls } from './PlaybackControls';
|
|
|
|
| 9 |
import './ScoreEditor.css';
|
| 10 |
|
| 11 |
interface ScoreEditorProps {
|
|
@@ -15,7 +17,11 @@ interface ScoreEditorProps {
|
|
| 15 |
export function ScoreEditor({ jobId }: ScoreEditorProps) {
|
| 16 |
const [loading, setLoading] = useState(true);
|
| 17 |
const [error, setError] = useState<string | null>(null);
|
|
|
|
|
|
|
| 18 |
const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
|
|
|
|
|
|
|
| 19 |
|
| 20 |
useEffect(() => {
|
| 21 |
loadScore();
|
|
@@ -26,18 +32,34 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
|
|
| 26 |
setLoading(true);
|
| 27 |
setError(null);
|
| 28 |
|
| 29 |
-
//
|
| 30 |
-
const
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
// Load MIDI
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
setLoading(false);
|
| 43 |
} catch (err) {
|
|
@@ -88,11 +110,16 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
|
|
| 88 |
<div className="editor-toolbar">
|
| 89 |
<h2>Score Editor</h2>
|
| 90 |
<div className="toolbar-actions">
|
| 91 |
-
<button onClick={handleExportMusicXML}>Export MusicXML</button>
|
| 92 |
<button onClick={handleExportMIDI}>Export MIDI</button>
|
| 93 |
</div>
|
| 94 |
</div>
|
| 95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
<PlaybackControls />
|
| 97 |
|
| 98 |
<NotationCanvas />
|
|
|
|
| 1 |
/**
|
| 2 |
* Main score editor component integrating notation, playback, and export.
|
| 3 |
+
* Supports multi-instrument transcription.
|
| 4 |
*/
|
| 5 |
import { useState, useEffect } from 'react';
|
| 6 |
+
import { getMidiFile, getMetadata, getJobStatus } from '../api/client';
|
| 7 |
import { useNotationStore } from '../store/notation';
|
| 8 |
import { NotationCanvas } from './NotationCanvas';
|
| 9 |
import { PlaybackControls } from './PlaybackControls';
|
| 10 |
+
import { InstrumentTabs } from './InstrumentTabs';
|
| 11 |
import './ScoreEditor.css';
|
| 12 |
|
| 13 |
interface ScoreEditorProps {
|
|
|
|
| 17 |
export function ScoreEditor({ jobId }: ScoreEditorProps) {
|
| 18 |
const [loading, setLoading] = useState(true);
|
| 19 |
const [error, setError] = useState<string | null>(null);
|
| 20 |
+
const [instruments, setInstruments] = useState<string[]>([]);
|
| 21 |
+
|
| 22 |
const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
|
| 23 |
+
const activeInstrument = useNotationStore((state) => state.activeInstrument);
|
| 24 |
+
const setActiveInstrument = useNotationStore((state) => state.setActiveInstrument);
|
| 25 |
|
| 26 |
useEffect(() => {
|
| 27 |
loadScore();
|
|
|
|
| 32 |
setLoading(true);
|
| 33 |
setError(null);
|
| 34 |
|
| 35 |
+
// Get job status to find which instruments were transcribed
|
| 36 |
+
const jobStatus = await getJobStatus(jobId);
|
| 37 |
+
|
| 38 |
+
// For now, assume piano is the default instrument (backend doesn't yet return instruments list)
|
| 39 |
+
// TODO: Update when backend API returns instruments list in job status
|
| 40 |
+
const transcribedInstruments = ['piano'];
|
| 41 |
+
setInstruments(transcribedInstruments);
|
| 42 |
+
|
| 43 |
+
// Fetch metadata once (shared across all instruments)
|
| 44 |
+
const metadata = await getMetadata(jobId);
|
| 45 |
|
| 46 |
+
// Load MIDI files for each instrument
|
| 47 |
+
for (const instrument of transcribedInstruments) {
|
| 48 |
+
// For MVP, backend only supports piano (single stem)
|
| 49 |
+
// In the future, this will fetch per-instrument MIDI: `/api/v1/scores/${jobId}/midi/${instrument}`
|
| 50 |
+
const midiData = await getMidiFile(jobId);
|
| 51 |
+
|
| 52 |
+
await loadFromMidi(instrument, midiData, {
|
| 53 |
+
tempo: metadata.tempo,
|
| 54 |
+
keySignature: metadata.key_signature,
|
| 55 |
+
timeSignature: metadata.time_signature,
|
| 56 |
+
});
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
// Set first instrument as active
|
| 60 |
+
if (transcribedInstruments.length > 0) {
|
| 61 |
+
setActiveInstrument(transcribedInstruments[0]);
|
| 62 |
+
}
|
| 63 |
|
| 64 |
setLoading(false);
|
| 65 |
} catch (err) {
|
|
|
|
| 110 |
<div className="editor-toolbar">
|
| 111 |
<h2>Score Editor</h2>
|
| 112 |
<div className="toolbar-actions">
|
|
|
|
| 113 |
<button onClick={handleExportMIDI}>Export MIDI</button>
|
| 114 |
</div>
|
| 115 |
</div>
|
| 116 |
|
| 117 |
+
<InstrumentTabs
|
| 118 |
+
instruments={instruments}
|
| 119 |
+
activeInstrument={activeInstrument}
|
| 120 |
+
onInstrumentChange={setActiveInstrument}
|
| 121 |
+
/>
|
| 122 |
+
|
| 123 |
<PlaybackControls />
|
| 124 |
|
| 125 |
<NotationCanvas />
|
frontend/src/store/notation.ts
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
/**
|
| 2 |
* Zustand store for notation state management.
|
|
|
|
| 3 |
*/
|
| 4 |
import { create } from 'zustand';
|
| 5 |
-
import { parseMusicXML } from '../utils/musicxml-parser';
|
| 6 |
import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
|
| 7 |
|
| 8 |
export interface Note {
|
|
@@ -42,15 +42,22 @@ export interface Score {
|
|
| 42 |
}
|
| 43 |
|
| 44 |
interface NotationState {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
score: Score | null;
|
|
|
|
| 46 |
selectedNoteIds: string[];
|
| 47 |
currentTool: 'select' | 'add' | 'delete';
|
| 48 |
currentDuration: string;
|
| 49 |
playingNoteIds: string[]; // Notes currently being played (for visual feedback)
|
| 50 |
|
| 51 |
// Actions
|
| 52 |
-
loadFromMusicXML: (xml: string) => void;
|
| 53 |
loadFromMidi: (
|
|
|
|
| 54 |
midiData: ArrayBuffer,
|
| 55 |
metadata?: {
|
| 56 |
tempo?: number;
|
|
@@ -58,7 +65,7 @@ interface NotationState {
|
|
| 58 |
timeSignature?: { numerator: number; denominator: number };
|
| 59 |
}
|
| 60 |
) => Promise<void>;
|
| 61 |
-
|
| 62 |
addNote: (measureId: string, note: Note) => void;
|
| 63 |
deleteNote: (noteId: string) => void;
|
| 64 |
updateNote: (noteId: string, changes: Partial<Note>) => void;
|
|
@@ -69,72 +76,90 @@ interface NotationState {
|
|
| 69 |
setPlayingNoteIds: (noteIds: string[]) => void;
|
| 70 |
}
|
| 71 |
|
| 72 |
-
export const useNotationStore = create<NotationState>((set,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
score: null,
|
|
|
|
| 74 |
selectedNoteIds: [],
|
| 75 |
currentTool: 'select',
|
| 76 |
currentDuration: 'quarter',
|
| 77 |
playingNoteIds: [],
|
| 78 |
|
| 79 |
-
|
| 80 |
-
try {
|
| 81 |
-
const score = parseMusicXML(xml);
|
| 82 |
-
set({ score });
|
| 83 |
-
} catch (error) {
|
| 84 |
-
console.error('Failed to parse MusicXML:', error);
|
| 85 |
-
// Fallback to empty score
|
| 86 |
-
set({
|
| 87 |
-
score: {
|
| 88 |
-
id: 'score-1',
|
| 89 |
-
title: 'Transcribed Score',
|
| 90 |
-
composer: 'Unknown',
|
| 91 |
-
key: 'C',
|
| 92 |
-
timeSignature: '4/4',
|
| 93 |
-
tempo: 120,
|
| 94 |
-
parts: [],
|
| 95 |
-
measures: [],
|
| 96 |
-
},
|
| 97 |
-
});
|
| 98 |
-
}
|
| 99 |
-
},
|
| 100 |
-
|
| 101 |
-
loadFromMidi: async (midiData, metadata) => {
|
| 102 |
try {
|
| 103 |
let score = await parseMidiFile(midiData, {
|
| 104 |
tempo: metadata?.tempo,
|
| 105 |
timeSignature: metadata?.timeSignature,
|
| 106 |
keySignature: metadata?.keySignature,
|
| 107 |
-
splitAtMiddleC:
|
| 108 |
middleCNote: 60,
|
| 109 |
});
|
| 110 |
|
| 111 |
// Assign chord IDs to simultaneous notes
|
| 112 |
score = assignChordIds(score);
|
| 113 |
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
} catch (error) {
|
| 116 |
console.error('Failed to parse MIDI:', error);
|
| 117 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
set({
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
composer: 'YourMT3+',
|
| 123 |
-
key: metadata?.keySignature || 'C',
|
| 124 |
-
timeSignature: metadata?.timeSignature
|
| 125 |
-
? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
|
| 126 |
-
: '4/4',
|
| 127 |
-
tempo: metadata?.tempo || 120,
|
| 128 |
-
parts: [],
|
| 129 |
-
measures: [],
|
| 130 |
-
},
|
| 131 |
});
|
| 132 |
}
|
| 133 |
},
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
},
|
| 139 |
|
| 140 |
addNote: (measureId, note) =>
|
|
|
|
| 1 |
/**
|
| 2 |
* Zustand store for notation state management.
|
| 3 |
+
* Supports multi-instrument transcription.
|
| 4 |
*/
|
| 5 |
import { create } from 'zustand';
|
|
|
|
| 6 |
import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
|
| 7 |
|
| 8 |
export interface Note {
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
interface NotationState {
|
| 45 |
+
// Multi-instrument support
|
| 46 |
+
scores: Map<string, Score>; // instrument -> Score
|
| 47 |
+
activeInstrument: string; // Currently viewing instrument (e.g., 'piano', 'vocals')
|
| 48 |
+
availableInstruments: string[]; // All transcribed instruments
|
| 49 |
+
|
| 50 |
+
// Legacy single-score access (for backward compatibility)
|
| 51 |
score: Score | null;
|
| 52 |
+
|
| 53 |
selectedNoteIds: string[];
|
| 54 |
currentTool: 'select' | 'add' | 'delete';
|
| 55 |
currentDuration: string;
|
| 56 |
playingNoteIds: string[]; // Notes currently being played (for visual feedback)
|
| 57 |
|
| 58 |
// Actions
|
|
|
|
| 59 |
loadFromMidi: (
|
| 60 |
+
instrument: string,
|
| 61 |
midiData: ArrayBuffer,
|
| 62 |
metadata?: {
|
| 63 |
tempo?: number;
|
|
|
|
| 65 |
timeSignature?: { numerator: number; denominator: number };
|
| 66 |
}
|
| 67 |
) => Promise<void>;
|
| 68 |
+
setActiveInstrument: (instrument: string) => void;
|
| 69 |
addNote: (measureId: string, note: Note) => void;
|
| 70 |
deleteNote: (noteId: string) => void;
|
| 71 |
updateNote: (noteId: string, changes: Partial<Note>) => void;
|
|
|
|
| 76 |
setPlayingNoteIds: (noteIds: string[]) => void;
|
| 77 |
}
|
| 78 |
|
| 79 |
+
export const useNotationStore = create<NotationState>((set, get) => ({
|
| 80 |
+
// Multi-instrument state
|
| 81 |
+
scores: new Map(),
|
| 82 |
+
activeInstrument: 'piano',
|
| 83 |
+
availableInstruments: [],
|
| 84 |
+
|
| 85 |
+
// Legacy single-score (points to active instrument's score)
|
| 86 |
score: null,
|
| 87 |
+
|
| 88 |
selectedNoteIds: [],
|
| 89 |
currentTool: 'select',
|
| 90 |
currentDuration: 'quarter',
|
| 91 |
playingNoteIds: [],
|
| 92 |
|
| 93 |
+
loadFromMidi: async (instrument, midiData, metadata) => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
try {
|
| 95 |
let score = await parseMidiFile(midiData, {
|
| 96 |
tempo: metadata?.tempo,
|
| 97 |
timeSignature: metadata?.timeSignature,
|
| 98 |
keySignature: metadata?.keySignature,
|
| 99 |
+
splitAtMiddleC: instrument === 'piano', // Only split piano into grand staff
|
| 100 |
middleCNote: 60,
|
| 101 |
});
|
| 102 |
|
| 103 |
// Assign chord IDs to simultaneous notes
|
| 104 |
score = assignChordIds(score);
|
| 105 |
|
| 106 |
+
// Update scores map
|
| 107 |
+
const state = get();
|
| 108 |
+
const newScores = new Map(state.scores);
|
| 109 |
+
newScores.set(instrument, score);
|
| 110 |
+
|
| 111 |
+
// Update available instruments if this is a new one
|
| 112 |
+
const newAvailableInstruments = state.availableInstruments.includes(instrument)
|
| 113 |
+
? state.availableInstruments
|
| 114 |
+
: [...state.availableInstruments, instrument];
|
| 115 |
+
|
| 116 |
+
set({
|
| 117 |
+
scores: newScores,
|
| 118 |
+
availableInstruments: newAvailableInstruments,
|
| 119 |
+
// Update legacy score if this is the active instrument
|
| 120 |
+
score: state.activeInstrument === instrument ? score : state.score,
|
| 121 |
+
});
|
| 122 |
} catch (error) {
|
| 123 |
console.error('Failed to parse MIDI:', error);
|
| 124 |
+
// Create fallback empty score
|
| 125 |
+
const emptyScore: Score = {
|
| 126 |
+
id: `score-${instrument}`,
|
| 127 |
+
title: 'Transcribed Score',
|
| 128 |
+
composer: 'YourMT3+',
|
| 129 |
+
key: metadata?.keySignature || 'C',
|
| 130 |
+
timeSignature: metadata?.timeSignature
|
| 131 |
+
? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
|
| 132 |
+
: '4/4',
|
| 133 |
+
tempo: metadata?.tempo || 120,
|
| 134 |
+
parts: [],
|
| 135 |
+
measures: [],
|
| 136 |
+
};
|
| 137 |
+
|
| 138 |
+
const state = get();
|
| 139 |
+
const newScores = new Map(state.scores);
|
| 140 |
+
newScores.set(instrument, emptyScore);
|
| 141 |
+
|
| 142 |
+
const newAvailableInstruments = state.availableInstruments.includes(instrument)
|
| 143 |
+
? state.availableInstruments
|
| 144 |
+
: [...state.availableInstruments, instrument];
|
| 145 |
+
|
| 146 |
set({
|
| 147 |
+
scores: newScores,
|
| 148 |
+
availableInstruments: newAvailableInstruments,
|
| 149 |
+
score: state.activeInstrument === instrument ? emptyScore : state.score,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
});
|
| 151 |
}
|
| 152 |
},
|
| 153 |
|
| 154 |
+
setActiveInstrument: (instrument) => {
|
| 155 |
+
const state = get();
|
| 156 |
+
const instrumentScore = state.scores.get(instrument);
|
| 157 |
+
|
| 158 |
+
set({
|
| 159 |
+
activeInstrument: instrument,
|
| 160 |
+
score: instrumentScore || null,
|
| 161 |
+
selectedNoteIds: [], // Clear selection when switching instruments
|
| 162 |
+
});
|
| 163 |
},
|
| 164 |
|
| 165 |
addNote: (measureId, note) =>
|
frontend/src/utils/duration.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Duration conversion utilities for music notation.
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
/**
|
| 6 |
+
* Convert note duration to seconds based on tempo.
|
| 7 |
+
*
|
| 8 |
+
* @param duration - Note duration type (whole, half, quarter, eighth, 16th, 32nd)
|
| 9 |
+
* @param tempo - Tempo in BPM
|
| 10 |
+
* @param dotted - Whether the note is dotted (increases duration by 50%)
|
| 11 |
+
* @returns Duration in seconds
|
| 12 |
+
*/
|
| 13 |
+
export function durationToSeconds(
|
| 14 |
+
duration: string,
|
| 15 |
+
tempo: number,
|
| 16 |
+
dotted: boolean = false
|
| 17 |
+
): number {
|
| 18 |
+
// Quarter note duration at given tempo
|
| 19 |
+
const quarterNoteDuration = 60 / tempo;
|
| 20 |
+
|
| 21 |
+
// Map durations to quarter note multipliers
|
| 22 |
+
const durationMap: Record<string, number> = {
|
| 23 |
+
'whole': 4,
|
| 24 |
+
'half': 2,
|
| 25 |
+
'quarter': 1,
|
| 26 |
+
'eighth': 0.5,
|
| 27 |
+
'16th': 0.25,
|
| 28 |
+
'32nd': 0.125,
|
| 29 |
+
};
|
| 30 |
+
|
| 31 |
+
const baseDuration = durationMap[duration] || 1;
|
| 32 |
+
const multiplier = dotted ? 1.5 : 1;
|
| 33 |
+
|
| 34 |
+
return quarterNoteDuration * baseDuration * multiplier;
|
| 35 |
+
}
|
frontend/src/utils/musicxml-parser.ts
DELETED
|
@@ -1,275 +0,0 @@
|
|
| 1 |
-
/**
|
| 2 |
-
* Lightweight MusicXML parser for extracting notes and metadata.
|
| 3 |
-
*
|
| 4 |
-
* Supports grand staff with multiple parts (treble + bass for piano).
|
| 5 |
-
*/
|
| 6 |
-
import type { Note, Score, Measure, Part } from '../store/notation';
|
| 7 |
-
|
| 8 |
-
interface ParsedNote {
|
| 9 |
-
pitch: string;
|
| 10 |
-
octave: number;
|
| 11 |
-
duration: number; // in divisions
|
| 12 |
-
type: string; // whole, half, quarter, etc.
|
| 13 |
-
accidental?: string;
|
| 14 |
-
dotted: boolean;
|
| 15 |
-
isRest: boolean;
|
| 16 |
-
}
|
| 17 |
-
|
| 18 |
-
export function parseMusicXML(xml: string): Score {
|
| 19 |
-
const parser = new DOMParser();
|
| 20 |
-
const doc = parser.parseFromString(xml, 'text/xml');
|
| 21 |
-
|
| 22 |
-
// Extract metadata
|
| 23 |
-
const title = doc.querySelector('movement-title')?.textContent ||
|
| 24 |
-
doc.querySelector('work-title')?.textContent ||
|
| 25 |
-
'Untitled';
|
| 26 |
-
const composer = doc.querySelector('creator[type="composer"]')?.textContent || 'Unknown';
|
| 27 |
-
|
| 28 |
-
// Extract key signature
|
| 29 |
-
const fifths = doc.querySelector('key fifths')?.textContent;
|
| 30 |
-
const keyMap: Record<string, string> = {
|
| 31 |
-
'-7': 'Cb', '-6': 'Gb', '-5': 'Db', '-4': 'Ab', '-3': 'Eb', '-2': 'Bb', '-1': 'F',
|
| 32 |
-
'0': 'C', '1': 'G', '2': 'D', '3': 'A', '4': 'E', '5': 'B', '6': 'F#', '7': 'C#'
|
| 33 |
-
};
|
| 34 |
-
const key = fifths ? keyMap[fifths] || 'C' : 'C';
|
| 35 |
-
|
| 36 |
-
// Extract time signature
|
| 37 |
-
const beats = doc.querySelector('time beats')?.textContent || '4';
|
| 38 |
-
const beatType = doc.querySelector('time beat-type')?.textContent || '4';
|
| 39 |
-
const timeSignature = `${beats}/${beatType}`;
|
| 40 |
-
|
| 41 |
-
// Extract tempo
|
| 42 |
-
let tempo = 120;
|
| 43 |
-
const tempoElement = doc.querySelector('sound[tempo]');
|
| 44 |
-
if (tempoElement) {
|
| 45 |
-
const tempoAttr = tempoElement.getAttribute('tempo');
|
| 46 |
-
if (tempoAttr) {
|
| 47 |
-
tempo = parseInt(tempoAttr);
|
| 48 |
-
}
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
-
// Parse all parts (for grand staff: treble + bass)
|
| 52 |
-
const partElements = doc.querySelectorAll('score-partwise > part');
|
| 53 |
-
const parts: Part[] = [];
|
| 54 |
-
let allMeasures: Measure[] = []; // For backward compatibility
|
| 55 |
-
|
| 56 |
-
partElements.forEach((partEl, partIdx) => {
|
| 57 |
-
const partId = partEl.getAttribute('id') || `part-${partIdx}`;
|
| 58 |
-
|
| 59 |
-
// Get part name and clef
|
| 60 |
-
const partName = doc.querySelector(`score-part[id="${partId}"] part-name`)?.textContent || `Part ${partIdx + 1}`;
|
| 61 |
-
|
| 62 |
-
// Determine clef from first measure
|
| 63 |
-
const firstClefSign = partEl.querySelector('measure clef sign')?.textContent || 'G';
|
| 64 |
-
const clef: 'treble' | 'bass' = firstClefSign === 'F' ? 'bass' : 'treble';
|
| 65 |
-
|
| 66 |
-
const measureElements = partEl.querySelectorAll('measure');
|
| 67 |
-
const measures: Measure[] = [];
|
| 68 |
-
|
| 69 |
-
measureElements.forEach((measureEl, idx) => {
|
| 70 |
-
const measureNumber = parseInt(measureEl.getAttribute('number') || String(idx + 1));
|
| 71 |
-
const notes: Note[] = [];
|
| 72 |
-
|
| 73 |
-
const noteElements = measureEl.querySelectorAll('note');
|
| 74 |
-
let currentChord: Note[] = [];
|
| 75 |
-
let currentChordId: string | null = null;
|
| 76 |
-
|
| 77 |
-
noteElements.forEach((noteEl, noteIdx) => {
|
| 78 |
-
const parsedNote = parseNoteElement(noteEl);
|
| 79 |
-
if (!parsedNote) return;
|
| 80 |
-
|
| 81 |
-
// Check if this note is part of a chord (simultaneous with previous note)
|
| 82 |
-
const isChordMember = noteEl.querySelector('chord') !== null;
|
| 83 |
-
|
| 84 |
-
// Assign chord ID for chord grouping
|
| 85 |
-
if (!isChordMember) {
|
| 86 |
-
// Start new chord group (or single note)
|
| 87 |
-
currentChordId = `chord-${measureNumber}-${noteIdx}`;
|
| 88 |
-
}
|
| 89 |
-
|
| 90 |
-
if (parsedNote.isRest) {
|
| 91 |
-
// Flush any pending chord before adding rest
|
| 92 |
-
if (currentChord.length > 0) {
|
| 93 |
-
notes.push(...currentChord);
|
| 94 |
-
currentChord = [];
|
| 95 |
-
}
|
| 96 |
-
|
| 97 |
-
// Include rests (rests don't have chordId)
|
| 98 |
-
notes.push({
|
| 99 |
-
id: `note-${measureNumber}-${notes.length}`,
|
| 100 |
-
pitch: '',
|
| 101 |
-
duration: parsedNote.type,
|
| 102 |
-
octave: 0,
|
| 103 |
-
startTime: 0,
|
| 104 |
-
dotted: parsedNote.dotted,
|
| 105 |
-
isRest: true,
|
| 106 |
-
chordId: undefined, // Rests are never part of chords
|
| 107 |
-
});
|
| 108 |
-
} else {
|
| 109 |
-
// Build full pitch string for pitched notes
|
| 110 |
-
const pitchName = parsedNote.pitch +
|
| 111 |
-
(parsedNote.accidental === 'sharp' ? '#' :
|
| 112 |
-
parsedNote.accidental === 'flat' ? 'b' : '');
|
| 113 |
-
const fullPitch = pitchName + parsedNote.octave;
|
| 114 |
-
|
| 115 |
-
const note: Note = {
|
| 116 |
-
id: `note-${measureNumber}-${notes.length + currentChord.length}`,
|
| 117 |
-
pitch: fullPitch,
|
| 118 |
-
duration: parsedNote.type,
|
| 119 |
-
octave: parsedNote.octave,
|
| 120 |
-
startTime: 0,
|
| 121 |
-
dotted: parsedNote.dotted,
|
| 122 |
-
accidental: parsedNote.accidental as 'sharp' | 'flat' | 'natural' | undefined,
|
| 123 |
-
isRest: false,
|
| 124 |
-
chordId: currentChordId || undefined, // Assign chord ID for grouping
|
| 125 |
-
};
|
| 126 |
-
|
| 127 |
-
if (isChordMember) {
|
| 128 |
-
// Add to current chord group
|
| 129 |
-
currentChord.push(note);
|
| 130 |
-
} else {
|
| 131 |
-
// Flush previous chord if any
|
| 132 |
-
if (currentChord.length > 0) {
|
| 133 |
-
notes.push(...currentChord);
|
| 134 |
-
currentChord = [];
|
| 135 |
-
}
|
| 136 |
-
// Start new chord group (or single note)
|
| 137 |
-
currentChord = [note];
|
| 138 |
-
}
|
| 139 |
-
}
|
| 140 |
-
});
|
| 141 |
-
|
| 142 |
-
// Flush any remaining chord
|
| 143 |
-
if (currentChord.length > 0) {
|
| 144 |
-
notes.push(...currentChord);
|
| 145 |
-
}
|
| 146 |
-
|
| 147 |
-
// Add ALL measures, even if empty (will show as blank measures)
|
| 148 |
-
measures.push({
|
| 149 |
-
id: `part-${partIdx}-measure-${measureNumber}`,
|
| 150 |
-
number: measureNumber,
|
| 151 |
-
notes,
|
| 152 |
-
});
|
| 153 |
-
});
|
| 154 |
-
|
| 155 |
-
// Add this part to the parts array
|
| 156 |
-
parts.push({
|
| 157 |
-
id: partId,
|
| 158 |
-
name: partName,
|
| 159 |
-
clef,
|
| 160 |
-
measures,
|
| 161 |
-
});
|
| 162 |
-
|
| 163 |
-
// For backward compatibility, use first part's measures
|
| 164 |
-
if (partIdx === 0) {
|
| 165 |
-
allMeasures = measures;
|
| 166 |
-
}
|
| 167 |
-
});
|
| 168 |
-
|
| 169 |
-
// If no parts found, return empty score
|
| 170 |
-
if (parts.length === 0) {
|
| 171 |
-
parts.push({
|
| 172 |
-
id: 'part-0',
|
| 173 |
-
name: 'Piano',
|
| 174 |
-
clef: 'treble',
|
| 175 |
-
measures: [],
|
| 176 |
-
});
|
| 177 |
-
}
|
| 178 |
-
|
| 179 |
-
return {
|
| 180 |
-
id: 'parsed-score',
|
| 181 |
-
title,
|
| 182 |
-
composer,
|
| 183 |
-
key,
|
| 184 |
-
timeSignature,
|
| 185 |
-
tempo,
|
| 186 |
-
parts,
|
| 187 |
-
measures: allMeasures, // Legacy field for backward compat
|
| 188 |
-
};
|
| 189 |
-
}
|
| 190 |
-
|
| 191 |
-
function parseNoteElement(noteEl: Element): ParsedNote | null {
|
| 192 |
-
const durationEl = noteEl.querySelector('duration');
|
| 193 |
-
const typeEl = noteEl.querySelector('type');
|
| 194 |
-
|
| 195 |
-
if (!durationEl || !typeEl) return null;
|
| 196 |
-
|
| 197 |
-
// Check if this is a rest
|
| 198 |
-
const isRest = noteEl.querySelector('rest') !== null;
|
| 199 |
-
|
| 200 |
-
if (isRest) {
|
| 201 |
-
return {
|
| 202 |
-
pitch: '',
|
| 203 |
-
octave: 0,
|
| 204 |
-
duration: parseInt(durationEl.textContent || '0'),
|
| 205 |
-
type: typeEl.textContent || 'quarter',
|
| 206 |
-
dotted: noteEl.querySelector('dot') !== null,
|
| 207 |
-
isRest: true,
|
| 208 |
-
};
|
| 209 |
-
}
|
| 210 |
-
|
| 211 |
-
// Parse pitched note
|
| 212 |
-
const pitchEl = noteEl.querySelector('pitch');
|
| 213 |
-
if (!pitchEl) return null;
|
| 214 |
-
|
| 215 |
-
const step = pitchEl.querySelector('step')?.textContent;
|
| 216 |
-
const octave = pitchEl.querySelector('octave')?.textContent;
|
| 217 |
-
const alter = pitchEl.querySelector('alter')?.textContent; // Semantic pitch alteration
|
| 218 |
-
const accidentalEl = noteEl.querySelector('accidental'); // Visual accidental display
|
| 219 |
-
const dotEl = noteEl.querySelector('dot');
|
| 220 |
-
|
| 221 |
-
if (!step || !octave) return null;
|
| 222 |
-
|
| 223 |
-
// Parse accidental from both <alter> (semantic) and <accidental> (visual) tags
|
| 224 |
-
let accidental: string | undefined;
|
| 225 |
-
|
| 226 |
-
// Priority 1: Use <alter> for pitch accuracy (indicates actual pitch)
|
| 227 |
-
if (alter) {
|
| 228 |
-
const alterValue = parseInt(alter);
|
| 229 |
-
if (alterValue === 1) accidental = 'sharp';
|
| 230 |
-
else if (alterValue === -1) accidental = 'flat';
|
| 231 |
-
else if (alterValue === 0) accidental = 'natural';
|
| 232 |
-
}
|
| 233 |
-
|
| 234 |
-
// Priority 2: If no <alter>, check <accidental> tag (visual notation)
|
| 235 |
-
if (!accidental && accidentalEl) {
|
| 236 |
-
const accType = accidentalEl.textContent;
|
| 237 |
-
if (accType === 'sharp') accidental = 'sharp';
|
| 238 |
-
else if (accType === 'flat') accidental = 'flat';
|
| 239 |
-
else if (accType === 'natural') accidental = 'natural';
|
| 240 |
-
}
|
| 241 |
-
|
| 242 |
-
return {
|
| 243 |
-
pitch: step,
|
| 244 |
-
octave: parseInt(octave),
|
| 245 |
-
duration: parseInt(durationEl.textContent || '0'),
|
| 246 |
-
type: typeEl.textContent || 'quarter',
|
| 247 |
-
accidental,
|
| 248 |
-
dotted: dotEl !== null,
|
| 249 |
-
isRest: false,
|
| 250 |
-
};
|
| 251 |
-
}
|
| 252 |
-
|
| 253 |
-
/**
|
| 254 |
-
* Convert note duration string to seconds based on tempo.
|
| 255 |
-
*/
|
| 256 |
-
export function durationToSeconds(duration: string, tempo: number, dotted: boolean = false): number {
|
| 257 |
-
const quarterNoteDuration = 60 / tempo; // seconds per quarter note
|
| 258 |
-
|
| 259 |
-
const durationMap: Record<string, number> = {
|
| 260 |
-
'whole': quarterNoteDuration * 4,
|
| 261 |
-
'half': quarterNoteDuration * 2,
|
| 262 |
-
'quarter': quarterNoteDuration,
|
| 263 |
-
'eighth': quarterNoteDuration / 2,
|
| 264 |
-
'16th': quarterNoteDuration / 4,
|
| 265 |
-
'32nd': quarterNoteDuration / 8,
|
| 266 |
-
};
|
| 267 |
-
|
| 268 |
-
let baseDuration = durationMap[duration] || quarterNoteDuration;
|
| 269 |
-
|
| 270 |
-
if (dotted) {
|
| 271 |
-
baseDuration *= 1.5;
|
| 272 |
-
}
|
| 273 |
-
|
| 274 |
-
return baseDuration;
|
| 275 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|