calebhan committed on
Commit
0dfd298
·
1 Parent(s): 2b51b9c

updated source separation

Browse files
backend/app_config.py CHANGED
@@ -31,12 +31,13 @@ class Settings(BaseSettings):
31
  gpu_enabled: bool = True
32
  max_video_duration: int = 900 # 15 minutes
33
 
34
- # Transcription Configuration (basic-pitch)
35
- onset_threshold: float = 0.3 # Note onset confidence (0-1). Lower = more notes detected
36
- frame_threshold: float = 0.3 # Frame activation threshold (0-1). Basic-pitch default
37
- minimum_note_length: int = 58 # Minimum note samples (~58ms at 44.1kHz). Basic-pitch default
38
- minimum_frequency_hz: float = 65.0 # C2 (65 Hz) - filter low-frequency noise like F1
39
- maximum_frequency_hz: float | None = None # No upper limit for piano range
 
40
 
41
  # Tempo Detection Configuration
42
  tempo_detection_duration: int = 60 # Seconds of audio to analyze
@@ -66,7 +67,7 @@ class Settings(BaseSettings):
66
 
67
  # Feature Flags
68
  enable_envelope_analysis: bool = True
69
- enable_tie_notation: bool = True
70
 
71
  # Phase 2: Zero-Tradeoff Solutions
72
  # Python compatibility: madmom runtime patch enables Python 3.10+ support
@@ -74,11 +75,17 @@ class Settings(BaseSettings):
74
  use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
75
 
76
  # Transcription Service Configuration
77
- use_yourmt3_transcription: bool = True # YourMT3+ for 80-85% accuracy (default, falls back to basic-pitch)
78
  transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
79
  transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
80
  yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
81
 
 
 
 
 
 
 
82
  # Grand Staff Configuration
83
  enable_grand_staff: bool = True # Split piano into treble + bass clefs
84
  middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
 
31
  gpu_enabled: bool = True
32
  max_video_duration: int = 900 # 15 minutes
33
 
34
+ # Transcription Configuration (deprecated - kept for API compatibility)
35
+ # These were used by basic-pitch, which has been removed in favor of YourMT3+
36
+ onset_threshold: float = 0.3 # Deprecated
37
+ frame_threshold: float = 0.3 # Deprecated
38
+ minimum_note_length: int = 58 # Deprecated
39
+ minimum_frequency_hz: float = 65.0 # Deprecated
40
+ maximum_frequency_hz: float | None = None # Deprecated
41
 
42
  # Tempo Detection Configuration
43
  tempo_detection_duration: int = 60 # Seconds of audio to analyze
 
67
 
68
  # Feature Flags
69
  enable_envelope_analysis: bool = True
70
+ enable_tie_notation: bool = True # Deprecated (was only used by old generate_musicxml)
71
 
72
  # Phase 2: Zero-Tradeoff Solutions
73
  # Python compatibility: madmom runtime patch enables Python 3.10+ support
 
75
  use_beat_synchronous_quantization: bool = True # Beat-aligned quantization (eliminates double quantization)
76
 
77
  # Transcription Service Configuration
78
+ use_yourmt3_transcription: bool = True # Deprecated (always True now - YourMT3+ is only transcriber)
79
  transcription_service_url: str = "http://localhost:8000" # Main API URL (YourMT3+ integrated)
80
  transcription_service_timeout: int = 300 # Timeout for transcription requests (seconds)
81
  yourmt3_device: str = _detect_device() # Auto-detect device: 'cuda' (NVIDIA), 'mps' (Apple Silicon), or 'cpu'
82
 
83
+ # Source Separation Configuration
84
+ use_two_stage_separation: bool = True # Use BS-RoFormer + Demucs for better quality (vs Demucs only)
85
+ transcribe_vocals: bool = True # Transcribe vocal melody as violin
86
+ vocal_instrument: int = 40 # MIDI program number for vocals (40=Violin, 73=Flute, 65=Alto Sax)
87
+ use_6stem_demucs: bool = True # Use 6-stem Demucs (piano, guitar, drums, bass, other) vs 4-stem
88
+
89
  # Grand Staff Configuration
90
  enable_grand_staff: bool = True # Split piano into treble + bass clefs
91
  middle_c_split: int = 60 # MIDI note number for staff split (60 = Middle C)
backend/audio_separator_wrapper.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Audio Separator Wrapper
3
+
4
+ Provides a clean interface to audio-separator library for 2-stage source separation:
5
+ 1. BS-RoFormer: Remove vocals (SOTA vocal/instrumental separation)
6
+ 2. Demucs: Separate instrumental into piano/guitar/bass/drums/other
7
+
8
+ Based on: https://github.com/nomadkaraoke/python-audio-separator
9
+ """
10
+
11
+ from pathlib import Path
12
+ from typing import Dict, Optional
13
+ import subprocess
14
+ import shutil
15
+ import sys
16
+
17
+
18
class AudioSeparator:
    """
    Wrapper for audio-separator with support for multiple separation strategies.

    Separation strategies:
    1. Two-stage (vocal removal + instrument separation)
    2. Direct piano isolation (Demucs 6-stem)
    3. Legacy Demucs 4-stem (backwards compatibility)
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize audio separator.

        Args:
            model_dir: Directory to store downloaded models (default: ~/.audio-separator/)
        """
        self.model_dir = model_dir or Path.home() / ".audio-separator"
        self.model_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _find_cli(name: str) -> str:
        """
        Locate a CLI executable, preferring the copy installed next to the
        current Python interpreter (i.e. the active virtualenv's bin dir).

        Falls back to a PATH lookup, and finally to the bare name so that a
        missing tool still produces a clear error from subprocess.

        Args:
            name: Executable name, e.g. "audio-separator" or "demucs"

        Returns:
            Path string (or bare name) suitable for subprocess.
        """
        candidate = Path(sys.executable).parent / name
        if candidate.exists():
            return str(candidate)
        return shutil.which(name) or name

    def separate_vocals(
        self,
        audio_path: Path,
        output_dir: Path,
        model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
    ) -> Dict[str, Path]:
        """
        Separate vocals from instrumental using BS-RoFormer (SOTA).

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            model: BS-RoFormer model to use (default: best quality)

        Returns:
            Dict with keys: 'vocals', 'instrumental'

        Raises:
            RuntimeError: If the CLI fails or expected output stems are missing.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # self.model_dir is always set in __init__, so pass it unconditionally
        # (the original conditional guard was dead code).
        cmd = [
            self._find_cli("audio-separator"),
            str(audio_path.resolve()),
            "-m", model,
            "--output_dir", str(output_dir.resolve()),
            "--output_format", "WAV",
            "--model_file_dir", str(self.model_dir),
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        # Debug: print stdout/stderr to see what happened
        print(f" [DEBUG] audio-separator return code: {result.returncode}")
        if result.stdout:
            print(f" [DEBUG] stdout: {result.stdout[-1000:]}")
        if result.stderr:
            print(f" [DEBUG] stderr: {result.stderr[-1000:]}")

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"BS-RoFormer vocal separation failed: {error_msg}")

        # audio-separator appends the model name to output filenames:
        # filename_(Vocals)_modelname.wav or filename_(Vocals).wav, so find
        # the stems by pattern matching rather than by an exact name.
        if not output_dir.exists():
            raise RuntimeError(f"Output directory {output_dir} does not exist")

        actual_files = list(output_dir.glob("*.wav"))
        print(f" [DEBUG] Files created in {output_dir}: {[f.name for f in actual_files]}")

        vocals_files = [f for f in actual_files if "Vocal" in f.name]
        instrumental_files = [f for f in actual_files if "Instrumental" in f.name]

        if not (vocals_files and instrumental_files):
            raise RuntimeError(f"Could not find output files. Found: {[f.name for f in actual_files]}")

        vocals_path = vocals_files[0]
        instrumental_path = instrumental_files[0]
        print(f" ✓ Found vocals: {vocals_path.name}")
        print(f" ✓ Found instrumental: {instrumental_path.name}")

        return {
            'vocals': vocals_path,
            'instrumental': instrumental_path
        }

    def separate_instruments_demucs(
        self,
        audio_path: Path,
        output_dir: Path,
        stems: int = 6
    ) -> Dict[str, Path]:
        """
        Separate instrumental audio into individual instruments using Demucs.

        Args:
            audio_path: Input audio file (should be instrumental, vocals already removed)
            output_dir: Directory for output stems
            stems: Number of stems (4 or 6)
                4-stem: vocals, drums, bass, other
                6-stem: vocals, drums, bass, guitar, piano, other

        Returns:
            Dict with stem names as keys and paths as values

        Raises:
            RuntimeError: If Demucs fails or any expected stem file is missing.
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Use Demucs directly for instrument separation
        model = "htdemucs_6s" if stems == 6 else "htdemucs"

        cmd = [
            self._find_cli("demucs"),
            "-n", model,
            "-o", str(output_dir.resolve()),
            str(audio_path.resolve())
        ]

        result = subprocess.run(cmd, capture_output=True, text=True)

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"Demucs instrument separation failed: {error_msg}")

        # Demucs creates: output_dir/model_name/audio_stem/*.wav
        demucs_output = output_dir / model / audio_path.stem

        if stems == 6:
            stem_names = ['vocals', 'drums', 'bass', 'guitar', 'piano', 'other']
        else:
            stem_names = ['vocals', 'drums', 'bass', 'other']
        stem_files = {name: demucs_output / f"{name}.wav" for name in stem_names}

        # Verify all expected stems exist
        missing = [name for name, path in stem_files.items() if not path.exists()]
        if missing:
            raise RuntimeError(f"Missing expected stems: {missing}")

        return stem_files

    def two_stage_separation(
        self,
        audio_path: Path,
        output_dir: Path,
        instrument_stems: int = 6
    ) -> Dict[str, Path]:
        """
        Two-stage separation for optimal quality:
        1. Remove vocals with BS-RoFormer (SOTA vocal separation)
        2. Separate clean instrumental with Demucs 6-stem (piano, guitar, drums, bass, other)

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            instrument_stems: Number of instrument stems (4 or 6)

        Returns:
            Dict with all stems: vocals, piano, guitar, drums, bass, other
        """
        output_dir.mkdir(parents=True, exist_ok=True)

        # Stage 1: Remove vocals with BS-RoFormer
        print(" Stage 1: Separating vocals with BS-RoFormer...")
        vocal_dir = output_dir / "stage1_vocals"
        vocal_stems = self.separate_vocals(audio_path, vocal_dir)

        # Stage 2: Separate instrumental with Demucs
        print(f" Stage 2: Separating instruments with Demucs {instrument_stems}-stem...")
        instrument_dir = output_dir / "stage2_instruments"
        instrument_stems_dict = self.separate_instruments_demucs(
            vocal_stems['instrumental'],
            instrument_dir,
            stems=instrument_stems
        )

        # Combine results: vocals from stage 1 (BS-RoFormer is higher quality),
        # instruments from stage 2, skipping Demucs' duplicate vocals stem.
        all_stems = {
            'vocals': vocal_stems['vocals'],
        }
        for name, path in instrument_stems_dict.items():
            if name != 'vocals':
                all_stems[name] = path

        print(f" ✓ 2-stage separation complete: {list(all_stems.keys())}")

        return all_stems
250
if __name__ == "__main__":
    # Manual test harness for the separator.
    import argparse

    arg_parser = argparse.ArgumentParser(description="Test Audio Separator")
    arg_parser.add_argument("audio_file", type=str, help="Path to audio file")
    arg_parser.add_argument("--output", type=str, default="./output_stems",
                            help="Output directory for stems")
    arg_parser.add_argument("--mode", type=str, default="two-stage",
                            choices=["vocals", "instruments", "two-stage"],
                            help="Separation mode")
    cli_args = arg_parser.parse_args()

    sep = AudioSeparator()
    src_path = Path(cli_args.audio_file)
    dest_dir = Path(cli_args.output)

    # Run the requested mode, then report the produced stems uniformly.
    if cli_args.mode == "vocals":
        produced = sep.separate_vocals(src_path, dest_dir)
        header = "Vocal separation complete:"
    elif cli_args.mode == "instruments":
        produced = sep.separate_instruments_demucs(src_path, dest_dir, stems=6)
        header = "Instrument separation complete:"
    else:  # two-stage
        produced = sep.two_stage_separation(src_path, dest_dir, instrument_stems=6)
        header = "2-stage separation complete:"

    print(header)
    for stem_name, stem_path in produced.items():
        print(f" {stem_name}: {stem_path}")
backend/evaluation/benchmark.py CHANGED
@@ -10,6 +10,12 @@ from dataclasses import dataclass, asdict
10
  from pathlib import Path
11
  from typing import List, Dict, Optional
12
  import pandas as pd
 
 
 
 
 
 
13
 
14
  from evaluation.metrics import calculate_metrics, TranscriptionMetrics
15
 
@@ -19,7 +25,7 @@ class TestCase:
19
  """Represents a single test case for benchmarking."""
20
  name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
21
  audio_path: Path # Path to audio file (WAV/MP3)
22
- ground_truth_midi: Path # Path to ground truth MIDI file
23
  genre: str = "classical" # Genre: classical, pop, jazz, simple
24
  difficulty: str = "medium" # Difficulty: easy, medium, hard
25
  duration: Optional[float] = None # Duration in seconds
@@ -29,7 +35,7 @@ class TestCase:
29
  return {
30
  'name': self.name,
31
  'audio_path': str(self.audio_path),
32
- 'ground_truth_midi': str(self.ground_truth_midi),
33
  'genre': self.genre,
34
  'difficulty': self.difficulty,
35
  'duration': self.duration
@@ -38,10 +44,11 @@ class TestCase:
38
  @classmethod
39
  def from_dict(cls, data: dict) -> 'TestCase':
40
  """Create TestCase from dictionary."""
 
41
  return cls(
42
  name=data['name'],
43
  audio_path=Path(data['audio_path']),
44
- ground_truth_midi=Path(data['ground_truth_midi']),
45
  genre=data.get('genre', 'classical'),
46
  difficulty=data.get('difficulty', 'medium'),
47
  duration=data.get('duration')
@@ -138,18 +145,28 @@ class TranscriptionBenchmark:
138
 
139
  print(f"✅ Transcription completed in {processing_time:.1f}s")
140
 
141
- # Calculate metrics
142
- metrics = calculate_metrics(
143
- predicted_midi,
144
- test_case.ground_truth_midi,
145
- onset_tolerance=self.onset_tolerance
146
- )
147
-
148
- print(f"\n📊 Results:")
149
- print(f" F1 Score: {metrics.f1_score:.3f}")
150
- print(f" Precision: {metrics.precision:.3f}")
151
- print(f" Recall: {metrics.recall:.3f}")
152
- print(f" Onset MAE: {metrics.onset_mae*1000:.1f}ms")
 
 
 
 
 
 
 
 
 
 
153
 
154
  return BenchmarkResult(
155
  test_case_name=test_case.name,
 
10
  from pathlib import Path
11
  from typing import List, Dict, Optional
12
  import pandas as pd
13
+ import sys
14
+
15
+ # Add backend directory to path for imports
16
+ backend_dir = Path(__file__).parent.parent
17
+ if str(backend_dir) not in sys.path:
18
+ sys.path.insert(0, str(backend_dir))
19
 
20
  from evaluation.metrics import calculate_metrics, TranscriptionMetrics
21
 
 
25
  """Represents a single test case for benchmarking."""
26
  name: str # Descriptive name (e.g., "Chopin_Nocturne_Op9_No2")
27
  audio_path: Path # Path to audio file (WAV/MP3)
28
+ ground_truth_midi: Optional[Path] = None # Path to ground truth MIDI file (None for manual review)
29
  genre: str = "classical" # Genre: classical, pop, jazz, simple
30
  difficulty: str = "medium" # Difficulty: easy, medium, hard
31
  duration: Optional[float] = None # Duration in seconds
 
35
  return {
36
  'name': self.name,
37
  'audio_path': str(self.audio_path),
38
+ 'ground_truth_midi': str(self.ground_truth_midi) if self.ground_truth_midi else None,
39
  'genre': self.genre,
40
  'difficulty': self.difficulty,
41
  'duration': self.duration
 
44
  @classmethod
45
  def from_dict(cls, data: dict) -> 'TestCase':
46
  """Create TestCase from dictionary."""
47
+ ground_truth = data.get('ground_truth_midi')
48
  return cls(
49
  name=data['name'],
50
  audio_path=Path(data['audio_path']),
51
+ ground_truth_midi=Path(ground_truth) if ground_truth else None,
52
  genre=data.get('genre', 'classical'),
53
  difficulty=data.get('difficulty', 'medium'),
54
  duration=data.get('duration')
 
145
 
146
  print(f"✅ Transcription completed in {processing_time:.1f}s")
147
 
148
+ # Calculate metrics only if ground truth is available
149
+ if test_case.ground_truth_midi:
150
+ metrics = calculate_metrics(
151
+ predicted_midi,
152
+ test_case.ground_truth_midi,
153
+ onset_tolerance=self.onset_tolerance
154
+ )
155
+
156
+ print(f"\n📊 Results:")
157
+ print(f" F1 Score: {metrics.f1_score:.3f}")
158
+ print(f" Precision: {metrics.precision:.3f}")
159
+ print(f" Recall: {metrics.recall:.3f}")
160
+ print(f" Onset MAE: {metrics.onset_mae*1000:.1f}ms")
161
+ else:
162
+ # No ground truth - create placeholder metrics for manual review
163
+ print(f"\n📝 No ground truth available - MIDI saved for manual review")
164
+ print(f" Output: {predicted_midi}")
165
+ metrics = TranscriptionMetrics(
166
+ precision=0.0, recall=0.0, f1_score=0.0,
167
+ onset_mae=0.0, pitch_accuracy=0.0,
168
+ true_positives=0, false_positives=0, false_negatives=0
169
+ )
170
 
171
  return BenchmarkResult(
172
  test_case_name=test_case.name,
backend/evaluation/results/yourmt3_midi/MIDI-Unprocessed_SMF_02_R1_2004_01-05_ORIG_MID--AUDIO_02_R1_2004_05_Track05_wav.mid DELETED
Binary file (56.9 kB)
 
backend/evaluation/results/yourmt3_results.csv DELETED
@@ -1,2 +0,0 @@
1
- test_case,genre,difficulty,f1_score,precision,recall,onset_mae,pitch_accuracy,true_positives,false_positives,false_negatives,processing_time,success,error
2
- MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav,classical,easy,0.9831072434092655,0.9932764416860616,0.9731441601216113,0.005870731363360242,1.0,7682,52,212,114.05283236503601,True,
 
 
 
backend/evaluation/results/yourmt3_results.json DELETED
@@ -1,18 +0,0 @@
1
- [
2
- {
3
- "test_case": "MAESTRO_2004_AUDIO 02 R1 2004 05 Track05 wav",
4
- "genre": "classical",
5
- "difficulty": "easy",
6
- "f1_score": 0.9831072434092655,
7
- "precision": 0.9932764416860616,
8
- "recall": 0.9731441601216113,
9
- "onset_mae": 0.005870731363360242,
10
- "pitch_accuracy": 1.0,
11
- "true_positives": 7682,
12
- "false_positives": 52,
13
- "false_negatives": 212,
14
- "processing_time": 114.05283236503601,
15
- "success": true,
16
- "error": null
17
- }
18
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
backend/pipeline.py CHANGED
@@ -11,8 +11,7 @@ from typing import Optional
11
  import mido
12
  import librosa
13
  import numpy as np
14
- from basic_pitch.inference import predict_and_save
15
- from basic_pitch import ICASSP_2022_MODEL_PATH
16
  from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
17
 
18
  # Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
@@ -91,13 +90,9 @@ class TranscriptionPipeline:
91
  self.final_midi_path = midi_path
92
 
93
  self.progress(90, "musicxml", "Generating MusicXML")
94
- # Use minimal generator for YourMT3+, full generator for basic-pitch
95
- if self.config.use_yourmt3_transcription:
96
- print(f" Using minimal MusicXML generation (YourMT3+)")
97
- musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
98
- else:
99
- print(f" Using full MusicXML generation (basic-pitch)")
100
- musicxml_path = self.generate_musicxml(midi_path)
101
 
102
  self.progress(100, "complete", "Transcription complete")
103
  return musicxml_path
@@ -179,75 +174,23 @@ class TranscriptionPipeline:
179
  minimum_note_length: int = None
180
  ) -> Path:
181
  """
182
- Transcribe audio to MIDI using basic-pitch.
183
 
184
  Args:
185
  audio_path: Path to audio file (should be 'other' stem for piano)
186
- onset_threshold: Note onset confidence (0-1). Higher = fewer false positives
187
- frame_threshold: Frame activation threshold (0-1)
188
- minimum_note_length: Minimum note duration in samples (~58ms at 44.1kHz)
189
 
190
  Returns:
191
  Path to generated MIDI file
192
  """
193
- # Use config defaults if not specified
194
- if onset_threshold is None:
195
- onset_threshold = self.config.onset_threshold
196
- if frame_threshold is None:
197
- frame_threshold = self.config.frame_threshold
198
- if minimum_note_length is None:
199
- minimum_note_length = self.config.minimum_note_length
200
-
201
  output_dir = self.temp_dir
202
 
203
- # === STEP 1: Try YourMT3+ first (primary transcriber) ===
204
- use_yourmt3 = self.config.use_yourmt3_transcription
205
- midi_path = None
206
-
207
- if use_yourmt3:
208
- try:
209
- print(f" Transcribing with YourMT3+ (primary transcriber)...")
210
- midi_path = self.transcribe_with_yourmt3(audio_path)
211
- print(f" ✓ YourMT3+ transcription complete")
212
- except Exception as e:
213
- import traceback
214
- print(f" ⚠ YourMT3+ failed: {e}")
215
- print(f" Full error: {traceback.format_exc()}")
216
- print(f" → Falling back to basic-pitch")
217
- midi_path = None
218
-
219
- # === STEP 2: Fallback to basic-pitch if YourMT3+ failed or disabled ===
220
- if midi_path is None:
221
- print(f" Transcribing with basic-pitch (onset={onset_threshold}, frame={frame_threshold})...")
222
-
223
- # Run basic-pitch inference
224
- # predict_and_save creates output files in the output directory
225
- predict_and_save(
226
- audio_path_list=[str(audio_path)],
227
- output_directory=str(output_dir),
228
- save_midi=True,
229
- sonify_midi=False, # Don't create audio
230
- save_model_outputs=False, # Don't save raw outputs
231
- save_notes=False, # Don't save CSV
232
- model_or_model_path=ICASSP_2022_MODEL_PATH,
233
- onset_threshold=onset_threshold,
234
- frame_threshold=frame_threshold,
235
- minimum_note_length=minimum_note_length,
236
- minimum_frequency=self.config.minimum_frequency_hz, # Filter low-frequency noise (F1)
237
- maximum_frequency=self.config.maximum_frequency_hz, # No upper limit
238
- multiple_pitch_bends=False,
239
- melodia_trick=True, # Improves monophonic melody
240
- debug_file=None
241
- )
242
-
243
- # basic-pitch saves as {audio_stem}_basic_pitch.mid
244
- generated_bp_midi = output_dir / f"{audio_path.stem}_basic_pitch.mid"
245
-
246
- if not generated_bp_midi.exists():
247
- raise RuntimeError("basic-pitch did not create MIDI file")
248
-
249
- midi_path = generated_bp_midi
250
- print(f" ✓ basic-pitch transcription complete")
251
 
252
  # Rename final MIDI to standard name for post-processing
253
  final_midi_path = output_dir / "piano.mid"
@@ -1094,163 +1037,6 @@ class TranscriptionPipeline:
1094
 
1095
  return midi_path
1096
 
1097
- def generate_musicxml(self, midi_path: Path) -> Path:
1098
- """
1099
- Convert MIDI to MusicXML with intelligent metadata detection and normalization.
1100
-
1101
- New pipeline order (optimized):
1102
- 1. Detect metadata from audio (tempo, time signature)
1103
- 2. Parse MIDI
1104
- 3. Detect key (ensemble)
1105
- 4. Insert metadata
1106
- 5. Deduplicate overlapping notes
1107
- 6. Add clef
1108
- 7. makeMeasures()
1109
- 8. Normalize measure durations
1110
- 9. Validate measures
1111
- 10. Export MusicXML
1112
-
1113
- Args:
1114
- midi_path: Path to input MIDI file
1115
-
1116
- Returns:
1117
- Path to output MusicXML file
1118
- """
1119
- self.progress(92, "musicxml", "Detecting metadata from audio")
1120
-
1121
- # Step 1: Detect metadata from audio BEFORE parsing MIDI
1122
- audio_path = self.temp_dir / "audio.wav"
1123
-
1124
- if audio_path.exists():
1125
- # Detect tempo
1126
- detected_tempo, tempo_confidence = self.detect_tempo_from_audio(audio_path)
1127
-
1128
- # Detect time signature (needs tempo)
1129
- time_sig_num, time_sig_denom, ts_confidence = self.detect_time_signature(
1130
- audio_path, detected_tempo
1131
- )
1132
- else:
1133
- # Fallback if audio not available
1134
- print(" WARNING: Audio file not found, using defaults")
1135
- detected_tempo, tempo_confidence = 120.0, 0.0
1136
- time_sig_num, time_sig_denom, ts_confidence = 4, 4, 0.0
1137
-
1138
- self.progress(93, "musicxml", "Parsing MIDI")
1139
-
1140
- # Step 2: Parse MIDI
1141
- score = converter.parse(midi_path)
1142
-
1143
- self.progress(94, "musicxml", "Detecting key signature")
1144
-
1145
- # Step 3: Detect key using ensemble methods
1146
- detected_key, key_confidence = self.detect_key_ensemble(score, audio_path)
1147
-
1148
- self.progress(95, "musicxml", "Deduplicating overlapping notes")
1149
-
1150
- # Step 4: Deduplicate overlapping notes (prevent polyphony issues)
1151
- score = self._deduplicate_overlapping_notes(score)
1152
-
1153
- # Step 4.5: Merge sequential notes at music21 level (fixes Issue #8 - tiny rests)
1154
- # This fixes tiny rests from MIDI→music21 precision loss
1155
- # Increased from 0.02 to 0.08 to catch gaps created by quantization (125ms at 120 BPM)
1156
- self.progress(95, "musicxml", "Merging sequential notes")
1157
- score = self._merge_music21_notes(score, gap_threshold_qn=0.08)
1158
-
1159
- # Step 5: Clean up any very short durations BEFORE makeMeasures
1160
- # This prevents music21 from creating impossible tuplets
1161
- for part in score.parts:
1162
- for element in part.flatten().notesAndRests:
1163
- if element.quarterLength < 0.0625: # Shorter than 64th note
1164
- element.quarterLength = 0.0625 # Round up to 64th note
1165
-
1166
- self.progress(96, "musicxml", "Creating measures")
1167
-
1168
- # Step 6: Create measures FIRST (required before grand staff split)
1169
- score = score.makeMeasures()
1170
-
1171
- # Step 7: Split into grand staff (treble + bass clefs) if enabled
1172
- if self.config.enable_grand_staff:
1173
- print(f" Splitting into grand staff (split at MIDI note {self.config.middle_c_split})...")
1174
- score = self._split_into_grand_staff(score)
1175
- print(f" Created {len(score.parts)} staves (treble + bass)")
1176
-
1177
- # Insert metadata into each part (grand staff creates new parts without metadata)
1178
- for part in score.parts:
1179
- # Get the first measure
1180
- measures = part.getElementsByClass('Measure')
1181
- if measures:
1182
- first_measure = measures[0]
1183
- # Insert key, time signature, and tempo into first measure
1184
- first_measure.insert(0, tempo.MetronomeMark(number=detected_tempo))
1185
- first_measure.insert(0, detected_key)
1186
- first_measure.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
1187
- else:
1188
- # Single staff: add treble clef and metadata
1189
- for part in score.parts:
1190
- part.insert(0, clef.TrebleClef())
1191
- part.insert(0, detected_key)
1192
- part.insert(0, meter.TimeSignature(f'{time_sig_num}/{time_sig_denom}'))
1193
- part.insert(0, tempo.MetronomeMark(number=detected_tempo))
1194
- part.partName = "Piano"
1195
-
1196
- # Step 7.5: Add tie notation for sustained notes across measure boundaries
1197
- if self.config.enable_tie_notation:
1198
- print(" Adding ties for sustained notes...")
1199
- score = self._add_ties_to_score(score)
1200
-
1201
- self.progress(97, "musicxml", "Normalizing measure durations")
1202
-
1203
- # Step 8: Remove impossible durations that makeMeasures created
1204
- score = self._remove_impossible_durations(score)
1205
-
1206
- # Step 9: Fix tuplets with impossible durations
1207
- score = self._fix_tuplet_durations(score)
1208
-
1209
- # Step 10: Normalize measure durations
1210
- score = self._normalize_measure_durations(score, time_sig_num, time_sig_denom)
1211
-
1212
- # Step 10.5: Fix any NEW impossible tuplets created during normalization
1213
- # Normalization might add rests that music21 assigns tuplets to
1214
- score = self._fix_tuplet_durations(score)
1215
-
1216
- # Step 11: Validate measures (logging only)
1217
- self._validate_measures(score)
1218
-
1219
- self.progress(98, "musicxml", "Writing MusicXML file")
1220
-
1221
- # Write MusicXML with proper error handling
1222
- output_path = self.temp_dir / f"{self.job_id}.musicxml"
1223
-
1224
- try:
1225
- # Use makeNotation=False to prevent music21 from auto-generating tuplets
1226
- score.write('musicxml', fp=str(output_path), makeNotation=False)
1227
- except Exception as e:
1228
- error_msg = str(e)
1229
-
1230
- # If still getting 2048th note errors after our normalization,
1231
- # it means music21 is creating them during export (not our fault)
1232
- if 'Cannot convert "2048th" duration to MusicXML' in error_msg or \
1233
- 'Cannot convert "4096th" duration to MusicXML' in error_msg:
1234
-
1235
- print(f" ERROR: music21 generated impossible duration during export: {error_msg}")
1236
- print(f" This is a music21 bug. Try re-running with different tempo/time signature.")
1237
-
1238
- # Last resort: try exporting as MIDI instead
1239
- midi_fallback = self.temp_dir / f"{self.job_id}_fallback.mid"
1240
- score.write('midi', fp=str(midi_fallback))
1241
- print(f" Created fallback MIDI export: {midi_fallback}")
1242
-
1243
- raise RuntimeError(
1244
- f"MusicXML export failed due to music21 bug. "
1245
- f"MIDI fallback created at {midi_fallback}. "
1246
- f"Original error: {error_msg}"
1247
- )
1248
- else:
1249
- # Different error, re-raise
1250
- raise
1251
-
1252
- return output_path
1253
-
1254
  def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
1255
  """
1256
  Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
@@ -1387,298 +1173,9 @@ class TranscriptionPipeline:
1387
  print(f" ✓ MusicXML generation complete")
1388
  return output_path
1389
 
1390
- def _deduplicate_overlapping_notes(self, score) -> stream.Score:
1391
- """
1392
- Deduplicate overlapping notes from basic-pitch to prevent MusicXML corruption.
1393
-
1394
- Problem: basic-pitch outputs multiple notes at the same timestamp for polyphonic detection.
1395
- When music21's makeMeasures() processes these, it creates measures with >4.0 beats.
1396
-
1397
- Solution: Group simultaneous notes (within 10ms) into chords, merge duplicate pitches.
1398
-
1399
- Args:
1400
- score: music21 Score object before makeMeasures()
1401
-
1402
- Returns:
1403
- Cleaned score with deduplicated notes
1404
- """
1405
- from music21 import stream, note, chord as m21_chord
1406
- from collections import defaultdict
1407
-
1408
- # Process each part
1409
- for part in score.parts:
1410
- # Collect all notes with their absolute offsets
1411
- notes_by_time = defaultdict(list) # bucket -> [notes]
1412
-
1413
- for element in part.flatten().notesAndRests:
1414
- if isinstance(element, note.Rest):
1415
- continue # Skip rests for deduplication
1416
-
1417
- # Get absolute offset in quarter notes
1418
- offset_qn = element.offset
1419
-
1420
- # Bucket notes that are within 0.005 quarter notes of each other (~5ms at 120 BPM)
1421
- # Finer resolution prevents chord notes from splitting into separate buckets
1422
- bucket = round(offset_qn / 0.005) * 0.005
1423
-
1424
- if isinstance(element, note.Note):
1425
- notes_by_time[bucket].append(element)
1426
- elif isinstance(element, m21_chord.Chord):
1427
- # Explode chords into individual notes for deduplication
1428
- for pitch in element.pitches:
1429
- n = note.Note(pitch)
1430
- n.quarterLength = element.quarterLength
1431
- n.offset = element.offset
1432
- notes_by_time[bucket].append(n)
1433
-
1434
- # Rebuild part with deduplicated notes
1435
- new_part = stream.Part()
1436
-
1437
- # Copy metadata (key, tempo, time signature will be added later)
1438
- new_part.id = part.id
1439
- new_part.partName = part.partName
1440
-
1441
- for bucket_qn in sorted(notes_by_time.keys()):
1442
- bucket_notes = notes_by_time[bucket_qn]
1443
-
1444
- if not bucket_notes:
1445
- continue
1446
-
1447
- # Group by pitch to remove duplicates
1448
- pitch_groups = defaultdict(list)
1449
- for n in bucket_notes:
1450
- pitch_groups[n.pitch.midi].append(n)
1451
-
1452
- # For each unique pitch, keep the note with longest duration
1453
- unique_notes = []
1454
- for midi_pitch, pitch_notes in pitch_groups.items():
1455
- # Sort by duration (longest first)
1456
- # Get velocity as integer for comparison (handle None values)
1457
- def get_velocity(note):
1458
- if hasattr(note, 'volume') and hasattr(note.volume, 'velocity'):
1459
- vel = note.volume.velocity
1460
- return vel if vel is not None else 64
1461
- return 64
1462
-
1463
- pitch_notes.sort(key=lambda x: (x.quarterLength, get_velocity(x)), reverse=True)
1464
- best_note = pitch_notes[0]
1465
-
1466
- # Filter out extremely short notes (< 64th note = 0.0625 quarter notes)
1467
- # MusicXML can't handle notes shorter than 1024th
1468
- if best_note.quarterLength >= 0.0625:
1469
- unique_notes.append(best_note)
1470
-
1471
- if not unique_notes:
1472
- continue # Skip if all notes were too short
1473
-
1474
- # Use bucket quarter note offset directly
1475
- offset_qn = bucket_qn
1476
-
1477
- if len(unique_notes) == 1:
1478
- # Single note - snap duration to avoid impossible tuplets
1479
- n = note.Note(unique_notes[0].pitch)
1480
- n.quarterLength = self._snap_duration(unique_notes[0].quarterLength)
1481
- new_part.insert(offset_qn, n)
1482
- elif len(unique_notes) > 1:
1483
- # Multiple notes at same time -> create chord
1484
- # Use the shortest duration to avoid overlaps, then snap
1485
- min_duration = min(n.quarterLength for n in unique_notes)
1486
-
1487
- c = m21_chord.Chord([n.pitch for n in unique_notes])
1488
- c.quarterLength = self._snap_duration(min_duration)
1489
- new_part.insert(offset_qn, c)
1490
-
1491
- # Replace old part with new part
1492
- score.replace(part, new_part)
1493
-
1494
- return score
1495
-
1496
- def _merge_music21_notes(self, score, gap_threshold_qn: float = 0.02) -> stream.Score:
1497
- """
1498
- Merge sequential notes of same pitch with small gaps at music21 level.
1499
-
1500
- Fixes tiny rests created by makeMeasures() from MIDI→music21 precision loss.
1501
- MUST run AFTER deduplication but BEFORE makeMeasures.
1502
-
1503
- Args:
1504
- score: music21 Score (before makeMeasures)
1505
- gap_threshold_qn: Max gap to merge (0.02 QN ≈ 20ms @ 120 BPM)
1506
-
1507
- Returns:
1508
- Score with merged sequential notes
1509
- """
1510
- from music21 import stream, note, chord as m21_chord
1511
- from collections import defaultdict
1512
-
1513
- for part in score.parts:
1514
- # Collect all notes with timing
1515
- elements_with_offsets = []
1516
-
1517
- for element in part.flatten().notesAndRests:
1518
- if isinstance(element, note.Rest):
1519
- continue
1520
-
1521
- offset_qn = element.offset
1522
- duration_qn = element.quarterLength
1523
-
1524
- if isinstance(element, note.Note):
1525
- elements_with_offsets.append({
1526
- 'offset': offset_qn,
1527
- 'end': offset_qn + duration_qn,
1528
- 'pitch': element.pitch.midi,
1529
- 'element': element
1530
- })
1531
- elif isinstance(element, m21_chord.Chord):
1532
- # Track each chord pitch separately
1533
- for pitch in element.pitches:
1534
- elements_with_offsets.append({
1535
- 'offset': offset_qn,
1536
- 'end': offset_qn + duration_qn,
1537
- 'pitch': pitch.midi,
1538
- 'element': element,
1539
- 'chord_id': id(element) # Prevent merging same-chord notes
1540
- })
1541
-
1542
- # Group by pitch and sort
1543
- notes_by_pitch = defaultdict(list)
1544
- for elem in elements_with_offsets:
1545
- notes_by_pitch[elem['pitch']].append(elem)
1546
-
1547
- for pitch in notes_by_pitch:
1548
- notes_by_pitch[pitch].sort(key=lambda x: x['offset'])
1549
-
1550
- # Track modifications
1551
- elements_to_remove = set()
1552
- duration_updates = {}
1553
-
1554
- # Merge within each pitch group
1555
- for pitch, note_list in notes_by_pitch.items():
1556
- i = 0
1557
- while i < len(note_list):
1558
- current = note_list[i]
1559
-
1560
- # Look ahead for mergeable notes
1561
- j = i + 1
1562
- while j < len(note_list):
1563
- next_note = note_list[j]
1564
- gap = next_note['offset'] - current['end']
1565
-
1566
- if gap <= gap_threshold_qn:
1567
- # Don't merge notes from SAME chord
1568
- if ('chord_id' in current and 'chord_id' in next_note and
1569
- current['chord_id'] == next_note['chord_id']):
1570
- break
1571
-
1572
- # Extend current to cover gap + next note
1573
- new_end = next_note['end']
1574
- new_duration = new_end - current['offset']
1575
-
1576
- duration_updates[id(current['element'])] = new_duration
1577
- current['end'] = new_end
1578
-
1579
- elements_to_remove.add(id(next_note['element']))
1580
- j += 1
1581
- else:
1582
- break
1583
-
1584
- i = j if j > i + 1 else i + 1
1585
-
1586
- # Rebuild part with modifications
1587
- new_part = stream.Part()
1588
- new_part.id = part.id
1589
- new_part.partName = part.partName
1590
-
1591
- for element in part.flatten().notesAndRests:
1592
- elem_id = id(element)
1593
-
1594
- if elem_id in elements_to_remove:
1595
- continue
1596
-
1597
- if elem_id in duration_updates:
1598
- element.quarterLength = duration_updates[elem_id]
1599
-
1600
- new_part.insert(element.offset, element)
1601
-
1602
- score.replace(part, new_part)
1603
-
1604
- return score
1605
-
1606
- def _add_ties_to_score(self, score) -> stream.Score:
1607
- """
1608
- Add tie notation to notes that span measure boundaries.
1609
-
1610
- Uses music21's tie.Tie class:
1611
- - 'start': Beginning of tied note
1612
- - 'stop': End of tied note
1613
-
1614
- Args:
1615
- score: music21 Score object
1616
-
1617
- Returns:
1618
- Score with tie notation added
1619
- """
1620
- from music21 import tie
1621
-
1622
- for part in score.parts:
1623
- measures = list(part.getElementsByClass('Measure'))
1624
-
1625
- # Get time signature to determine expected measure length
1626
- ts = part.getElementsByClass('TimeSignature')
1627
- expected_measure_length = ts[0].barDuration.quarterLength if ts else 4.0
1628
-
1629
- for measure_idx, measure in enumerate(measures):
1630
- # Get the time signature for this measure if it changed
1631
- measure_ts = measure.getElementsByClass('TimeSignature')
1632
- if measure_ts:
1633
- expected_measure_length = measure_ts[0].barDuration.quarterLength
1634
-
1635
- for element in measure.notesAndRests:
1636
- if not isinstance(element, note.Note):
1637
- continue
1638
-
1639
- # Check if note extends beyond measure boundary
1640
- # Use expected_measure_length from time signature, not barDuration
1641
- # which may have been auto-expanded by music21
1642
- element_end = element.offset + element.quarterLength
1643
-
1644
- if element_end > expected_measure_length + 0.01: # Tolerance for floating point
1645
- # Note crosses boundary - add 'start' tie
1646
- element.tie = tie.Tie('start')
1647
-
1648
- # Find continuation in next measure and add 'stop' tie
1649
- if measure_idx + 1 < len(measures):
1650
- next_measure = measures[measure_idx + 1]
1651
- for next_elem in next_measure.notesAndRests:
1652
- if (isinstance(next_elem, note.Note) and
1653
- next_elem.pitch.midi == element.pitch.midi and
1654
- next_elem.offset < 0.1): # At start of measure
1655
- next_elem.tie = tie.Tie('stop')
1656
- break
1657
-
1658
- return score
1659
-
1660
- def _snap_duration(self, duration) -> float:
1661
- """
1662
- Snap duration to nearest MusicXML-valid note value to avoid impossible tuplets.
1663
 
1664
- Valid durations: whole (4.0), half (2.0), quarter (1.0), eighth (0.5),
1665
- sixteenth (0.25), thirty-second (0.125), sixty-fourth (0.0625)
1666
 
1667
- Args:
1668
- duration: Quarter length as float or Fraction
1669
-
1670
- Returns:
1671
- Snapped quarter length
1672
- """
1673
- valid_durations = [4.0, 2.0, 1.0, 0.5, 0.25, 0.125, 0.0625]
1674
-
1675
- # Convert to float for comparison
1676
- dur_float = float(duration)
1677
 
1678
- # Find nearest valid duration
1679
- nearest = min(valid_durations, key=lambda x: abs(x - dur_float))
1680
-
1681
- return nearest
1682
 
1683
  def _snap_to_valid_duration(self, duration: float) -> float:
1684
  """
@@ -1708,73 +1205,6 @@ class TranscriptionPipeline:
1708
 
1709
  return nearest
1710
 
1711
- def _normalize_measure_durations(self, score, time_sig_numerator: int = 4, time_sig_denominator: int = 4) -> stream.Score:
1712
- """
1713
- Normalize note durations to fit measures, using detected time signature.
1714
-
1715
- Instead of removing notes, adjust durations to fill measures correctly.
1716
-
1717
- Args:
1718
- score: music21 Score with measures
1719
- time_sig_numerator: Detected time signature numerator
1720
- time_sig_denominator: Detected time signature denominator
1721
-
1722
- Returns:
1723
- Normalized score
1724
- """
1725
- expected_duration = (time_sig_numerator / time_sig_denominator) * 4.0 # Quarter notes
1726
-
1727
- for part in score.parts:
1728
- for measure in part.getElementsByClass('Measure'):
1729
- # Get all notes and chords
1730
- elements = list(measure.notesAndRests)
1731
-
1732
- if not elements:
1733
- continue
1734
-
1735
- # Calculate actual duration
1736
- actual_duration = sum(e.quarterLength for e in elements)
1737
-
1738
- # Increased tolerance from 0.05 to 0.15 QN (150ms at 120 BPM)
1739
- # Prevents normalizing "good enough" measures that get made worse by rounding
1740
- if abs(actual_duration - expected_duration) < 0.15:
1741
- continue # Already correct (allow tolerance for quantization errors)
1742
-
1743
- # Normalize durations proportionally
1744
- scale_factor = expected_duration / actual_duration if actual_duration > 0 else 1.0
1745
-
1746
- for element in elements:
1747
- # Scale duration
1748
- new_duration = element.quarterLength * scale_factor
1749
-
1750
- # Snap to valid music21 duration
1751
- element.quarterLength = self._snap_to_valid_duration(new_duration)
1752
-
1753
- # Verify total duration after normalization
1754
- new_total = sum(e.quarterLength for e in measure.notesAndRests)
1755
-
1756
- if abs(new_total - expected_duration) > 0.1:
1757
- gap = expected_duration - new_total
1758
-
1759
- if gap > 0.01:
1760
- # Underfull - add rest to fill
1761
- rest = note.Rest(quarterLength=gap)
1762
- measure.append(rest)
1763
- elif gap < -0.01:
1764
- # Overfull - proportionally adjust all elements (ZERO data loss)
1765
- # This is better than removing notes
1766
- overage = -gap
1767
- elements = list(measure.notesAndRests)
1768
-
1769
- print(f" WARNING: Measure overfull by {overage:.3f} QN, adjusting durations proportionally")
1770
-
1771
- # Proportionally reduce all durations
1772
- reduction_factor = expected_duration / new_total
1773
-
1774
- for elem in elements:
1775
- elem.quarterLength = self._snap_to_valid_duration(elem.quarterLength * reduction_factor)
1776
-
1777
- return score
1778
 
1779
  def _validate_and_adjust_metadata(
1780
  self,
@@ -2055,93 +1485,7 @@ class TranscriptionPipeline:
2055
 
2056
  return score
2057
 
2058
- def _fix_tuplet_durations(self, score) -> stream.Score:
2059
- """
2060
- Simplify all durations to prevent music21 from creating impossible tuplets during export.
2061
-
2062
- music21 creates tuplets on-the-fly during MusicXML export when durations don't
2063
- fit standard values. By rounding all durations to simple fractions, we prevent
2064
- the export logic from generating 2048th note tuplets.
2065
-
2066
- Args:
2067
- score: music21 Score with measures
2068
-
2069
- Returns:
2070
- Cleaned score with simplified durations
2071
- """
2072
- from music21 import note, chord, stream, duration
2073
-
2074
- simplified_count = 0
2075
-
2076
- # Simple durations that don't trigger tuplet creation (in quarter notes)
2077
- SIMPLE_DURATIONS = [
2078
- 4.0, # Whole note
2079
- 3.0, # Dotted half
2080
- 2.0, # Half note
2081
- 1.5, # Dotted quarter
2082
- 1.0, # Quarter note
2083
- 0.75, # Dotted eighth
2084
- 0.5, # Eighth note
2085
- 0.375, # Dotted 16th
2086
- 0.25, # 16th note
2087
- 0.125, # 32nd note (CRITICAL: was missing, caused durations to double!)
2088
- 0.0625, # 64th note
2089
- 0.03125, # 128th note
2090
- ]
2091
-
2092
- for part in score.parts:
2093
- for measure in part.getElementsByClass('Measure'):
2094
- # Process all notes, rests, and chords
2095
- for element in measure.notesAndRests:
2096
- original_duration = element.quarterLength
2097
-
2098
- # Round to nearest simple duration
2099
- nearest_duration = min(SIMPLE_DURATIONS, key=lambda x: abs(x - original_duration))
2100
-
2101
- if abs(original_duration - nearest_duration) > 0.01:
2102
- element.quarterLength = nearest_duration
2103
- simplified_count += 1
2104
-
2105
- # Strip any tuplets that might exist
2106
- if element.duration.tuplets:
2107
- element.duration.tuplets = ()
2108
-
2109
- # For chords, also process each note within
2110
- if isinstance(element, chord.Chord):
2111
- for n in element.notes:
2112
- if n.duration.tuplets:
2113
- n.duration.tuplets = ()
2114
-
2115
- if simplified_count > 0:
2116
- print(f" Simplified {simplified_count} durations to prevent tuplet creation during export")
2117
-
2118
- return score
2119
-
2120
- def _validate_measures(self, score) -> None:
2121
- """
2122
- Validate that all measures have correct durations matching their time signature.
2123
-
2124
- Logs warnings for any measures that are overfull or underfull.
2125
-
2126
- Args:
2127
- score: music21 Score with measures already created
2128
- """
2129
- for part_idx, part in enumerate(score.parts):
2130
- for measure_idx, measure in enumerate(part.getElementsByClass('Measure')):
2131
- # Get time signature for this measure
2132
- ts = measure.timeSignature or measure.getContextByClass('TimeSignature')
2133
- if not ts:
2134
- continue # Skip if no time signature
2135
-
2136
- expected_duration = ts.barDuration.quarterLength
2137
- actual_duration = measure.duration.quarterLength
2138
-
2139
- # Allow small floating-point tolerance (0.01 quarter notes = ~10ms at 120 BPM)
2140
- tolerance = 0.01
2141
 
2142
- if abs(actual_duration - expected_duration) > tolerance:
2143
- print(f"WARNING: Measure {measure_idx + 1} in part {part_idx} has duration {float(actual_duration):.2f} "
2144
- f"(expected {float(expected_duration):.2f} for {ts.ratioString} time)")
2145
 
2146
  def _split_into_grand_staff(self, score) -> stream.Score:
2147
  """
@@ -2740,7 +2084,10 @@ def remove_short_notes(midi_path: Path, min_duration: int = 60) -> Path:
2740
  def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
2741
  """Generate MusicXML from MIDI (module-level wrapper)."""
2742
  pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
2743
- return pipeline.generate_musicxml(midi_path)
 
 
 
2744
 
2745
 
2746
  def detect_key_signature(midi_path: Path) -> dict:
 
11
  import mido
12
  import librosa
13
  import numpy as np
14
+ # basic-pitch removed - using YourMT3+ only
 
15
  from music21 import converter, key, meter, tempo, note, clef, stream, chord as m21_chord
16
 
17
  # Phase 2: Zero-tradeoff solutions with Python 3.10+ compatibility patch
 
90
  self.final_midi_path = midi_path
91
 
92
  self.progress(90, "musicxml", "Generating MusicXML")
93
+ # Use minimal MusicXML generation (YourMT3+ optimized)
94
+ print(f" Using minimal MusicXML generation (YourMT3+)")
95
+ musicxml_path = self.generate_musicxml_minimal(midi_path, stems['other'])
 
 
 
 
96
 
97
  self.progress(100, "complete", "Transcription complete")
98
  return musicxml_path
 
174
  minimum_note_length: int = None
175
  ) -> Path:
176
  """
177
+ Transcribe audio to MIDI using YourMT3+.
178
 
179
  Args:
180
  audio_path: Path to audio file (should be 'other' stem for piano)
181
+ onset_threshold: Deprecated (kept for API compatibility)
182
+ frame_threshold: Deprecated (kept for API compatibility)
183
+ minimum_note_length: Deprecated (kept for API compatibility)
184
 
185
  Returns:
186
  Path to generated MIDI file
187
  """
 
 
 
 
 
 
 
 
188
  output_dir = self.temp_dir
189
 
190
+ # Transcribe with YourMT3+ (only transcription method)
191
+ print(f" Transcribing with YourMT3+...")
192
+ midi_path = self.transcribe_with_yourmt3(audio_path)
193
+ print(f" ✓ YourMT3+ transcription complete")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  # Rename final MIDI to standard name for post-processing
196
  final_midi_path = output_dir / "piano.mid"
 
1037
 
1038
  return midi_path
1039
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1040
  def generate_musicxml_minimal(self, midi_path: Path, source_audio: Path) -> Path:
1041
  """
1042
  Generate MusicXML from clean MIDI (YourMT3+ output) with minimal post-processing.
 
1173
  print(f" ✓ MusicXML generation complete")
1174
  return output_path
1175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1176
 
 
 
1177
 
 
 
 
 
 
 
 
 
 
 
1178
 
 
 
 
 
1179
 
1180
  def _snap_to_valid_duration(self, duration: float) -> float:
1181
  """
 
1205
 
1206
  return nearest
1207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1208
 
1209
  def _validate_and_adjust_metadata(
1210
  self,
 
1485
 
1486
  return score
1487
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1488
 
 
 
 
1489
 
1490
  def _split_into_grand_staff(self, score) -> stream.Score:
1491
  """
 
2084
  def generate_musicxml(midi_path: Path, storage_path: Path) -> Path:
2085
  """Generate MusicXML from MIDI (module-level wrapper)."""
2086
  pipeline = TranscriptionPipeline("compat_job", "http://example.com", storage_path)
2087
+ # Use minimal pipeline (YourMT3+ optimized)
2088
+ # Note: source_audio path may not exist for module-level calls, but minimal pipeline can handle it
2089
+ audio_path = storage_path / "temp" / "compat_job" / "audio.wav"
2090
+ return pipeline.generate_musicxml_minimal(midi_path, audio_path)
2091
 
2092
 
2093
  def detect_key_signature(midi_path: Path) -> dict:
backend/requirements.txt CHANGED
@@ -18,11 +18,9 @@ scipy
18
  torch>=2.0.0
19
  torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
20
  demucs>=3.0.6
 
21
 
22
- # Pitch detection (macOS default runtime is CoreML)
23
- basic-pitch==0.4.0 # Fallback transcriber when YourMT3+ service unavailable
24
-
25
- # YourMT3+ Transcription (integrated into main service)
26
  lightning>=2.2.1
27
  transformers==4.45.1
28
  einops>=0.7.0
 
18
  torch>=2.0.0
19
  torchaudio==2.1.0 # Pin to version that uses SoundFile backend, not torchcodec
20
  demucs>=3.0.6
21
+ audio-separator>=0.40.0 # BS-RoFormer and UVR models for better vocal separation
22
 
23
+ # YourMT3+ Transcription (primary transcriber)
 
 
 
24
  lightning>=2.2.1
25
  transformers==4.45.1
26
  einops>=0.7.0
frontend/src/components/InstrumentSelector.css ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .instrument-selector {
2
+ margin-bottom: 2rem;
3
+ }
4
+
5
+ .selector-label {
6
+ display: block;
7
+ margin-bottom: 1rem;
8
+ font-weight: bold;
9
+ font-size: 1.1rem;
10
+ color: #333;
11
+ }
12
+
13
+ .instrument-grid {
14
+ display: grid;
15
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
16
+ gap: 1rem;
17
+ margin-bottom: 0.5rem;
18
+ }
19
+
20
+ .instrument-button {
21
+ display: flex;
22
+ flex-direction: column;
23
+ align-items: center;
24
+ justify-content: center;
25
+ padding: 1rem;
26
+ border: 2px solid #ddd;
27
+ border-radius: 8px;
28
+ background-color: #fff;
29
+ cursor: pointer;
30
+ transition: all 0.2s ease;
31
+ min-height: 100px;
32
+ }
33
+
34
+ .instrument-button:hover {
35
+ border-color: #007bff;
36
+ background-color: #f8f9fa;
37
+ transform: translateY(-2px);
38
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
39
+ }
40
+
41
+ .instrument-button.selected {
42
+ border-color: #007bff;
43
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
44
+ color: white;
45
+ box-shadow: 0 4px 12px rgba(0, 123, 255, 0.3);
46
+ }
47
+
48
+ .instrument-button.selected:hover {
49
+ background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
50
+ }
51
+
52
+ .instrument-icon {
53
+ font-size: 2rem;
54
+ margin-bottom: 0.5rem;
55
+ display: block;
56
+ }
57
+
58
+ .instrument-label {
59
+ font-size: 0.9rem;
60
+ font-weight: 500;
61
+ text-align: center;
62
+ }
63
+
64
+ .selector-hint {
65
+ color: #888;
66
+ font-size: 0.85rem;
67
+ margin-top: 0.5rem;
68
+ text-align: center;
69
+ font-style: italic;
70
+ }
71
+
72
+ /* Responsive adjustments */
73
+ @media (max-width: 600px) {
74
+ .instrument-grid {
75
+ grid-template-columns: repeat(2, 1fr);
76
+ }
77
+ }
frontend/src/components/InstrumentSelector.tsx ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Multi-instrument selector for choosing which instruments to transcribe.
3
+ */
4
+ import { useState } from 'react';
5
+ import './InstrumentSelector.css';
6
+
7
+ export interface Instrument {
8
+ id: string;
9
+ label: string;
10
+ icon: string;
11
+ }
12
+
13
+ const INSTRUMENTS: Instrument[] = [
14
+ { id: 'piano', label: 'Piano', icon: '🎹' },
15
+ { id: 'vocals', label: 'Vocals (Violin)', icon: '🎤' },
16
+ { id: 'drums', label: 'Drums', icon: '🥁' },
17
+ { id: 'bass', label: 'Bass', icon: '🎸' },
18
+ { id: 'guitar', label: 'Guitar', icon: '🎸' },
19
+ { id: 'other', label: 'Other Instruments', icon: '🎵' }
20
+ ];
21
+
22
+ interface InstrumentSelectorProps {
23
+ selectedInstruments: string[];
24
+ onChange: (instruments: string[]) => void;
25
+ }
26
+
27
+ export function InstrumentSelector({ selectedInstruments, onChange }: InstrumentSelectorProps) {
28
+ const handleToggle = (instrumentId: string) => {
29
+ const isSelected = selectedInstruments.includes(instrumentId);
30
+
31
+ if (isSelected) {
32
+ // Don't allow deselecting if it's the only selected instrument
33
+ if (selectedInstruments.length === 1) {
34
+ return;
35
+ }
36
+ onChange(selectedInstruments.filter(id => id !== instrumentId));
37
+ } else {
38
+ onChange([...selectedInstruments, instrumentId]);
39
+ }
40
+ };
41
+
42
+ return (
43
+ <div className="instrument-selector">
44
+ <label className="selector-label">Select Instruments:</label>
45
+ <div className="instrument-grid">
46
+ {INSTRUMENTS.map(instrument => (
47
+ <button
48
+ key={instrument.id}
49
+ type="button"
50
+ className={`instrument-button ${selectedInstruments.includes(instrument.id) ? 'selected' : ''}`}
51
+ onClick={() => handleToggle(instrument.id)}
52
+ aria-pressed={selectedInstruments.includes(instrument.id)}
53
+ >
54
+ <span className="instrument-icon">{instrument.icon}</span>
55
+ <span className="instrument-label">{instrument.label}</span>
56
+ </button>
57
+ ))}
58
+ </div>
59
+ <p className="selector-hint">
60
+ Select at least one instrument to transcribe
61
+ </p>
62
+ </div>
63
+ );
64
+ }
65
+
66
+ export default InstrumentSelector;
frontend/src/components/InstrumentTabs.css ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .instrument-tabs {
2
+ display: flex;
3
+ gap: 0.5rem;
4
+ margin-bottom: 1.5rem;
5
+ padding: 0.5rem;
6
+ background-color: #f8f9fa;
7
+ border-radius: 8px;
8
+ overflow-x: auto;
9
+ }
10
+
11
+ .instrument-tabs.single {
12
+ justify-content: center;
13
+ }
14
+
15
+ .instrument-badge {
16
+ display: flex;
17
+ align-items: center;
18
+ gap: 0.5rem;
19
+ padding: 0.75rem 1.5rem;
20
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
21
+ color: white;
22
+ border-radius: 6px;
23
+ font-weight: 500;
24
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
25
+ }
26
+
27
+ .instrument-tab {
28
+ display: flex;
29
+ align-items: center;
30
+ gap: 0.5rem;
31
+ padding: 0.75rem 1.25rem;
32
+ border: 2px solid #dee2e6;
33
+ border-radius: 6px;
34
+ background-color: white;
35
+ cursor: pointer;
36
+ transition: all 0.2s ease;
37
+ font-size: 0.95rem;
38
+ font-weight: 500;
39
+ white-space: nowrap;
40
+ }
41
+
42
+ .instrument-tab:hover {
43
+ border-color: #007bff;
44
+ background-color: #f8f9fa;
45
+ transform: translateY(-1px);
46
+ box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
47
+ }
48
+
49
+ .instrument-tab.active {
50
+ border-color: #007bff;
51
+ background: linear-gradient(135deg, #007bff 0%, #0056b3 100%);
52
+ color: white;
53
+ box-shadow: 0 3px 8px rgba(0, 123, 255, 0.3);
54
+ }
55
+
56
+ .instrument-tab.active:hover {
57
+ background: linear-gradient(135deg, #0056b3 0%, #003d82 100%);
58
+ transform: translateY(-1px);
59
+ }
60
+
61
+ .instrument-tab .instrument-icon,
62
+ .instrument-badge .instrument-icon {
63
+ font-size: 1.25rem;
64
+ line-height: 1;
65
+ }
66
+
67
+ .instrument-tab .instrument-label,
68
+ .instrument-badge .instrument-label {
69
+ font-size: 0.95rem;
70
+ }
71
+
72
+ /* Responsive */
73
+ @media (max-width: 600px) {
74
+ .instrument-tabs {
75
+ gap: 0.25rem;
76
+ padding: 0.25rem;
77
+ }
78
+
79
+ .instrument-tab {
80
+ padding: 0.5rem 0.75rem;
81
+ font-size: 0.85rem;
82
+ }
83
+
84
+ .instrument-tab .instrument-icon {
85
+ font-size: 1.1rem;
86
+ }
87
+
88
+ .instrument-tab .instrument-label {
89
+ display: none; /* Hide labels on mobile, show icons only */
90
+ }
91
+ }
frontend/src/components/InstrumentTabs.tsx ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Instrument tabs for switching between transcribed instruments.
3
+ */
4
+ import './InstrumentTabs.css';
5
+
6
+ interface InstrumentInfo {
7
+ id: string;
8
+ label: string;
9
+ icon: string;
10
+ }
11
+
12
+ const INSTRUMENT_INFO: Record<string, InstrumentInfo> = {
13
+ piano: { id: 'piano', label: 'Piano', icon: '🎹' },
14
+ vocals: { id: 'vocals', label: 'Vocals', icon: '🎤' },
15
+ drums: { id: 'drums', label: 'Drums', icon: '🥁' },
16
+ bass: { id: 'bass', label: 'Bass', icon: '🎸' },
17
+ guitar: { id: 'guitar', label: 'Guitar', icon: '🎸' },
18
+ other: { id: 'other', label: 'Other', icon: '🎵' },
19
+ };
20
+
21
+ interface InstrumentTabsProps {
22
+ instruments: string[];
23
+ activeInstrument: string;
24
+ onInstrumentChange: (instrument: string) => void;
25
+ }
26
+
27
+ export function InstrumentTabs({ instruments, activeInstrument, onInstrumentChange }: InstrumentTabsProps) {
28
+ if (instruments.length === 0) {
29
+ return null;
30
+ }
31
+
32
+ // If only one instrument, show it as a badge instead of tabs
33
+ if (instruments.length === 1) {
34
+ const instrument = instruments[0];
35
+ const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
36
+ return (
37
+ <div className="instrument-tabs single">
38
+ <div className="instrument-badge">
39
+ <span className="instrument-icon">{info.icon}</span>
40
+ <span className="instrument-label">{info.label}</span>
41
+ </div>
42
+ </div>
43
+ );
44
+ }
45
+
46
+ return (
47
+ <div className="instrument-tabs">
48
+ {instruments.map((instrument) => {
49
+ const info = INSTRUMENT_INFO[instrument] || { id: instrument, label: instrument, icon: '🎵' };
50
+ const isActive = instrument === activeInstrument;
51
+
52
+ return (
53
+ <button
54
+ key={instrument}
55
+ className={`instrument-tab ${isActive ? 'active' : ''}`}
56
+ onClick={() => onInstrumentChange(instrument)}
57
+ aria-pressed={isActive}
58
+ >
59
+ <span className="instrument-icon">{info.icon}</span>
60
+ <span className="instrument-label">{info.label}</span>
61
+ </button>
62
+ );
63
+ })}
64
+ </div>
65
+ );
66
+ }
67
+
68
+ export default InstrumentTabs;
frontend/src/components/JobSubmission.css CHANGED
@@ -102,3 +102,12 @@ button:hover {
102
  background-color: #f8d7da;
103
  color: #721c24;
104
  }
 
 
 
 
 
 
 
 
 
 
102
  background-color: #f8d7da;
103
  color: #721c24;
104
  }
105
+
106
+ .error-alert {
107
+ background-color: #f8d7da;
108
+ color: #721c24;
109
+ padding: 0.75rem 1rem;
110
+ border-radius: 4px;
111
+ margin-top: 1rem;
112
+ border: 1px solid #f5c6cb;
113
+ }
frontend/src/components/JobSubmission.tsx CHANGED
@@ -4,6 +4,7 @@
4
  import { useState, useRef, useEffect } from 'react';
5
  import { api } from '../api/client';
6
  import type { ProgressUpdate } from '../api/client';
 
7
  import './JobSubmission.css';
8
 
9
  interface JobSubmissionProps {
@@ -13,6 +14,7 @@ interface JobSubmissionProps {
13
 
14
  export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
15
  const [youtubeUrl, setYoutubeUrl] = useState('');
 
16
  const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
17
  const [error, setError] = useState<string | null>(null);
18
  const [progress, setProgress] = useState(0);
@@ -43,15 +45,24 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
43
  const handleSubmit = async (e: React.FormEvent) => {
44
  e.preventDefault();
45
  setError(null);
 
 
46
  const validation = validateUrl(youtubeUrl);
47
  if (validation) {
48
  setError(validation);
49
  return;
50
  }
 
 
 
 
 
 
 
51
  setStatus('submitting');
52
 
53
  try {
54
- const response = await api.submitJob(youtubeUrl, { instruments: ['piano'] });
55
  setYoutubeUrl('');
56
  if (onJobSubmitted) onJobSubmitted(response);
57
 
@@ -150,6 +161,11 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
150
 
151
  {(status === 'idle' || status === 'submitting') && (
152
  <form onSubmit={handleSubmit}>
 
 
 
 
 
153
  <div className="form-group">
154
  <label htmlFor="youtube-url">YouTube URL:</label>
155
  <input
@@ -167,7 +183,7 @@ export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps
167
  </div>
168
  <button type="submit" disabled={status === 'submitting'}>Transcribe</button>
169
  {status === 'submitting' && <div>Submitting...</div>}
170
- {error && <div role="alert">{error}</div>}
171
  </form>
172
  )}
173
 
 
4
  import { useState, useRef, useEffect } from 'react';
5
  import { api } from '../api/client';
6
  import type { ProgressUpdate } from '../api/client';
7
+ import { InstrumentSelector } from './InstrumentSelector';
8
  import './JobSubmission.css';
9
 
10
  interface JobSubmissionProps {
 
14
 
15
  export function JobSubmission({ onComplete, onJobSubmitted }: JobSubmissionProps) {
16
  const [youtubeUrl, setYoutubeUrl] = useState('');
17
+ const [selectedInstruments, setSelectedInstruments] = useState<string[]>(['piano']);
18
  const [status, setStatus] = useState<'idle' | 'submitting' | 'processing' | 'failed'>('idle');
19
  const [error, setError] = useState<string | null>(null);
20
  const [progress, setProgress] = useState(0);
 
45
  const handleSubmit = async (e: React.FormEvent) => {
46
  e.preventDefault();
47
  setError(null);
48
+
49
+ // Validate URL
50
  const validation = validateUrl(youtubeUrl);
51
  if (validation) {
52
  setError(validation);
53
  return;
54
  }
55
+
56
+ // Validate at least one instrument is selected
57
+ if (selectedInstruments.length === 0) {
58
+ setError('Please select at least one instrument');
59
+ return;
60
+ }
61
+
62
  setStatus('submitting');
63
 
64
  try {
65
+ const response = await api.submitJob(youtubeUrl, { instruments: selectedInstruments });
66
  setYoutubeUrl('');
67
  if (onJobSubmitted) onJobSubmitted(response);
68
 
 
161
 
162
  {(status === 'idle' || status === 'submitting') && (
163
  <form onSubmit={handleSubmit}>
164
+ <InstrumentSelector
165
+ selectedInstruments={selectedInstruments}
166
+ onChange={setSelectedInstruments}
167
+ />
168
+
169
  <div className="form-group">
170
  <label htmlFor="youtube-url">YouTube URL:</label>
171
  <input
 
183
  </div>
184
  <button type="submit" disabled={status === 'submitting'}>Transcribe</button>
185
  {status === 'submitting' && <div>Submitting...</div>}
186
+ {error && <div role="alert" className="error-alert">{error}</div>}
187
  </form>
188
  )}
189
 
frontend/src/components/PlaybackControls.css CHANGED
@@ -2,36 +2,60 @@
2
  display: flex;
3
  align-items: center;
4
  gap: 1rem;
5
- padding: 1rem;
6
- background: #f5f5f5;
7
- border-radius: 8px;
8
- margin: 1rem 0;
9
  flex-wrap: wrap;
 
 
10
  }
11
 
12
  .playback-controls button {
13
- padding: 0.5rem 1rem;
14
- font-size: 1rem;
15
- border: 1px solid #ccc;
 
16
  background: white;
17
- border-radius: 4px;
18
  cursor: pointer;
19
- transition: all 0.2s;
 
20
  }
21
 
22
  .playback-controls button:hover:not(:disabled) {
23
- background: #e0e0e0;
 
 
 
 
 
 
 
 
 
24
  }
25
 
26
  .playback-controls button:disabled {
27
- opacity: 0.5;
28
  cursor: not-allowed;
 
 
 
 
 
 
 
29
  }
30
 
31
  .tempo-control {
32
  display: flex;
33
  align-items: center;
34
- gap: 0.5rem;
 
 
 
 
35
  }
36
 
37
  .tempo-control label {
@@ -39,14 +63,110 @@
39
  align-items: center;
40
  gap: 0.5rem;
41
  font-size: 0.9rem;
 
 
42
  }
43
 
44
  .tempo-control input[type="range"] {
45
  width: 150px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
 
48
- .position-indicator {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  font-size: 0.9rem;
50
- color: #666;
51
- font-family: monospace;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
 
2
  display: flex;
3
  align-items: center;
4
  gap: 1rem;
5
+ padding: 1.25rem;
6
+ background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
7
+ border-radius: 12px;
8
+ margin: 1.5rem 0;
9
  flex-wrap: wrap;
10
+ box-shadow: 0 2px 8px rgba(0, 0, 0, 0.08);
11
+ border: 1px solid #dee2e6;
12
  }
13
 
14
  .playback-controls button {
15
+ padding: 0.6rem 1.25rem;
16
+ font-size: 0.95rem;
17
+ font-weight: 500;
18
+ border: 2px solid #dee2e6;
19
  background: white;
20
+ border-radius: 8px;
21
  cursor: pointer;
22
+ transition: all 0.2s ease;
23
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.05);
24
  }
25
 
26
  .playback-controls button:hover:not(:disabled) {
27
+ background: #007bff;
28
+ border-color: #007bff;
29
+ color: white;
30
+ transform: translateY(-1px);
31
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.2);
32
+ }
33
+
34
+ .playback-controls button:active:not(:disabled) {
35
+ transform: translateY(0);
36
+ box-shadow: 0 1px 3px rgba(0, 0, 0, 0.1);
37
  }
38
 
39
  .playback-controls button:disabled {
40
+ opacity: 0.4;
41
  cursor: not-allowed;
42
+ background: #f8f9fa;
43
+ }
44
+
45
+ .playback-controls button.active {
46
+ background: #007bff;
47
+ border-color: #007bff;
48
+ color: white;
49
  }
50
 
51
  .tempo-control {
52
  display: flex;
53
  align-items: center;
54
+ gap: 0.75rem;
55
+ background: white;
56
+ padding: 0.5rem 0.75rem;
57
+ border-radius: 8px;
58
+ border: 1px solid #dee2e6;
59
  }
60
 
61
  .tempo-control label {
 
63
  align-items: center;
64
  gap: 0.5rem;
65
  font-size: 0.9rem;
66
+ font-weight: 500;
67
+ color: #495057;
68
  }
69
 
70
  .tempo-control input[type="range"] {
71
  width: 150px;
72
+ height: 6px;
73
+ border-radius: 3px;
74
+ background: #dee2e6;
75
+ outline: none;
76
+ -webkit-appearance: none;
77
+ }
78
+
79
+ .tempo-control input[type="range"]::-webkit-slider-thumb {
80
+ -webkit-appearance: none;
81
+ appearance: none;
82
+ width: 16px;
83
+ height: 16px;
84
+ border-radius: 50%;
85
+ background: #007bff;
86
+ cursor: pointer;
87
+ box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
88
+ transition: all 0.2s ease;
89
+ }
90
+
91
+ .tempo-control input[type="range"]::-webkit-slider-thumb:hover {
92
+ background: #0056b3;
93
+ transform: scale(1.1);
94
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
95
+ }
96
+
97
+ .tempo-control input[type="range"]::-moz-range-thumb {
98
+ width: 16px;
99
+ height: 16px;
100
+ border-radius: 50%;
101
+ background: #007bff;
102
+ cursor: pointer;
103
+ border: none;
104
+ box-shadow: 0 2px 4px rgba(0, 123, 255, 0.3);
105
+ transition: all 0.2s ease;
106
  }
107
 
108
+ .tempo-control input[type="range"]::-moz-range-thumb:hover {
109
+ background: #0056b3;
110
+ transform: scale(1.1);
111
+ box-shadow: 0 3px 6px rgba(0, 123, 255, 0.4);
112
+ }
113
+
114
+ .tempo-control span {
115
+ font-weight: 600;
116
+ color: #007bff;
117
+ min-width: 40px;
118
+ text-align: center;
119
+ }
120
+
121
+ .time-display {
122
+ font-size: 0.95rem;
123
+ color: #495057;
124
+ font-family: 'Courier New', monospace;
125
+ font-weight: 500;
126
+ background: white;
127
+ padding: 0.5rem 0.75rem;
128
+ border-radius: 6px;
129
+ border: 1px solid #dee2e6;
130
+ }
131
+
132
+ .volume-control {
133
+ display: flex;
134
+ align-items: center;
135
+ gap: 0.5rem;
136
+ }
137
+
138
+ .volume-control label {
139
+ display: flex;
140
+ align-items: center;
141
+ gap: 0.5rem;
142
  font-size: 0.9rem;
143
+ color: #495057;
144
+ }
145
+
146
+ .volume-control input[type="range"] {
147
+ width: 100px;
148
+ }
149
+
150
+ /* Responsive adjustments */
151
+ @media (max-width: 768px) {
152
+ .playback-controls {
153
+ justify-content: center;
154
+ gap: 0.75rem;
155
+ padding: 1rem;
156
+ }
157
+
158
+ .playback-controls button {
159
+ padding: 0.5rem 1rem;
160
+ font-size: 0.9rem;
161
+ }
162
+
163
+ .tempo-control {
164
+ width: 100%;
165
+ justify-content: space-between;
166
+ }
167
+
168
+ .time-display {
169
+ width: 100%;
170
+ text-align: center;
171
+ }
172
  }
frontend/src/components/PlaybackControls.tsx CHANGED
@@ -10,7 +10,7 @@ import { useState, useRef, useEffect } from 'react';
10
  import * as Tone from 'tone';
11
  // useNotationStore is optional for tests; guard its usage
12
  import { useNotationStore } from '../store/notation';
13
- import { durationToSeconds } from '../utils/musicxml-parser';
14
  import type { Note } from '../store/notation';
15
  import './PlaybackControls.css';
16
 
 
10
  import * as Tone from 'tone';
11
  // useNotationStore is optional for tests; guard its usage
12
  import { useNotationStore } from '../store/notation';
13
+ import { durationToSeconds } from '../utils/duration';
14
  import type { Note } from '../store/notation';
15
  import './PlaybackControls.css';
16
 
frontend/src/components/ScoreEditor.tsx CHANGED
@@ -1,11 +1,13 @@
1
  /**
2
  * Main score editor component integrating notation, playback, and export.
 
3
  */
4
  import { useState, useEffect } from 'react';
5
- import { getMidiFile, getMetadata } from '../api/client';
6
  import { useNotationStore } from '../store/notation';
7
  import { NotationCanvas } from './NotationCanvas';
8
  import { PlaybackControls } from './PlaybackControls';
 
9
  import './ScoreEditor.css';
10
 
11
  interface ScoreEditorProps {
@@ -15,7 +17,11 @@ interface ScoreEditorProps {
15
  export function ScoreEditor({ jobId }: ScoreEditorProps) {
16
  const [loading, setLoading] = useState(true);
17
  const [error, setError] = useState<string | null>(null);
 
 
18
  const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
 
 
19
 
20
  useEffect(() => {
21
  loadScore();
@@ -26,18 +32,34 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
26
  setLoading(true);
27
  setError(null);
28
 
29
- // Fetch MIDI file and metadata in parallel
30
- const [midiData, metadata] = await Promise.all([
31
- getMidiFile(jobId),
32
- getMetadata(jobId),
33
- ]);
 
 
 
 
 
34
 
35
- // Load MIDI into notation store
36
- await loadFromMidi(midiData, {
37
- tempo: metadata.tempo,
38
- keySignature: metadata.key_signature,
39
- timeSignature: metadata.time_signature,
40
- });
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  setLoading(false);
43
  } catch (err) {
@@ -88,11 +110,16 @@ export function ScoreEditor({ jobId }: ScoreEditorProps) {
88
  <div className="editor-toolbar">
89
  <h2>Score Editor</h2>
90
  <div className="toolbar-actions">
91
- <button onClick={handleExportMusicXML}>Export MusicXML</button>
92
  <button onClick={handleExportMIDI}>Export MIDI</button>
93
  </div>
94
  </div>
95
 
 
 
 
 
 
 
96
  <PlaybackControls />
97
 
98
  <NotationCanvas />
 
1
  /**
2
  * Main score editor component integrating notation, playback, and export.
3
+ * Supports multi-instrument transcription.
4
  */
5
  import { useState, useEffect } from 'react';
6
+ import { getMidiFile, getMetadata, getJobStatus } from '../api/client';
7
  import { useNotationStore } from '../store/notation';
8
  import { NotationCanvas } from './NotationCanvas';
9
  import { PlaybackControls } from './PlaybackControls';
10
+ import { InstrumentTabs } from './InstrumentTabs';
11
  import './ScoreEditor.css';
12
 
13
  interface ScoreEditorProps {
 
17
  export function ScoreEditor({ jobId }: ScoreEditorProps) {
18
  const [loading, setLoading] = useState(true);
19
  const [error, setError] = useState<string | null>(null);
20
+ const [instruments, setInstruments] = useState<string[]>([]);
21
+
22
  const loadFromMidi = useNotationStore((state) => state.loadFromMidi);
23
+ const activeInstrument = useNotationStore((state) => state.activeInstrument);
24
+ const setActiveInstrument = useNotationStore((state) => state.setActiveInstrument);
25
 
26
  useEffect(() => {
27
  loadScore();
 
32
  setLoading(true);
33
  setError(null);
34
 
35
+ // Get job status to find which instruments were transcribed
36
+ const jobStatus = await getJobStatus(jobId);
37
+
38
+ // For now, assume piano is the default instrument (backend doesn't yet return instruments list)
39
+ // TODO: Update when backend API returns instruments list in job status
40
+ const transcribedInstruments = ['piano'];
41
+ setInstruments(transcribedInstruments);
42
+
43
+ // Fetch metadata once (shared across all instruments)
44
+ const metadata = await getMetadata(jobId);
45
 
46
+ // Load MIDI files for each instrument
47
+ for (const instrument of transcribedInstruments) {
48
+ // For MVP, backend only supports piano (single stem)
49
+ // In the future, this will fetch per-instrument MIDI: `/api/v1/scores/${jobId}/midi/${instrument}`
50
+ const midiData = await getMidiFile(jobId);
51
+
52
+ await loadFromMidi(instrument, midiData, {
53
+ tempo: metadata.tempo,
54
+ keySignature: metadata.key_signature,
55
+ timeSignature: metadata.time_signature,
56
+ });
57
+ }
58
+
59
+ // Set first instrument as active
60
+ if (transcribedInstruments.length > 0) {
61
+ setActiveInstrument(transcribedInstruments[0]);
62
+ }
63
 
64
  setLoading(false);
65
  } catch (err) {
 
110
  <div className="editor-toolbar">
111
  <h2>Score Editor</h2>
112
  <div className="toolbar-actions">
 
113
  <button onClick={handleExportMIDI}>Export MIDI</button>
114
  </div>
115
  </div>
116
 
117
+ <InstrumentTabs
118
+ instruments={instruments}
119
+ activeInstrument={activeInstrument}
120
+ onInstrumentChange={setActiveInstrument}
121
+ />
122
+
123
  <PlaybackControls />
124
 
125
  <NotationCanvas />
frontend/src/store/notation.ts CHANGED
@@ -1,8 +1,8 @@
1
  /**
2
  * Zustand store for notation state management.
 
3
  */
4
  import { create } from 'zustand';
5
- import { parseMusicXML } from '../utils/musicxml-parser';
6
  import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
7
 
8
  export interface Note {
@@ -42,15 +42,22 @@ export interface Score {
42
  }
43
 
44
  interface NotationState {
 
 
 
 
 
 
45
  score: Score | null;
 
46
  selectedNoteIds: string[];
47
  currentTool: 'select' | 'add' | 'delete';
48
  currentDuration: string;
49
  playingNoteIds: string[]; // Notes currently being played (for visual feedback)
50
 
51
  // Actions
52
- loadFromMusicXML: (xml: string) => void;
53
  loadFromMidi: (
 
54
  midiData: ArrayBuffer,
55
  metadata?: {
56
  tempo?: number;
@@ -58,7 +65,7 @@ interface NotationState {
58
  timeSignature?: { numerator: number; denominator: number };
59
  }
60
  ) => Promise<void>;
61
- exportToMusicXML: () => string;
62
  addNote: (measureId: string, note: Note) => void;
63
  deleteNote: (noteId: string) => void;
64
  updateNote: (noteId: string, changes: Partial<Note>) => void;
@@ -69,72 +76,90 @@ interface NotationState {
69
  setPlayingNoteIds: (noteIds: string[]) => void;
70
  }
71
 
72
- export const useNotationStore = create<NotationState>((set, _get) => ({
 
 
 
 
 
 
73
  score: null,
 
74
  selectedNoteIds: [],
75
  currentTool: 'select',
76
  currentDuration: 'quarter',
77
  playingNoteIds: [],
78
 
79
- loadFromMusicXML: (xml: string) => {
80
- try {
81
- const score = parseMusicXML(xml);
82
- set({ score });
83
- } catch (error) {
84
- console.error('Failed to parse MusicXML:', error);
85
- // Fallback to empty score
86
- set({
87
- score: {
88
- id: 'score-1',
89
- title: 'Transcribed Score',
90
- composer: 'Unknown',
91
- key: 'C',
92
- timeSignature: '4/4',
93
- tempo: 120,
94
- parts: [],
95
- measures: [],
96
- },
97
- });
98
- }
99
- },
100
-
101
- loadFromMidi: async (midiData, metadata) => {
102
  try {
103
  let score = await parseMidiFile(midiData, {
104
  tempo: metadata?.tempo,
105
  timeSignature: metadata?.timeSignature,
106
  keySignature: metadata?.keySignature,
107
- splitAtMiddleC: true,
108
  middleCNote: 60,
109
  });
110
 
111
  // Assign chord IDs to simultaneous notes
112
  score = assignChordIds(score);
113
 
114
- set({ score });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
  } catch (error) {
116
  console.error('Failed to parse MIDI:', error);
117
- // Fallback to empty score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  set({
119
- score: {
120
- id: 'score-1',
121
- title: 'Transcribed Score',
122
- composer: 'YourMT3+',
123
- key: metadata?.keySignature || 'C',
124
- timeSignature: metadata?.timeSignature
125
- ? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
126
- : '4/4',
127
- tempo: metadata?.tempo || 120,
128
- parts: [],
129
- measures: [],
130
- },
131
  });
132
  }
133
  },
134
 
135
- exportToMusicXML: () => {
136
- // TODO: Implement MusicXML generation
137
- return '<?xml version="1.0"?><score-partwise></score-partwise>';
 
 
 
 
 
 
138
  },
139
 
140
  addNote: (measureId, note) =>
 
1
  /**
2
  * Zustand store for notation state management.
3
+ * Supports multi-instrument transcription.
4
  */
5
  import { create } from 'zustand';
 
6
  import { parseMidiFile, assignChordIds } from '../utils/midi-parser';
7
 
8
  export interface Note {
 
42
  }
43
 
44
  interface NotationState {
45
+ // Multi-instrument support
46
+ scores: Map<string, Score>; // instrument -> Score
47
+ activeInstrument: string; // Currently viewing instrument (e.g., 'piano', 'vocals')
48
+ availableInstruments: string[]; // All transcribed instruments
49
+
50
+ // Legacy single-score access (for backward compatibility)
51
  score: Score | null;
52
+
53
  selectedNoteIds: string[];
54
  currentTool: 'select' | 'add' | 'delete';
55
  currentDuration: string;
56
  playingNoteIds: string[]; // Notes currently being played (for visual feedback)
57
 
58
  // Actions
 
59
  loadFromMidi: (
60
+ instrument: string,
61
  midiData: ArrayBuffer,
62
  metadata?: {
63
  tempo?: number;
 
65
  timeSignature?: { numerator: number; denominator: number };
66
  }
67
  ) => Promise<void>;
68
+ setActiveInstrument: (instrument: string) => void;
69
  addNote: (measureId: string, note: Note) => void;
70
  deleteNote: (noteId: string) => void;
71
  updateNote: (noteId: string, changes: Partial<Note>) => void;
 
76
  setPlayingNoteIds: (noteIds: string[]) => void;
77
  }
78
 
79
+ export const useNotationStore = create<NotationState>((set, get) => ({
80
+ // Multi-instrument state
81
+ scores: new Map(),
82
+ activeInstrument: 'piano',
83
+ availableInstruments: [],
84
+
85
+ // Legacy single-score (points to active instrument's score)
86
  score: null,
87
+
88
  selectedNoteIds: [],
89
  currentTool: 'select',
90
  currentDuration: 'quarter',
91
  playingNoteIds: [],
92
 
93
+ loadFromMidi: async (instrument, midiData, metadata) => {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  try {
95
  let score = await parseMidiFile(midiData, {
96
  tempo: metadata?.tempo,
97
  timeSignature: metadata?.timeSignature,
98
  keySignature: metadata?.keySignature,
99
+ splitAtMiddleC: instrument === 'piano', // Only split piano into grand staff
100
  middleCNote: 60,
101
  });
102
 
103
  // Assign chord IDs to simultaneous notes
104
  score = assignChordIds(score);
105
 
106
+ // Update scores map
107
+ const state = get();
108
+ const newScores = new Map(state.scores);
109
+ newScores.set(instrument, score);
110
+
111
+ // Update available instruments if this is a new one
112
+ const newAvailableInstruments = state.availableInstruments.includes(instrument)
113
+ ? state.availableInstruments
114
+ : [...state.availableInstruments, instrument];
115
+
116
+ set({
117
+ scores: newScores,
118
+ availableInstruments: newAvailableInstruments,
119
+ // Update legacy score if this is the active instrument
120
+ score: state.activeInstrument === instrument ? score : state.score,
121
+ });
122
  } catch (error) {
123
  console.error('Failed to parse MIDI:', error);
124
+ // Create fallback empty score
125
+ const emptyScore: Score = {
126
+ id: `score-${instrument}`,
127
+ title: 'Transcribed Score',
128
+ composer: 'YourMT3+',
129
+ key: metadata?.keySignature || 'C',
130
+ timeSignature: metadata?.timeSignature
131
+ ? `${metadata.timeSignature.numerator}/${metadata.timeSignature.denominator}`
132
+ : '4/4',
133
+ tempo: metadata?.tempo || 120,
134
+ parts: [],
135
+ measures: [],
136
+ };
137
+
138
+ const state = get();
139
+ const newScores = new Map(state.scores);
140
+ newScores.set(instrument, emptyScore);
141
+
142
+ const newAvailableInstruments = state.availableInstruments.includes(instrument)
143
+ ? state.availableInstruments
144
+ : [...state.availableInstruments, instrument];
145
+
146
  set({
147
+ scores: newScores,
148
+ availableInstruments: newAvailableInstruments,
149
+ score: state.activeInstrument === instrument ? emptyScore : state.score,
 
 
 
 
 
 
 
 
 
150
  });
151
  }
152
  },
153
 
154
+ setActiveInstrument: (instrument) => {
155
+ const state = get();
156
+ const instrumentScore = state.scores.get(instrument);
157
+
158
+ set({
159
+ activeInstrument: instrument,
160
+ score: instrumentScore || null,
161
+ selectedNoteIds: [], // Clear selection when switching instruments
162
+ });
163
  },
164
 
165
  addNote: (measureId, note) =>
frontend/src/utils/duration.ts ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Duration conversion utilities for music notation.
3
+ */
4
+
5
+ /**
6
+ * Convert note duration to seconds based on tempo.
7
+ *
8
+ * @param duration - Note duration type (whole, half, quarter, eighth, 16th, 32nd)
9
+ * @param tempo - Tempo in BPM
10
+ * @param dotted - Whether the note is dotted (increases duration by 50%)
11
+ * @returns Duration in seconds
12
+ */
13
+ export function durationToSeconds(
14
+ duration: string,
15
+ tempo: number,
16
+ dotted: boolean = false
17
+ ): number {
18
+ // Quarter note duration at given tempo
19
+ const quarterNoteDuration = 60 / tempo;
20
+
21
+ // Map durations to quarter note multipliers
22
+ const durationMap: Record<string, number> = {
23
+ 'whole': 4,
24
+ 'half': 2,
25
+ 'quarter': 1,
26
+ 'eighth': 0.5,
27
+ '16th': 0.25,
28
+ '32nd': 0.125,
29
+ };
30
+
31
+ const baseDuration = durationMap[duration] || 1;
32
+ const multiplier = dotted ? 1.5 : 1;
33
+
34
+ return quarterNoteDuration * baseDuration * multiplier;
35
+ }
frontend/src/utils/musicxml-parser.ts DELETED
@@ -1,275 +0,0 @@
1
- /**
2
- * Lightweight MusicXML parser for extracting notes and metadata.
3
- *
4
- * Supports grand staff with multiple parts (treble + bass for piano).
5
- */
6
- import type { Note, Score, Measure, Part } from '../store/notation';
7
-
8
- interface ParsedNote {
9
- pitch: string;
10
- octave: number;
11
- duration: number; // in divisions
12
- type: string; // whole, half, quarter, etc.
13
- accidental?: string;
14
- dotted: boolean;
15
- isRest: boolean;
16
- }
17
-
18
- export function parseMusicXML(xml: string): Score {
19
- const parser = new DOMParser();
20
- const doc = parser.parseFromString(xml, 'text/xml');
21
-
22
- // Extract metadata
23
- const title = doc.querySelector('movement-title')?.textContent ||
24
- doc.querySelector('work-title')?.textContent ||
25
- 'Untitled';
26
- const composer = doc.querySelector('creator[type="composer"]')?.textContent || 'Unknown';
27
-
28
- // Extract key signature
29
- const fifths = doc.querySelector('key fifths')?.textContent;
30
- const keyMap: Record<string, string> = {
31
- '-7': 'Cb', '-6': 'Gb', '-5': 'Db', '-4': 'Ab', '-3': 'Eb', '-2': 'Bb', '-1': 'F',
32
- '0': 'C', '1': 'G', '2': 'D', '3': 'A', '4': 'E', '5': 'B', '6': 'F#', '7': 'C#'
33
- };
34
- const key = fifths ? keyMap[fifths] || 'C' : 'C';
35
-
36
- // Extract time signature
37
- const beats = doc.querySelector('time beats')?.textContent || '4';
38
- const beatType = doc.querySelector('time beat-type')?.textContent || '4';
39
- const timeSignature = `${beats}/${beatType}`;
40
-
41
- // Extract tempo
42
- let tempo = 120;
43
- const tempoElement = doc.querySelector('sound[tempo]');
44
- if (tempoElement) {
45
- const tempoAttr = tempoElement.getAttribute('tempo');
46
- if (tempoAttr) {
47
- tempo = parseInt(tempoAttr);
48
- }
49
- }
50
-
51
- // Parse all parts (for grand staff: treble + bass)
52
- const partElements = doc.querySelectorAll('score-partwise > part');
53
- const parts: Part[] = [];
54
- let allMeasures: Measure[] = []; // For backward compatibility
55
-
56
- partElements.forEach((partEl, partIdx) => {
57
- const partId = partEl.getAttribute('id') || `part-${partIdx}`;
58
-
59
- // Get part name and clef
60
- const partName = doc.querySelector(`score-part[id="${partId}"] part-name`)?.textContent || `Part ${partIdx + 1}`;
61
-
62
- // Determine clef from first measure
63
- const firstClefSign = partEl.querySelector('measure clef sign')?.textContent || 'G';
64
- const clef: 'treble' | 'bass' = firstClefSign === 'F' ? 'bass' : 'treble';
65
-
66
- const measureElements = partEl.querySelectorAll('measure');
67
- const measures: Measure[] = [];
68
-
69
- measureElements.forEach((measureEl, idx) => {
70
- const measureNumber = parseInt(measureEl.getAttribute('number') || String(idx + 1));
71
- const notes: Note[] = [];
72
-
73
- const noteElements = measureEl.querySelectorAll('note');
74
- let currentChord: Note[] = [];
75
- let currentChordId: string | null = null;
76
-
77
- noteElements.forEach((noteEl, noteIdx) => {
78
- const parsedNote = parseNoteElement(noteEl);
79
- if (!parsedNote) return;
80
-
81
- // Check if this note is part of a chord (simultaneous with previous note)
82
- const isChordMember = noteEl.querySelector('chord') !== null;
83
-
84
- // Assign chord ID for chord grouping
85
- if (!isChordMember) {
86
- // Start new chord group (or single note)
87
- currentChordId = `chord-${measureNumber}-${noteIdx}`;
88
- }
89
-
90
- if (parsedNote.isRest) {
91
- // Flush any pending chord before adding rest
92
- if (currentChord.length > 0) {
93
- notes.push(...currentChord);
94
- currentChord = [];
95
- }
96
-
97
- // Include rests (rests don't have chordId)
98
- notes.push({
99
- id: `note-${measureNumber}-${notes.length}`,
100
- pitch: '',
101
- duration: parsedNote.type,
102
- octave: 0,
103
- startTime: 0,
104
- dotted: parsedNote.dotted,
105
- isRest: true,
106
- chordId: undefined, // Rests are never part of chords
107
- });
108
- } else {
109
- // Build full pitch string for pitched notes
110
- const pitchName = parsedNote.pitch +
111
- (parsedNote.accidental === 'sharp' ? '#' :
112
- parsedNote.accidental === 'flat' ? 'b' : '');
113
- const fullPitch = pitchName + parsedNote.octave;
114
-
115
- const note: Note = {
116
- id: `note-${measureNumber}-${notes.length + currentChord.length}`,
117
- pitch: fullPitch,
118
- duration: parsedNote.type,
119
- octave: parsedNote.octave,
120
- startTime: 0,
121
- dotted: parsedNote.dotted,
122
- accidental: parsedNote.accidental as 'sharp' | 'flat' | 'natural' | undefined,
123
- isRest: false,
124
- chordId: currentChordId || undefined, // Assign chord ID for grouping
125
- };
126
-
127
- if (isChordMember) {
128
- // Add to current chord group
129
- currentChord.push(note);
130
- } else {
131
- // Flush previous chord if any
132
- if (currentChord.length > 0) {
133
- notes.push(...currentChord);
134
- currentChord = [];
135
- }
136
- // Start new chord group (or single note)
137
- currentChord = [note];
138
- }
139
- }
140
- });
141
-
142
- // Flush any remaining chord
143
- if (currentChord.length > 0) {
144
- notes.push(...currentChord);
145
- }
146
-
147
- // Add ALL measures, even if empty (will show as blank measures)
148
- measures.push({
149
- id: `part-${partIdx}-measure-${measureNumber}`,
150
- number: measureNumber,
151
- notes,
152
- });
153
- });
154
-
155
- // Add this part to the parts array
156
- parts.push({
157
- id: partId,
158
- name: partName,
159
- clef,
160
- measures,
161
- });
162
-
163
- // For backward compatibility, use first part's measures
164
- if (partIdx === 0) {
165
- allMeasures = measures;
166
- }
167
- });
168
-
169
- // If no parts found, return empty score
170
- if (parts.length === 0) {
171
- parts.push({
172
- id: 'part-0',
173
- name: 'Piano',
174
- clef: 'treble',
175
- measures: [],
176
- });
177
- }
178
-
179
- return {
180
- id: 'parsed-score',
181
- title,
182
- composer,
183
- key,
184
- timeSignature,
185
- tempo,
186
- parts,
187
- measures: allMeasures, // Legacy field for backward compat
188
- };
189
- }
190
-
191
- function parseNoteElement(noteEl: Element): ParsedNote | null {
192
- const durationEl = noteEl.querySelector('duration');
193
- const typeEl = noteEl.querySelector('type');
194
-
195
- if (!durationEl || !typeEl) return null;
196
-
197
- // Check if this is a rest
198
- const isRest = noteEl.querySelector('rest') !== null;
199
-
200
- if (isRest) {
201
- return {
202
- pitch: '',
203
- octave: 0,
204
- duration: parseInt(durationEl.textContent || '0'),
205
- type: typeEl.textContent || 'quarter',
206
- dotted: noteEl.querySelector('dot') !== null,
207
- isRest: true,
208
- };
209
- }
210
-
211
- // Parse pitched note
212
- const pitchEl = noteEl.querySelector('pitch');
213
- if (!pitchEl) return null;
214
-
215
- const step = pitchEl.querySelector('step')?.textContent;
216
- const octave = pitchEl.querySelector('octave')?.textContent;
217
- const alter = pitchEl.querySelector('alter')?.textContent; // Semantic pitch alteration
218
- const accidentalEl = noteEl.querySelector('accidental'); // Visual accidental display
219
- const dotEl = noteEl.querySelector('dot');
220
-
221
- if (!step || !octave) return null;
222
-
223
- // Parse accidental from both <alter> (semantic) and <accidental> (visual) tags
224
- let accidental: string | undefined;
225
-
226
- // Priority 1: Use <alter> for pitch accuracy (indicates actual pitch)
227
- if (alter) {
228
- const alterValue = parseInt(alter);
229
- if (alterValue === 1) accidental = 'sharp';
230
- else if (alterValue === -1) accidental = 'flat';
231
- else if (alterValue === 0) accidental = 'natural';
232
- }
233
-
234
- // Priority 2: If no <alter>, check <accidental> tag (visual notation)
235
- if (!accidental && accidentalEl) {
236
- const accType = accidentalEl.textContent;
237
- if (accType === 'sharp') accidental = 'sharp';
238
- else if (accType === 'flat') accidental = 'flat';
239
- else if (accType === 'natural') accidental = 'natural';
240
- }
241
-
242
- return {
243
- pitch: step,
244
- octave: parseInt(octave),
245
- duration: parseInt(durationEl.textContent || '0'),
246
- type: typeEl.textContent || 'quarter',
247
- accidental,
248
- dotted: dotEl !== null,
249
- isRest: false,
250
- };
251
- }
252
-
253
- /**
254
- * Convert note duration string to seconds based on tempo.
255
- */
256
- export function durationToSeconds(duration: string, tempo: number, dotted: boolean = false): number {
257
- const quarterNoteDuration = 60 / tempo; // seconds per quarter note
258
-
259
- const durationMap: Record<string, number> = {
260
- 'whole': quarterNoteDuration * 4,
261
- 'half': quarterNoteDuration * 2,
262
- 'quarter': quarterNoteDuration,
263
- 'eighth': quarterNoteDuration / 2,
264
- '16th': quarterNoteDuration / 4,
265
- '32nd': quarterNoteDuration / 8,
266
- };
267
-
268
- let baseDuration = durationMap[duration] || quarterNoteDuration;
269
-
270
- if (dotted) {
271
- baseDuration *= 1.5;
272
- }
273
-
274
- return baseDuration;
275
- }