Ewan Claude Opus 4.6 committed on
Commit
56c8033
·
1 Parent(s): f9c4bfd

Add Full Song mode with Demucs source separation

Browse files

New "Full Song" tab on the upload screen uses Demucs to separate
any audio into stems, then transcribes piano and bass parts
independently. Bass notes render in amber on the piano roll.

- New transcriber/separate.py: Demucs wrapper
- New transcriber/optimize_bass.py: simplified bass optimization
- New API endpoint POST /api/transcribe-full with async polling
- Upload screen: Solo Piano / Full Song tab switcher
- Color scheme: bass instrument color (amber)
- MIDI parser: instrument detection from MIDI program number
- Dockerfile: CPU-only PyTorch + Demucs + pre-downloaded model

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Dockerfile CHANGED
@@ -26,6 +26,13 @@ RUN pip install --no-cache-dir \
26
  yt-dlp mir-eval resampy scikit-learn && \
27
  pip install --no-cache-dir --no-deps basic-pitch
28
 
 
 
 
 
 
 
 
29
  # Copy application code
30
  COPY transcriber/ /app/transcriber/
31
  COPY api/ /app/api/
 
26
  yt-dlp mir-eval resampy scikit-learn && \
27
  pip install --no-cache-dir --no-deps basic-pitch
28
 
29
+ # Install Demucs for full-song source separation (CPU-only PyTorch)
30
+ RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
31
+ pip install --no-cache-dir demucs
32
+
33
+ # Pre-download the htdemucs model so first request isn't slow
34
+ RUN python -c "import demucs.pretrained; demucs.pretrained.get_model('htdemucs')"
35
+
36
  # Copy application code
37
  COPY transcriber/ /app/transcriber/
38
  COPY api/ /app/api/
api/server.py CHANGED
@@ -1,11 +1,15 @@
1
  """FastAPI backend for the piano tutorial transcription pipeline."""
2
 
3
  import json
 
4
  import sys
5
  import tempfile
 
 
6
  import uuid
7
  from pathlib import Path
8
 
 
9
  from fastapi import FastAPI, UploadFile, File, HTTPException
10
  from fastapi.responses import FileResponse, JSONResponse
11
  from fastapi.staticfiles import StaticFiles
@@ -105,6 +109,137 @@ async def get_chords(job_id: str):
105
  return JSONResponse(chord_data)
106
 
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  @app.get("/api/health")
109
  async def health():
110
  return {"status": "ok"}
 
1
  """FastAPI backend for the piano tutorial transcription pipeline."""
2
 
3
  import json
4
+ import shutil
5
  import sys
6
  import tempfile
7
+ import threading
8
+ import traceback
9
  import uuid
10
  from pathlib import Path
11
 
12
+ import pretty_midi
13
  from fastapi import FastAPI, UploadFile, File, HTTPException
14
  from fastapi.responses import FileResponse, JSONResponse
15
  from fastapi.staticfiles import StaticFiles
 
109
  return JSONResponse(chord_data)
110
 
111
 
112
+ # ── Full-song mode (Demucs source separation) ──────────────────────────
113
+
114
+ # In-memory job status for async full-song transcription
115
+ job_status = {}
116
+
117
+
118
+ def merge_stems(piano_midi_path, bass_midi_path, output_path):
119
+ """Merge piano and bass MIDI into a single multi-track file."""
120
+ piano = pretty_midi.PrettyMIDI(str(piano_midi_path))
121
+ bass = pretty_midi.PrettyMIDI(str(bass_midi_path))
122
+
123
+ merged = pretty_midi.PrettyMIDI()
124
+
125
+ # Track 0: Piano (program 0)
126
+ piano_inst = pretty_midi.Instrument(program=0, name="Piano")
127
+ for inst in piano.instruments:
128
+ piano_inst.notes.extend(inst.notes)
129
+ merged.instruments.append(piano_inst)
130
+
131
+ # Track 1: Bass (program 33)
132
+ bass_inst = pretty_midi.Instrument(program=33, name="Bass")
133
+ for inst in bass.instruments:
134
+ bass_inst.notes.extend(inst.notes)
135
+ merged.instruments.append(bass_inst)
136
+
137
+ merged.write(str(output_path))
138
+
139
+
140
+ def run_full_transcription(job_id, audio_path, job_dir):
141
+ """Background worker for full-song transcription with Demucs."""
142
+ try:
143
+ # Step 1: Demucs separation
144
+ job_status[job_id] = {"step": 1, "label": "Separating instruments with AI...", "done": False}
145
+ from separate import separate
146
+ stems = separate(str(audio_path), str(job_dir / "stems"))
147
+
148
+ # Step 2: Transcribe piano from "other" stem
149
+ job_status[job_id] = {"step": 2, "label": "Transcribing piano part...", "done": False}
150
+ from transcribe import transcribe as run_transcribe
151
+ piano_raw = job_dir / "piano_raw.mid"
152
+ run_transcribe(stems["other"], str(piano_raw))
153
+
154
+ # Step 3: Transcribe bass stem
155
+ job_status[job_id] = {"step": 3, "label": "Transcribing bass part...", "done": False}
156
+ bass_raw = job_dir / "bass_raw.mid"
157
+ run_transcribe(stems["bass"], str(bass_raw))
158
+
159
+ # Step 4: Optimize both
160
+ job_status[job_id] = {"step": 4, "label": "Optimizing note accuracy...", "done": False}
161
+ from optimize import optimize
162
+ from optimize_bass import optimize_bass
163
+
164
+ piano_opt = job_dir / "piano_optimized.mid"
165
+ optimize(stems["other"], str(piano_raw), str(piano_opt))
166
+
167
+ bass_opt = job_dir / "bass_optimized.mid"
168
+ optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))
169
+
170
+ # Step 5: Merge into single multi-track MIDI
171
+ job_status[job_id] = {"step": 5, "label": "Assembling final result...", "done": False}
172
+ merged_path = job_dir / "transcription.mid"
173
+ merge_stems(str(piano_opt), str(bass_opt), str(merged_path))
174
+
175
+ # Run chord detection on piano part
176
+ from chords import detect_chords
177
+ chords_path = job_dir / "transcription_chords.json"
178
+ detect_chords(str(piano_opt), str(chords_path))
179
+ chord_data = None
180
+ if chords_path.exists():
181
+ with open(chords_path) as f:
182
+ chord_data = json.load(f)
183
+
184
+ # Clean up large stem files
185
+ stems_dir = job_dir / "stems"
186
+ if stems_dir.exists():
187
+ shutil.rmtree(stems_dir)
188
+ for f in [piano_raw, bass_raw, piano_opt, bass_opt]:
189
+ f.unlink(missing_ok=True)
190
+
191
+ job_status[job_id] = {
192
+ "step": 6, "label": "Done!", "done": True,
193
+ "result": {
194
+ "job_id": job_id,
195
+ "midi_url": f"/api/jobs/{job_id}/midi",
196
+ "chords_url": f"/api/jobs/{job_id}/chords",
197
+ "chords": chord_data,
198
+ },
199
+ }
200
+
201
+ except Exception as e:
202
+ traceback.print_exc()
203
+ job_status[job_id] = {
204
+ "step": -1, "label": str(e)[:200], "done": True, "error": str(e)[:200],
205
+ }
206
+
207
+
208
+ @app.post("/api/transcribe-full")
209
+ async def transcribe_full(file: UploadFile = File(...)):
210
+ """Start full-song transcription with Demucs source separation.
211
+
212
+ Returns immediately with a job_id. Poll /api/jobs/{job_id}/status.
213
+ """
214
+ job_id = str(uuid.uuid4())[:8]
215
+ job_dir = WORK_DIR / job_id
216
+ job_dir.mkdir(exist_ok=True)
217
+
218
+ suffix = Path(file.filename).suffix or ".m4a"
219
+ audio_path = job_dir / f"upload{suffix}"
220
+ content = await file.read()
221
+ audio_path.write_bytes(content)
222
+
223
+ job_status[job_id] = {"step": 0, "label": "Starting...", "done": False}
224
+ thread = threading.Thread(
225
+ target=run_full_transcription,
226
+ args=(job_id, audio_path, job_dir),
227
+ daemon=True,
228
+ )
229
+ thread.start()
230
+
231
+ return JSONResponse({"job_id": job_id})
232
+
233
+
234
+ @app.get("/api/jobs/{job_id}/status")
235
+ async def get_job_status(job_id: str):
236
+ """Get the current status of a full-song transcription job."""
237
+ status = job_status.get(job_id)
238
+ if status is None:
239
+ raise HTTPException(404, f"No job found with id {job_id}")
240
+ return JSONResponse(status)
241
+
242
+
243
  @app.get("/api/health")
244
  async def health():
245
  return {"status": "ok"}
app/src/App.jsx CHANGED
@@ -15,6 +15,7 @@ const API_BASE = import.meta.env.DEV ? 'http://localhost:8000' : '';
15
  function UploadScreen({ onFileSelected }) {
16
  const [isDragging, setIsDragging] = useState(false);
17
  const [errorMsg, setErrorMsg] = useState('');
 
18
  const fileInputRef = useRef(null);
19
 
20
  const handleFile = useCallback((file) => {
@@ -25,8 +26,8 @@ function UploadScreen({ onFileSelected }) {
25
  return;
26
  }
27
  setErrorMsg('');
28
- onFileSelected(file);
29
- }, [onFileSelected]);
30
 
31
  const handleDrop = useCallback((e) => {
32
  e.preventDefault();
@@ -56,10 +57,25 @@ function UploadScreen({ onFileSelected }) {
56
  <p className="upload-tagline">Your AI piano teacher</p>
57
  </div>
58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  <p className="upload-description">
60
- Drop a song and Mr. Octopus will transcribe it into a piano tutorial
61
- you can follow along with, note by note. Works best with clearly
62
- recorded solo piano pieces.
63
  </p>
64
 
65
  <div
@@ -93,7 +109,7 @@ function UploadScreen({ onFileSelected }) {
93
  );
94
  }
95
 
96
- function LoadingScreen({ status }) {
97
  return (
98
  <div className="upload-screen">
99
  <div className="upload-processing">
@@ -101,7 +117,7 @@ function LoadingScreen({ status }) {
101
  <OctopusLogo size={72} />
102
  </div>
103
  <h2>{status}</h2>
104
- <p className="loading-sub">This usually takes 20-30 seconds</p>
105
  <div className="loading-bar">
106
  <div className="loading-bar-fill" />
107
  </div>
@@ -115,6 +131,7 @@ export default function App() {
115
  const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
116
  const [screen, setScreen] = useState('upload'); // 'upload' | 'loading' | 'player'
117
  const [loadingStatus, setLoadingStatus] = useState('');
 
118
  const [chords, setChords] = useState([]);
119
  const [activeTab, setActiveTab] = useState('roll'); // 'roll' | 'sheet'
120
 
@@ -151,39 +168,89 @@ export default function App() {
151
  seekTo(0);
152
  }, [isPlaying, pause, seekTo]);
153
 
154
- const handleFileSelected = useCallback(async (file) => {
 
 
 
 
 
 
 
 
 
 
 
 
155
  stopPlayback();
156
  setScreen('loading');
157
- setLoadingStatus('Transcribing your song...');
158
- try {
159
- const form = new FormData();
160
- form.append('file', file);
161
- const res = await fetch(`${API_BASE}/api/transcribe`, {
162
- method: 'POST',
163
- body: form,
164
- });
165
- if (!res.ok) {
166
- const err = await res.json().catch(() => ({ detail: res.statusText }));
167
- throw new Error(err.detail || 'Transcription failed');
168
- }
169
- const data = await res.json();
170
 
171
- setLoadingStatus('Loading piano sounds...');
172
- const midiRes = await fetch(`${API_BASE}${data.midi_url}`);
173
- const blob = await midiRes.blob();
174
- loadFromBlob(blob, file.name.replace(/\.[^.]+$/, '.mid'));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- if (data.chords) {
177
- const chordList = data.chords?.chords || data.chords || [];
178
- setChords(Array.isArray(chordList) ? chordList : []);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  }
180
- // Screen transition to 'player' happens via the useEffect above
181
- // once both samplesLoaded and notes.length > 0
182
- } catch (e) {
183
- setScreen('upload');
184
- alert(e.message || 'Something went wrong. Please try again.');
185
  }
186
- }, [loadFromBlob, stopPlayback]);
187
 
188
  const handleNewSong = useCallback(() => {
189
  stopPlayback();
@@ -226,7 +293,7 @@ export default function App() {
226
  }
227
 
228
  if (screen === 'loading') {
229
- return <LoadingScreen status={loadingStatus} />;
230
  }
231
 
232
  return (
 
15
  function UploadScreen({ onFileSelected }) {
16
  const [isDragging, setIsDragging] = useState(false);
17
  const [errorMsg, setErrorMsg] = useState('');
18
+ const [mode, setMode] = useState('solo'); // 'solo' | 'full'
19
  const fileInputRef = useRef(null);
20
 
21
  const handleFile = useCallback((file) => {
 
26
  return;
27
  }
28
  setErrorMsg('');
29
+ onFileSelected(file, mode);
30
+ }, [onFileSelected, mode]);
31
 
32
  const handleDrop = useCallback((e) => {
33
  e.preventDefault();
 
57
  <p className="upload-tagline">Your AI piano teacher</p>
58
  </div>
59
 
60
+ <div className="upload-mode-tabs">
61
+ <button
62
+ className={`upload-mode-tab ${mode === 'solo' ? 'active' : ''}`}
63
+ onClick={() => setMode('solo')}
64
+ >
65
+ Solo Piano
66
+ </button>
67
+ <button
68
+ className={`upload-mode-tab ${mode === 'full' ? 'active' : ''}`}
69
+ onClick={() => setMode('full')}
70
+ >
71
+ Full Song
72
+ </button>
73
+ </div>
74
+
75
  <p className="upload-description">
76
+ {mode === 'solo'
77
+ ? 'Drop a song and Mr. Octopus will transcribe it into a piano tutorial you can follow along with, note by note. Works best with clearly recorded solo piano pieces.'
78
+ : 'Drop any song and Mr. Octopus will separate the instruments using AI, then transcribe the piano and bass parts. Works with full band recordings, even AI-generated music.'}
79
  </p>
80
 
81
  <div
 
109
  );
110
  }
111
 
112
+ function LoadingScreen({ status, estimate }) {
113
  return (
114
  <div className="upload-screen">
115
  <div className="upload-processing">
 
117
  <OctopusLogo size={72} />
118
  </div>
119
  <h2>{status}</h2>
120
+ <p className="loading-sub">{estimate || 'This usually takes 20-30 seconds'}</p>
121
  <div className="loading-bar">
122
  <div className="loading-bar-fill" />
123
  </div>
 
131
  const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
132
  const [screen, setScreen] = useState('upload'); // 'upload' | 'loading' | 'player'
133
  const [loadingStatus, setLoadingStatus] = useState('');
134
+ const [loadingEstimate, setLoadingEstimate] = useState('');
135
  const [chords, setChords] = useState([]);
136
  const [activeTab, setActiveTab] = useState('roll'); // 'roll' | 'sheet'
137
 
 
168
  seekTo(0);
169
  }, [isPlaying, pause, seekTo]);
170
 
171
+ const loadResult = useCallback(async (data, fileName) => {
172
+ setLoadingStatus('Loading piano sounds...');
173
+ const midiRes = await fetch(`${API_BASE}${data.midi_url}`);
174
+ const blob = await midiRes.blob();
175
+ loadFromBlob(blob, fileName.replace(/\.[^.]+$/, '.mid'));
176
+
177
+ if (data.chords) {
178
+ const chordList = data.chords?.chords || data.chords || [];
179
+ setChords(Array.isArray(chordList) ? chordList : []);
180
+ }
181
+ }, [loadFromBlob]);
182
+
183
+ const handleFileSelected = useCallback(async (file, mode = 'solo') => {
184
  stopPlayback();
185
  setScreen('loading');
 
 
 
 
 
 
 
 
 
 
 
 
 
186
 
187
+ if (mode === 'full') {
188
+ // Full song: async with polling
189
+ setLoadingStatus('Uploading...');
190
+ setLoadingEstimate('This usually takes 5-8 minutes');
191
+ try {
192
+ const form = new FormData();
193
+ form.append('file', file);
194
+ const res = await fetch(`${API_BASE}/api/transcribe-full`, {
195
+ method: 'POST',
196
+ body: form,
197
+ });
198
+ if (!res.ok) {
199
+ const err = await res.json().catch(() => ({ detail: res.statusText }));
200
+ throw new Error(err.detail || 'Failed to start transcription');
201
+ }
202
+ const { job_id } = await res.json();
203
+
204
+ // Poll for status
205
+ const poll = async () => {
206
+ try {
207
+ const statusRes = await fetch(`${API_BASE}/api/jobs/${job_id}/status`);
208
+ const status = await statusRes.json();
209
+
210
+ if (status.error) {
211
+ throw new Error(status.error);
212
+ }
213
 
214
+ setLoadingStatus(status.label);
215
+
216
+ if (status.done && status.result) {
217
+ await loadResult(status.result, file.name);
218
+ } else {
219
+ setTimeout(poll, 2000);
220
+ }
221
+ } catch (e) {
222
+ setScreen('upload');
223
+ alert(e.message || 'Something went wrong. Please try again.');
224
+ }
225
+ };
226
+ poll();
227
+ } catch (e) {
228
+ setScreen('upload');
229
+ alert(e.message || 'Something went wrong. Please try again.');
230
+ }
231
+ } else {
232
+ // Solo piano: existing synchronous flow
233
+ setLoadingStatus('Transcribing your song...');
234
+ setLoadingEstimate('This usually takes 20-30 seconds');
235
+ try {
236
+ const form = new FormData();
237
+ form.append('file', file);
238
+ const res = await fetch(`${API_BASE}/api/transcribe`, {
239
+ method: 'POST',
240
+ body: form,
241
+ });
242
+ if (!res.ok) {
243
+ const err = await res.json().catch(() => ({ detail: res.statusText }));
244
+ throw new Error(err.detail || 'Transcription failed');
245
+ }
246
+ const data = await res.json();
247
+ await loadResult(data, file.name);
248
+ } catch (e) {
249
+ setScreen('upload');
250
+ alert(e.message || 'Something went wrong. Please try again.');
251
  }
 
 
 
 
 
252
  }
253
+ }, [loadResult, stopPlayback]);
254
 
255
  const handleNewSong = useCallback(() => {
256
  stopPlayback();
 
293
  }
294
 
295
  if (screen === 'loading') {
296
+ return <LoadingScreen status={loadingStatus} estimate={loadingEstimate} />;
297
  }
298
 
299
  return (
app/src/components/PianoRoll.jsx CHANGED
@@ -55,18 +55,18 @@ function drawFallingNotes(ctx, notes, currentTime, hitLineY, positionMap) {
55
  const w = pos.width - padding * 2;
56
 
57
  // Glow
58
- ctx.shadowColor = noteGlowColor(note.midi);
59
  ctx.shadowBlur = 12;
60
 
61
  // Note body
62
- ctx.fillStyle = noteColor(note.midi);
63
  drawRoundedRect(ctx, x, clippedTop, w, height, 4);
64
  ctx.fill();
65
 
66
  // Brighter edge at the bottom (hitting edge)
67
  if (noteBottom <= hitLineY && noteBottom >= hitLineY - 3) {
68
  ctx.shadowBlur = 20;
69
- ctx.fillStyle = noteGlowColor(note.midi);
70
  ctx.fillRect(x, hitLineY - 3, w, 3);
71
  }
72
  }
 
55
  const w = pos.width - padding * 2;
56
 
57
  // Glow
58
+ ctx.shadowColor = noteGlowColor(note.midi, note.instrument);
59
  ctx.shadowBlur = 12;
60
 
61
  // Note body
62
+ ctx.fillStyle = noteColor(note.midi, note.instrument);
63
  drawRoundedRect(ctx, x, clippedTop, w, height, 4);
64
  ctx.fill();
65
 
66
  // Brighter edge at the bottom (hitting edge)
67
  if (noteBottom <= hitLineY && noteBottom >= hitLineY - 3) {
68
  ctx.shadowBlur = 20;
69
+ ctx.fillStyle = noteGlowColor(note.midi, note.instrument);
70
  ctx.fillRect(x, hitLineY - 3, w, 3);
71
  }
72
  }
app/src/index.css CHANGED
@@ -92,6 +92,40 @@ body {
92
  margin-top: 8px;
93
  }
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  .upload-description {
96
  color: var(--text-muted);
97
  font-size: 14px;
 
92
  margin-top: 8px;
93
  }
94
 
95
+ .upload-mode-tabs {
96
+ display: flex;
97
+ gap: 4px;
98
+ margin-bottom: 20px;
99
+ background: var(--surface-2);
100
+ border-radius: var(--radius);
101
+ padding: 4px;
102
+ border: 1px solid var(--border);
103
+ }
104
+
105
+ .upload-mode-tab {
106
+ flex: 1;
107
+ padding: 10px 20px;
108
+ border: none;
109
+ border-radius: 7px;
110
+ background: transparent;
111
+ color: var(--text-muted);
112
+ font-size: 14px;
113
+ font-weight: 600;
114
+ font-family: inherit;
115
+ cursor: pointer;
116
+ transition: all 0.15s;
117
+ }
118
+
119
+ .upload-mode-tab:hover {
120
+ color: var(--text);
121
+ }
122
+
123
+ .upload-mode-tab.active {
124
+ background: var(--primary);
125
+ color: white;
126
+ box-shadow: 0 2px 8px var(--primary-glow);
127
+ }
128
+
129
  .upload-description {
130
  color: var(--text-muted);
131
  font-size: 14px;
app/src/utils/colorScheme.js CHANGED
@@ -10,6 +10,10 @@ export const COLORS = {
10
  rightHand: '#06b6d4',
11
  rightHandGlow: '#22d3ee',
12
 
 
 
 
 
13
  // Piano key colors
14
  whiteKey: '#e8e8e8',
15
  whiteKeyActive: '#c4b5fd',
@@ -26,10 +30,12 @@ export const COLORS = {
26
 
27
  export const MIDI_SPLIT_POINT = 60; // Middle C (C4)
28
 
29
- export function noteColor(midiNumber) {
 
30
  return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHand : COLORS.rightHand;
31
  }
32
 
33
- export function noteGlowColor(midiNumber) {
 
34
  return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHandGlow : COLORS.rightHandGlow;
35
  }
 
10
  rightHand: '#06b6d4',
11
  rightHandGlow: '#22d3ee',
12
 
13
+ // Instrument colors
14
+ bass: '#f59e0b',
15
+ bassGlow: '#fbbf24',
16
+
17
  // Piano key colors
18
  whiteKey: '#e8e8e8',
19
  whiteKeyActive: '#c4b5fd',
 
30
 
31
  export const MIDI_SPLIT_POINT = 60; // Middle C (C4)
32
 
33
+ export function noteColor(midiNumber, instrument = 'piano') {
34
+ if (instrument === 'bass') return COLORS.bass;
35
  return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHand : COLORS.rightHand;
36
  }
37
 
38
+ export function noteGlowColor(midiNumber, instrument = 'piano') {
39
+ if (instrument === 'bass') return COLORS.bassGlow;
40
  return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHandGlow : COLORS.rightHandGlow;
41
  }
app/src/utils/midiHelpers.js CHANGED
@@ -105,6 +105,9 @@ export function parseMidiFile(midiObject) {
105
  const notes = [];
106
 
107
  midiObject.tracks.forEach((track) => {
 
 
 
108
  track.notes.forEach((note) => {
109
  notes.push({
110
  midi: note.midi,
@@ -113,6 +116,7 @@ export function parseMidiFile(midiObject) {
113
  duration: note.duration,
114
  velocity: note.velocity,
115
  hand: note.midi < MIDI_SPLIT_POINT ? 'left' : 'right',
 
116
  });
117
  });
118
  });
 
105
  const notes = [];
106
 
107
  midiObject.tracks.forEach((track) => {
108
+ const program = track.instrument?.number ?? 0;
109
+ const instrument = (program >= 32 && program <= 39) ? 'bass' : 'piano';
110
+
111
  track.notes.forEach((note) => {
112
  notes.push({
113
  midi: note.midi,
 
116
  duration: note.duration,
117
  velocity: note.velocity,
118
  hand: note.midi < MIDI_SPLIT_POINT ? 'left' : 'right',
119
+ instrument,
120
  });
121
  });
122
  });
transcriber/optimize_bass.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Simplified optimization pipeline for bass transcriptions.
2
+
3
+ Cherry-picks relevant steps from optimize.py. Bass is mostly monophonic,
4
+ so we skip chord alignment, harmonic ghost removal, CQT extension,
5
+ playability limits, and chord detection.
6
+ """
7
+
8
+ import copy
9
+ from pathlib import Path
10
+
11
+ import numpy as np
12
+ import pretty_midi
13
+ import librosa
14
+
15
+ from optimize import (
16
+ remove_leading_silence_notes,
17
+ remove_trailing_silence_notes,
18
+ remove_low_energy_notes,
19
+ quantize_to_beat_grid,
20
+ correct_onsets,
21
+ apply_global_offset,
22
+ )
23
+
24
+
25
+ def optimize_bass(original_audio_path, midi_path, output_path=None):
26
+ """Optimization pipeline tailored for bass transcriptions.
27
+
28
+ Args:
29
+ original_audio_path: Path to bass stem audio (WAV from Demucs)
30
+ midi_path: Path to raw MIDI from Basic Pitch
31
+ output_path: Output path (defaults to overwrite midi_path)
32
+
33
+ Returns:
34
+ pretty_midi.PrettyMIDI: Optimized bass MIDI
35
+ """
36
+ if output_path is None:
37
+ output_path = midi_path
38
+ output_path = str(output_path)
39
+
40
+ sr = 22050
41
+ hop_length = 512
42
+
43
+ y, _ = librosa.load(str(original_audio_path), sr=sr, mono=True)
44
+
45
+ onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
46
+ ref_onset_frames = librosa.onset.onset_detect(
47
+ onset_envelope=onset_env, sr=sr, hop_length=hop_length,
48
+ backtrack=False, delta=0.04
49
+ )
50
+ ref_onsets = librosa.frames_to_time(ref_onset_frames, sr=sr, hop_length=hop_length)
51
+
52
+ midi_data = pretty_midi.PrettyMIDI(str(midi_path))
53
+
54
+ # Step 0: Remove silence artifacts
55
+ midi_data, _, music_start = remove_leading_silence_notes(midi_data, y, sr)
56
+ midi_data, _, _ = remove_trailing_silence_notes(midi_data, y, sr)
57
+
58
+ # Step 1: Remove low-energy hallucinations
59
+ midi_data, _ = remove_low_energy_notes(midi_data, y, sr, hop_length)
60
+
61
+ # Step 2: Remove notes above bass range (> C4 = MIDI 60)
62
+ for inst in midi_data.instruments:
63
+ before = len(inst.notes)
64
+ inst.notes = [n for n in inst.notes if n.pitch <= 64]
65
+ removed = before - len(inst.notes)
66
+ if removed:
67
+ print(f" Bass: removed {removed} notes above bass range")
68
+
69
+ # Step 3: Quantize to beat grid
70
+ midi_data, _, tempo = quantize_to_beat_grid(midi_data, y, sr, hop_length, strength=1.0)
71
+
72
+ # Step 4-6: Onset correction (3 passes)
73
+ midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets)
74
+ midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets, min_off=0.01, max_off=0.06)
75
+ midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets, min_off=0.005, max_off=0.025)
76
+
77
+ # Step 7: Global offset
78
+ midi_data, _ = apply_global_offset(midi_data, ref_onsets)
79
+
80
+ # Step 8: Fix overlaps and enforce minimum duration
81
+ for inst in midi_data.instruments:
82
+ notes = sorted(inst.notes, key=lambda n: (n.start, n.pitch))
83
+ for i, note in enumerate(notes):
84
+ # Enforce minimum duration
85
+ if note.end - note.start < 0.10:
86
+ note.end = note.start + 0.10
87
+ # Trim overlap with next same-pitch note
88
+ for j in range(i + 1, len(notes)):
89
+ if notes[j].pitch == note.pitch and notes[j].start < note.end:
90
+ note.end = max(note.start + 0.01, notes[j].start - 0.005)
91
+ break
92
+ inst.notes = [n for n in notes if n.end - n.start > 0.01]
93
+
94
+ # Shift to t=0 if there was leading silence
95
+ if music_start > 0.1:
96
+ for inst in midi_data.instruments:
97
+ for note in inst.notes:
98
+ note.start = max(0, note.start - music_start)
99
+ note.end = max(note.start + 0.01, note.end - music_start)
100
+
101
+ # Set instrument to Electric Bass
102
+ for inst in midi_data.instruments:
103
+ inst.program = 33 # Electric Bass (finger)
104
+ inst.name = "Bass"
105
+
106
+ total = sum(len(inst.notes) for inst in midi_data.instruments)
107
+ print(f" Bass optimization complete: {total} notes")
108
+
109
+ midi_data.write(output_path)
110
+ return midi_data
transcriber/separate.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Demucs source separation for full-song transcription."""
2
+
3
+ import subprocess
4
+ import sys
5
+ from pathlib import Path
6
+
7
+
8
+ def separate(input_path: str, output_dir: str, model: str = "htdemucs") -> dict:
9
+ """Run Demucs source separation on an audio file.
10
+
11
+ Args:
12
+ input_path: Path to input audio file
13
+ output_dir: Directory to write separated stems
14
+ model: Demucs model name (default: htdemucs)
15
+
16
+ Returns:
17
+ dict with stem paths: {
18
+ 'vocals': str,
19
+ 'drums': str,
20
+ 'bass': str,
21
+ 'other': str, # contains piano, guitar, synths, etc.
22
+ }
23
+ """
24
+ input_path = Path(input_path)
25
+ output_dir = Path(output_dir)
26
+ output_dir.mkdir(parents=True, exist_ok=True)
27
+
28
+ python = sys.executable
29
+
30
+ cmd = [
31
+ python, "-m", "demucs",
32
+ "--name", model,
33
+ "--out", str(output_dir),
34
+ str(input_path),
35
+ ]
36
+
37
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
38
+ if result.returncode != 0:
39
+ raise RuntimeError(f"Demucs failed: {result.stderr[:500]}")
40
+
41
+ # Demucs outputs to: output_dir/{model}/{filename_without_ext}/
42
+ stems_dir = output_dir / model / input_path.stem
43
+
44
+ if not stems_dir.exists():
45
+ raise FileNotFoundError(f"Demucs output not found at {stems_dir}")
46
+
47
+ stems = {}
48
+ for name in ("vocals", "drums", "bass", "other"):
49
+ path = stems_dir / f"{name}.wav"
50
+ if not path.exists():
51
+ raise FileNotFoundError(f"Missing stem: {path}")
52
+ stems[name] = str(path)
53
+
54
+ return stems