Spaces:
Sleeping
Sleeping
Ewan Claude Opus 4.6 committed on
Commit ·
56c8033
1
Parent(s): f9c4bfd
Add Full Song mode with Demucs source separation
Browse filesNew "Full Song" tab on the upload screen uses Demucs to separate
any audio into stems, then transcribes piano and bass parts
independently. Bass notes render in amber on the piano roll.
- New transcriber/separate.py: Demucs wrapper
- New transcriber/optimize_bass.py: simplified bass optimization
- New API endpoint POST /api/transcribe-full with async polling
- Upload screen: Solo Piano / Full Song tab switcher
- Color scheme: bass instrument color (amber)
- MIDI parser: instrument detection from MIDI program number
- Dockerfile: CPU-only PyTorch + Demucs + pre-downloaded model
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- Dockerfile +7 -0
- api/server.py +135 -0
- app/src/App.jsx +102 -35
- app/src/components/PianoRoll.jsx +3 -3
- app/src/index.css +34 -0
- app/src/utils/colorScheme.js +8 -2
- app/src/utils/midiHelpers.js +4 -0
- transcriber/optimize_bass.py +110 -0
- transcriber/separate.py +54 -0
Dockerfile
CHANGED
|
@@ -26,6 +26,13 @@ RUN pip install --no-cache-dir \
|
|
| 26 |
yt-dlp mir-eval resampy scikit-learn && \
|
| 27 |
pip install --no-cache-dir --no-deps basic-pitch
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
# Copy application code
|
| 30 |
COPY transcriber/ /app/transcriber/
|
| 31 |
COPY api/ /app/api/
|
|
|
|
| 26 |
yt-dlp mir-eval resampy scikit-learn && \
|
| 27 |
pip install --no-cache-dir --no-deps basic-pitch
|
| 28 |
|
| 29 |
+
# Install Demucs for full-song source separation. The CPU-only PyTorch
# wheel keeps the image small; no GPU is available at runtime anyway.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir demucs

# Pre-download the htdemucs model weights at build time so the first
# full-song request doesn't stall on a large download.
RUN python -c "import demucs.pretrained; demucs.pretrained.get_model('htdemucs')"
|
| 35 |
+
|
| 36 |
# Copy application code
|
| 37 |
COPY transcriber/ /app/transcriber/
|
| 38 |
COPY api/ /app/api/
|
api/server.py
CHANGED
|
@@ -1,11 +1,15 @@
|
|
| 1 |
"""FastAPI backend for the piano tutorial transcription pipeline."""
|
| 2 |
|
| 3 |
import json
|
|
|
|
| 4 |
import sys
|
| 5 |
import tempfile
|
|
|
|
|
|
|
| 6 |
import uuid
|
| 7 |
from pathlib import Path
|
| 8 |
|
|
|
|
| 9 |
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 10 |
from fastapi.responses import FileResponse, JSONResponse
|
| 11 |
from fastapi.staticfiles import StaticFiles
|
|
@@ -105,6 +109,137 @@ async def get_chords(job_id: str):
|
|
| 105 |
return JSONResponse(chord_data)
|
| 106 |
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
@app.get("/api/health")
|
| 109 |
async def health():
|
| 110 |
return {"status": "ok"}
|
|
|
|
| 1 |
"""FastAPI backend for the piano tutorial transcription pipeline."""
|
| 2 |
|
| 3 |
import json
|
| 4 |
+
import shutil
|
| 5 |
import sys
|
| 6 |
import tempfile
|
| 7 |
+
import threading
|
| 8 |
+
import traceback
|
| 9 |
import uuid
|
| 10 |
from pathlib import Path
|
| 11 |
|
| 12 |
+
import pretty_midi
|
| 13 |
from fastapi import FastAPI, UploadFile, File, HTTPException
|
| 14 |
from fastapi.responses import FileResponse, JSONResponse
|
| 15 |
from fastapi.staticfiles import StaticFiles
|
|
|
|
| 109 |
return JSONResponse(chord_data)
|
| 110 |
|
| 111 |
|
| 112 |
+
# ── Full-song mode (Demucs source separation) ──────────────────────────
|
| 113 |
+
|
| 114 |
+
# In-memory job status for async full-song transcription
|
| 115 |
+
job_status = {}
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def merge_stems(piano_midi_path, bass_midi_path, output_path):
    """Merge piano and bass MIDI into a single multi-track file.

    Track 0 is piano (program 0) and track 1 is bass (program 33) so
    downstream consumers can tell the instruments apart by MIDI program.
    """
    piano_src = pretty_midi.PrettyMIDI(str(piano_midi_path))
    bass_src = pretty_midi.PrettyMIDI(str(bass_midi_path))

    merged = pretty_midi.PrettyMIDI()

    # Track 0: flatten every piano-source instrument into one piano track.
    piano_track = pretty_midi.Instrument(program=0, name="Piano")
    for src_inst in piano_src.instruments:
        piano_track.notes.extend(src_inst.notes)
    merged.instruments.append(piano_track)

    # Track 1: same for bass (program 33 = Electric Bass, finger).
    bass_track = pretty_midi.Instrument(program=33, name="Bass")
    for src_inst in bass_src.instruments:
        bass_track.notes.extend(src_inst.notes)
    merged.instruments.append(bass_track)

    merged.write(str(output_path))
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def run_full_transcription(job_id, audio_path, job_dir):
    """Background worker for full-song transcription with Demucs.

    Runs in a daemon thread; progress is published to the module-level
    job_status dict, which the /api/jobs/{job_id}/status endpoint polls.
    """
    try:
        # Step 1: split the mix into vocals/drums/bass/other stems.
        job_status[job_id] = {"step": 1, "label": "Separating instruments with AI...", "done": False}
        from separate import separate
        stems = separate(str(audio_path), str(job_dir / "stems"))

        # Step 2: the piano lives in the "other" stem (piano/guitar/synths).
        job_status[job_id] = {"step": 2, "label": "Transcribing piano part...", "done": False}
        from transcribe import transcribe as run_transcribe
        piano_raw = job_dir / "piano_raw.mid"
        run_transcribe(stems["other"], str(piano_raw))

        # Step 3: transcribe the dedicated bass stem.
        job_status[job_id] = {"step": 3, "label": "Transcribing bass part...", "done": False}
        bass_raw = job_dir / "bass_raw.mid"
        run_transcribe(stems["bass"], str(bass_raw))

        # Step 4: clean up both raw transcriptions against their own stems.
        job_status[job_id] = {"step": 4, "label": "Optimizing note accuracy...", "done": False}
        from optimize import optimize
        from optimize_bass import optimize_bass

        piano_opt = job_dir / "piano_optimized.mid"
        optimize(stems["other"], str(piano_raw), str(piano_opt))

        bass_opt = job_dir / "bass_optimized.mid"
        optimize_bass(stems["bass"], str(bass_raw), str(bass_opt))

        # Step 5: combine both parts into one multi-track MIDI file.
        job_status[job_id] = {"step": 5, "label": "Assembling final result...", "done": False}
        merged_path = job_dir / "transcription.mid"
        merge_stems(str(piano_opt), str(bass_opt), str(merged_path))

        # Chord detection only makes sense on the piano part.
        from chords import detect_chords
        chords_path = job_dir / "transcription_chords.json"
        detect_chords(str(piano_opt), str(chords_path))
        chord_data = None
        if chords_path.exists():
            with open(chords_path) as f:
                chord_data = json.load(f)

        # Reclaim disk space: the stems are large WAVs we no longer need.
        stems_dir = job_dir / "stems"
        if stems_dir.exists():
            shutil.rmtree(stems_dir)
        for intermediate in (piano_raw, bass_raw, piano_opt, bass_opt):
            intermediate.unlink(missing_ok=True)

        job_status[job_id] = {
            "step": 6, "label": "Done!", "done": True,
            "result": {
                "job_id": job_id,
                "midi_url": f"/api/jobs/{job_id}/midi",
                "chords_url": f"/api/jobs/{job_id}/chords",
                "chords": chord_data,
            },
        }

    except Exception as e:
        traceback.print_exc()
        # Surface a truncated message to the polling client.
        job_status[job_id] = {
            "step": -1, "label": str(e)[:200], "done": True, "error": str(e)[:200],
        }
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
@app.post("/api/transcribe-full")
async def transcribe_full(file: UploadFile = File(...)):
    """Start full-song transcription with Demucs source separation.

    Returns immediately with a job_id. Poll /api/jobs/{job_id}/status.
    """
    job_id = str(uuid.uuid4())[:8]
    job_dir = WORK_DIR / job_id
    job_dir.mkdir(exist_ok=True)

    # file.filename may be None for some clients; Path(None) would raise
    # TypeError, so fall back to an empty name and then to .m4a.
    suffix = Path(file.filename or "").suffix or ".m4a"
    audio_path = job_dir / f"upload{suffix}"
    content = await file.read()
    audio_path.write_bytes(content)

    # Heavy lifting happens in a daemon thread; this request returns at
    # once and the client polls the status endpoint for progress.
    job_status[job_id] = {"step": 0, "label": "Starting...", "done": False}
    thread = threading.Thread(
        target=run_full_transcription,
        args=(job_id, audio_path, job_dir),
        daemon=True,
    )
    thread.start()

    return JSONResponse({"job_id": job_id})
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
@app.get("/api/jobs/{job_id}/status")
async def get_job_status(job_id: str):
    """Get the current status of a full-song transcription job."""
    current = job_status.get(job_id)
    if current is None:
        raise HTTPException(404, f"No job found with id {job_id}")
    return JSONResponse(current)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
@app.get("/api/health")
|
| 244 |
async def health():
|
| 245 |
return {"status": "ok"}
|
app/src/App.jsx
CHANGED
|
@@ -15,6 +15,7 @@ const API_BASE = import.meta.env.DEV ? 'http://localhost:8000' : '';
|
|
| 15 |
function UploadScreen({ onFileSelected }) {
|
| 16 |
const [isDragging, setIsDragging] = useState(false);
|
| 17 |
const [errorMsg, setErrorMsg] = useState('');
|
|
|
|
| 18 |
const fileInputRef = useRef(null);
|
| 19 |
|
| 20 |
const handleFile = useCallback((file) => {
|
|
@@ -25,8 +26,8 @@ function UploadScreen({ onFileSelected }) {
|
|
| 25 |
return;
|
| 26 |
}
|
| 27 |
setErrorMsg('');
|
| 28 |
-
onFileSelected(file);
|
| 29 |
-
}, [onFileSelected]);
|
| 30 |
|
| 31 |
const handleDrop = useCallback((e) => {
|
| 32 |
e.preventDefault();
|
|
@@ -56,10 +57,25 @@ function UploadScreen({ onFileSelected }) {
|
|
| 56 |
<p className="upload-tagline">Your AI piano teacher</p>
|
| 57 |
</div>
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
<p className="upload-description">
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
</p>
|
| 64 |
|
| 65 |
<div
|
|
@@ -93,7 +109,7 @@ function UploadScreen({ onFileSelected }) {
|
|
| 93 |
);
|
| 94 |
}
|
| 95 |
|
| 96 |
-
function LoadingScreen({ status }) {
|
| 97 |
return (
|
| 98 |
<div className="upload-screen">
|
| 99 |
<div className="upload-processing">
|
|
@@ -101,7 +117,7 @@ function LoadingScreen({ status }) {
|
|
| 101 |
<OctopusLogo size={72} />
|
| 102 |
</div>
|
| 103 |
<h2>{status}</h2>
|
| 104 |
-
<p className="loading-sub">This usually takes 20-30 seconds</p>
|
| 105 |
<div className="loading-bar">
|
| 106 |
<div className="loading-bar-fill" />
|
| 107 |
</div>
|
|
@@ -115,6 +131,7 @@ export default function App() {
|
|
| 115 |
const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
|
| 116 |
const [screen, setScreen] = useState('upload'); // 'upload' | 'loading' | 'player'
|
| 117 |
const [loadingStatus, setLoadingStatus] = useState('');
|
|
|
|
| 118 |
const [chords, setChords] = useState([]);
|
| 119 |
const [activeTab, setActiveTab] = useState('roll'); // 'roll' | 'sheet'
|
| 120 |
|
|
@@ -151,39 +168,89 @@ export default function App() {
|
|
| 151 |
seekTo(0);
|
| 152 |
}, [isPlaying, pause, seekTo]);
|
| 153 |
|
| 154 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
stopPlayback();
|
| 156 |
setScreen('loading');
|
| 157 |
-
setLoadingStatus('Transcribing your song...');
|
| 158 |
-
try {
|
| 159 |
-
const form = new FormData();
|
| 160 |
-
form.append('file', file);
|
| 161 |
-
const res = await fetch(`${API_BASE}/api/transcribe`, {
|
| 162 |
-
method: 'POST',
|
| 163 |
-
body: form,
|
| 164 |
-
});
|
| 165 |
-
if (!res.ok) {
|
| 166 |
-
const err = await res.json().catch(() => ({ detail: res.statusText }));
|
| 167 |
-
throw new Error(err.detail || 'Transcription failed');
|
| 168 |
-
}
|
| 169 |
-
const data = await res.json();
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
}
|
| 180 |
-
// Screen transition to 'player' happens via the useEffect above
|
| 181 |
-
// once both samplesLoaded and notes.length > 0
|
| 182 |
-
} catch (e) {
|
| 183 |
-
setScreen('upload');
|
| 184 |
-
alert(e.message || 'Something went wrong. Please try again.');
|
| 185 |
}
|
| 186 |
-
}, [
|
| 187 |
|
| 188 |
const handleNewSong = useCallback(() => {
|
| 189 |
stopPlayback();
|
|
@@ -226,7 +293,7 @@ export default function App() {
|
|
| 226 |
}
|
| 227 |
|
| 228 |
if (screen === 'loading') {
|
| 229 |
-
return <LoadingScreen status={loadingStatus} />;
|
| 230 |
}
|
| 231 |
|
| 232 |
return (
|
|
|
|
| 15 |
function UploadScreen({ onFileSelected }) {
|
| 16 |
const [isDragging, setIsDragging] = useState(false);
|
| 17 |
const [errorMsg, setErrorMsg] = useState('');
|
| 18 |
+
const [mode, setMode] = useState('solo'); // 'solo' | 'full'
|
| 19 |
const fileInputRef = useRef(null);
|
| 20 |
|
| 21 |
const handleFile = useCallback((file) => {
|
|
|
|
| 26 |
return;
|
| 27 |
}
|
| 28 |
setErrorMsg('');
|
| 29 |
+
onFileSelected(file, mode);
|
| 30 |
+
}, [onFileSelected, mode]);
|
| 31 |
|
| 32 |
const handleDrop = useCallback((e) => {
|
| 33 |
e.preventDefault();
|
|
|
|
| 57 |
<p className="upload-tagline">Your AI piano teacher</p>
|
| 58 |
</div>
|
| 59 |
|
| 60 |
+
<div className="upload-mode-tabs">
|
| 61 |
+
<button
|
| 62 |
+
className={`upload-mode-tab ${mode === 'solo' ? 'active' : ''}`}
|
| 63 |
+
onClick={() => setMode('solo')}
|
| 64 |
+
>
|
| 65 |
+
Solo Piano
|
| 66 |
+
</button>
|
| 67 |
+
<button
|
| 68 |
+
className={`upload-mode-tab ${mode === 'full' ? 'active' : ''}`}
|
| 69 |
+
onClick={() => setMode('full')}
|
| 70 |
+
>
|
| 71 |
+
Full Song
|
| 72 |
+
</button>
|
| 73 |
+
</div>
|
| 74 |
+
|
| 75 |
<p className="upload-description">
|
| 76 |
+
{mode === 'solo'
|
| 77 |
+
? 'Drop a song and Mr. Octopus will transcribe it into a piano tutorial you can follow along with, note by note. Works best with clearly recorded solo piano pieces.'
|
| 78 |
+
: 'Drop any song and Mr. Octopus will separate the instruments using AI, then transcribe the piano and bass parts. Works with full band recordings, even AI-generated music.'}
|
| 79 |
</p>
|
| 80 |
|
| 81 |
<div
|
|
|
|
| 109 |
);
|
| 110 |
}
|
| 111 |
|
| 112 |
+
function LoadingScreen({ status, estimate }) {
|
| 113 |
return (
|
| 114 |
<div className="upload-screen">
|
| 115 |
<div className="upload-processing">
|
|
|
|
| 117 |
<OctopusLogo size={72} />
|
| 118 |
</div>
|
| 119 |
<h2>{status}</h2>
|
| 120 |
+
<p className="loading-sub">{estimate || 'This usually takes 20-30 seconds'}</p>
|
| 121 |
<div className="loading-bar">
|
| 122 |
<div className="loading-bar-fill" />
|
| 123 |
</div>
|
|
|
|
| 131 |
const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
|
| 132 |
const [screen, setScreen] = useState('upload'); // 'upload' | 'loading' | 'player'
|
| 133 |
const [loadingStatus, setLoadingStatus] = useState('');
|
| 134 |
+
const [loadingEstimate, setLoadingEstimate] = useState('');
|
| 135 |
const [chords, setChords] = useState([]);
|
| 136 |
const [activeTab, setActiveTab] = useState('roll'); // 'roll' | 'sheet'
|
| 137 |
|
|
|
|
| 168 |
seekTo(0);
|
| 169 |
}, [isPlaying, pause, seekTo]);
|
| 170 |
|
| 171 |
+
const loadResult = useCallback(async (data, fileName) => {
|
| 172 |
+
setLoadingStatus('Loading piano sounds...');
|
| 173 |
+
const midiRes = await fetch(`${API_BASE}${data.midi_url}`);
|
| 174 |
+
const blob = await midiRes.blob();
|
| 175 |
+
loadFromBlob(blob, fileName.replace(/\.[^.]+$/, '.mid'));
|
| 176 |
+
|
| 177 |
+
if (data.chords) {
|
| 178 |
+
const chordList = data.chords?.chords || data.chords || [];
|
| 179 |
+
setChords(Array.isArray(chordList) ? chordList : []);
|
| 180 |
+
}
|
| 181 |
+
}, [loadFromBlob]);
|
| 182 |
+
|
| 183 |
+
const handleFileSelected = useCallback(async (file, mode = 'solo') => {
|
| 184 |
stopPlayback();
|
| 185 |
setScreen('loading');
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
+
if (mode === 'full') {
|
| 188 |
+
// Full song: async with polling
|
| 189 |
+
setLoadingStatus('Uploading...');
|
| 190 |
+
setLoadingEstimate('This usually takes 5-8 minutes');
|
| 191 |
+
try {
|
| 192 |
+
const form = new FormData();
|
| 193 |
+
form.append('file', file);
|
| 194 |
+
const res = await fetch(`${API_BASE}/api/transcribe-full`, {
|
| 195 |
+
method: 'POST',
|
| 196 |
+
body: form,
|
| 197 |
+
});
|
| 198 |
+
if (!res.ok) {
|
| 199 |
+
const err = await res.json().catch(() => ({ detail: res.statusText }));
|
| 200 |
+
throw new Error(err.detail || 'Failed to start transcription');
|
| 201 |
+
}
|
| 202 |
+
const { job_id } = await res.json();
|
| 203 |
+
|
| 204 |
+
// Poll for status
|
| 205 |
+
const poll = async () => {
|
| 206 |
+
try {
|
| 207 |
+
const statusRes = await fetch(`${API_BASE}/api/jobs/${job_id}/status`);
|
| 208 |
+
const status = await statusRes.json();
|
| 209 |
+
|
| 210 |
+
if (status.error) {
|
| 211 |
+
throw new Error(status.error);
|
| 212 |
+
}
|
| 213 |
|
| 214 |
+
setLoadingStatus(status.label);
|
| 215 |
+
|
| 216 |
+
if (status.done && status.result) {
|
| 217 |
+
await loadResult(status.result, file.name);
|
| 218 |
+
} else {
|
| 219 |
+
setTimeout(poll, 2000);
|
| 220 |
+
}
|
| 221 |
+
} catch (e) {
|
| 222 |
+
setScreen('upload');
|
| 223 |
+
alert(e.message || 'Something went wrong. Please try again.');
|
| 224 |
+
}
|
| 225 |
+
};
|
| 226 |
+
poll();
|
| 227 |
+
} catch (e) {
|
| 228 |
+
setScreen('upload');
|
| 229 |
+
alert(e.message || 'Something went wrong. Please try again.');
|
| 230 |
+
}
|
| 231 |
+
} else {
|
| 232 |
+
// Solo piano: existing synchronous flow
|
| 233 |
+
setLoadingStatus('Transcribing your song...');
|
| 234 |
+
setLoadingEstimate('This usually takes 20-30 seconds');
|
| 235 |
+
try {
|
| 236 |
+
const form = new FormData();
|
| 237 |
+
form.append('file', file);
|
| 238 |
+
const res = await fetch(`${API_BASE}/api/transcribe`, {
|
| 239 |
+
method: 'POST',
|
| 240 |
+
body: form,
|
| 241 |
+
});
|
| 242 |
+
if (!res.ok) {
|
| 243 |
+
const err = await res.json().catch(() => ({ detail: res.statusText }));
|
| 244 |
+
throw new Error(err.detail || 'Transcription failed');
|
| 245 |
+
}
|
| 246 |
+
const data = await res.json();
|
| 247 |
+
await loadResult(data, file.name);
|
| 248 |
+
} catch (e) {
|
| 249 |
+
setScreen('upload');
|
| 250 |
+
alert(e.message || 'Something went wrong. Please try again.');
|
| 251 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
}
|
| 253 |
+
}, [loadResult, stopPlayback]);
|
| 254 |
|
| 255 |
const handleNewSong = useCallback(() => {
|
| 256 |
stopPlayback();
|
|
|
|
| 293 |
}
|
| 294 |
|
| 295 |
if (screen === 'loading') {
|
| 296 |
+
return <LoadingScreen status={loadingStatus} estimate={loadingEstimate} />;
|
| 297 |
}
|
| 298 |
|
| 299 |
return (
|
app/src/components/PianoRoll.jsx
CHANGED
|
@@ -55,18 +55,18 @@ function drawFallingNotes(ctx, notes, currentTime, hitLineY, positionMap) {
|
|
| 55 |
const w = pos.width - padding * 2;
|
| 56 |
|
| 57 |
// Glow
|
| 58 |
-
ctx.shadowColor = noteGlowColor(note.midi);
|
| 59 |
ctx.shadowBlur = 12;
|
| 60 |
|
| 61 |
// Note body
|
| 62 |
-
ctx.fillStyle = noteColor(note.midi);
|
| 63 |
drawRoundedRect(ctx, x, clippedTop, w, height, 4);
|
| 64 |
ctx.fill();
|
| 65 |
|
| 66 |
// Brighter edge at the bottom (hitting edge)
|
| 67 |
if (noteBottom <= hitLineY && noteBottom >= hitLineY - 3) {
|
| 68 |
ctx.shadowBlur = 20;
|
| 69 |
-
ctx.fillStyle = noteGlowColor(note.midi);
|
| 70 |
ctx.fillRect(x, hitLineY - 3, w, 3);
|
| 71 |
}
|
| 72 |
}
|
|
|
|
| 55 |
const w = pos.width - padding * 2;
|
| 56 |
|
| 57 |
// Glow
|
| 58 |
+
ctx.shadowColor = noteGlowColor(note.midi, note.instrument);
|
| 59 |
ctx.shadowBlur = 12;
|
| 60 |
|
| 61 |
// Note body
|
| 62 |
+
ctx.fillStyle = noteColor(note.midi, note.instrument);
|
| 63 |
drawRoundedRect(ctx, x, clippedTop, w, height, 4);
|
| 64 |
ctx.fill();
|
| 65 |
|
| 66 |
// Brighter edge at the bottom (hitting edge)
|
| 67 |
if (noteBottom <= hitLineY && noteBottom >= hitLineY - 3) {
|
| 68 |
ctx.shadowBlur = 20;
|
| 69 |
+
ctx.fillStyle = noteGlowColor(note.midi, note.instrument);
|
| 70 |
ctx.fillRect(x, hitLineY - 3, w, 3);
|
| 71 |
}
|
| 72 |
}
|
app/src/index.css
CHANGED
|
@@ -92,6 +92,40 @@ body {
|
|
| 92 |
margin-top: 8px;
|
| 93 |
}
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
.upload-description {
|
| 96 |
color: var(--text-muted);
|
| 97 |
font-size: 14px;
|
|
|
|
| 92 |
margin-top: 8px;
|
| 93 |
}
|
| 94 |
|
| 95 |
+
/* Segmented control for choosing Solo Piano vs Full Song mode. */
.upload-mode-tabs {
  display: flex;
  gap: 4px;
  margin-bottom: 20px;
  background: var(--surface-2);
  border-radius: var(--radius);
  padding: 4px;
  border: 1px solid var(--border);
}

/* Individual tab button; inherits the app font to override UA styles. */
.upload-mode-tab {
  flex: 1;
  padding: 10px 20px;
  border: none;
  border-radius: 7px;
  background: transparent;
  color: var(--text-muted);
  font-size: 14px;
  font-weight: 600;
  font-family: inherit;
  cursor: pointer;
  transition: all 0.15s;
}

.upload-mode-tab:hover {
  color: var(--text);
}

/* Selected tab gets the primary accent and a soft glow. */
.upload-mode-tab.active {
  background: var(--primary);
  color: white;
  box-shadow: 0 2px 8px var(--primary-glow);
}
|
| 128 |
+
|
| 129 |
.upload-description {
|
| 130 |
color: var(--text-muted);
|
| 131 |
font-size: 14px;
|
app/src/utils/colorScheme.js
CHANGED
|
@@ -10,6 +10,10 @@ export const COLORS = {
|
|
| 10 |
rightHand: '#06b6d4',
|
| 11 |
rightHandGlow: '#22d3ee',
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
// Piano key colors
|
| 14 |
whiteKey: '#e8e8e8',
|
| 15 |
whiteKeyActive: '#c4b5fd',
|
|
@@ -26,10 +30,12 @@ export const COLORS = {
|
|
| 26 |
|
| 27 |
export const MIDI_SPLIT_POINT = 60; // Middle C (C4)
|
| 28 |
|
| 29 |
-
export function noteColor(midiNumber) {
|
|
|
|
| 30 |
return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHand : COLORS.rightHand;
|
| 31 |
}
|
| 32 |
|
| 33 |
-
export function noteGlowColor(midiNumber) {
|
|
|
|
| 34 |
return midiNumber < MIDI_SPLIT_POINT ? COLORS.leftHandGlow : COLORS.rightHandGlow;
|
| 35 |
}
|
|
|
|
| 10 |
rightHand: '#06b6d4',
|
| 11 |
rightHandGlow: '#22d3ee',
|
| 12 |
|
| 13 |
+
// Instrument colors
|
| 14 |
+
bass: '#f59e0b',
|
| 15 |
+
bassGlow: '#fbbf24',
|
| 16 |
+
|
| 17 |
// Piano key colors
|
| 18 |
whiteKey: '#e8e8e8',
|
| 19 |
whiteKeyActive: '#c4b5fd',
|
|
|
|
| 30 |
|
| 31 |
export const MIDI_SPLIT_POINT = 60; // Middle C (C4)
|
| 32 |
|
| 33 |
+
// Fill color for a note: bass notes are amber regardless of pitch;
// piano notes split into left/right hand colors at middle C.
export function noteColor(midiNumber, instrument = 'piano') {
  if (instrument === 'bass') {
    return COLORS.bass;
  }
  if (midiNumber < MIDI_SPLIT_POINT) {
    return COLORS.leftHand;
  }
  return COLORS.rightHand;
}
|
| 37 |
|
| 38 |
+
// Glow color companion to noteColor: amber glow for bass, hand-split
// glow colors for piano.
export function noteGlowColor(midiNumber, instrument = 'piano') {
  if (instrument === 'bass') {
    return COLORS.bassGlow;
  }
  if (midiNumber < MIDI_SPLIT_POINT) {
    return COLORS.leftHandGlow;
  }
  return COLORS.rightHandGlow;
}
|
app/src/utils/midiHelpers.js
CHANGED
|
@@ -105,6 +105,9 @@ export function parseMidiFile(midiObject) {
|
|
| 105 |
const notes = [];
|
| 106 |
|
| 107 |
midiObject.tracks.forEach((track) => {
|
|
|
|
|
|
|
|
|
|
| 108 |
track.notes.forEach((note) => {
|
| 109 |
notes.push({
|
| 110 |
midi: note.midi,
|
|
@@ -113,6 +116,7 @@ export function parseMidiFile(midiObject) {
|
|
| 113 |
duration: note.duration,
|
| 114 |
velocity: note.velocity,
|
| 115 |
hand: note.midi < MIDI_SPLIT_POINT ? 'left' : 'right',
|
|
|
|
| 116 |
});
|
| 117 |
});
|
| 118 |
});
|
|
|
|
| 105 |
const notes = [];
|
| 106 |
|
| 107 |
midiObject.tracks.forEach((track) => {
|
| 108 |
+
const program = track.instrument?.number ?? 0;
|
| 109 |
+
const instrument = (program >= 32 && program <= 39) ? 'bass' : 'piano';
|
| 110 |
+
|
| 111 |
track.notes.forEach((note) => {
|
| 112 |
notes.push({
|
| 113 |
midi: note.midi,
|
|
|
|
| 116 |
duration: note.duration,
|
| 117 |
velocity: note.velocity,
|
| 118 |
hand: note.midi < MIDI_SPLIT_POINT ? 'left' : 'right',
|
| 119 |
+
instrument,
|
| 120 |
});
|
| 121 |
});
|
| 122 |
});
|
transcriber/optimize_bass.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Simplified optimization pipeline for bass transcriptions.

Cherry-picks relevant steps from optimize.py. Bass is mostly monophonic,
so we skip chord alignment, harmonic ghost removal, CQT extension,
playability limits, and chord detection.
"""

import copy
from pathlib import Path

import numpy as np
import pretty_midi
import librosa

from optimize import (
    remove_leading_silence_notes,
    remove_trailing_silence_notes,
    remove_low_energy_notes,
    quantize_to_beat_grid,
    correct_onsets,
    apply_global_offset,
)


def optimize_bass(original_audio_path, midi_path, output_path=None):
    """Optimization pipeline tailored for bass transcriptions.

    Args:
        original_audio_path: Path to bass stem audio (WAV from Demucs)
        midi_path: Path to raw MIDI from Basic Pitch
        output_path: Output path (defaults to overwrite midi_path)

    Returns:
        pretty_midi.PrettyMIDI: Optimized bass MIDI
    """
    if output_path is None:
        output_path = midi_path
    output_path = str(output_path)

    sr = 22050
    hop_length = 512

    # Mono load of the bass stem; used as the timing reference below.
    audio, _ = librosa.load(str(original_audio_path), sr=sr, mono=True)

    strength_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=strength_env, sr=sr, hop_length=hop_length,
        backtrack=False, delta=0.04,
    )
    ref_onsets = librosa.frames_to_time(onset_frames, sr=sr, hop_length=hop_length)

    midi_data = pretty_midi.PrettyMIDI(str(midi_path))

    # Step 0: strip notes hallucinated in leading/trailing silence.
    midi_data, _, music_start = remove_leading_silence_notes(midi_data, audio, sr)
    midi_data, _, _ = remove_trailing_silence_notes(midi_data, audio, sr)

    # Step 1: drop low-energy hallucinations.
    midi_data, _ = remove_low_energy_notes(midi_data, audio, sr, hop_length)

    # Step 2: drop notes above the bass range (pitch > E4 = MIDI 64).
    for inst in midi_data.instruments:
        kept = [n for n in inst.notes if n.pitch <= 64]
        removed = len(inst.notes) - len(kept)
        inst.notes = kept
        if removed:
            print(f" Bass: removed {removed} notes above bass range")

    # Step 3: snap note timing to the detected beat grid.
    midi_data, _, tempo = quantize_to_beat_grid(midi_data, audio, sr, hop_length, strength=1.0)

    # Steps 4-6: onset correction, three passes with tightening windows.
    midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets)
    midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets, min_off=0.01, max_off=0.06)
    midi_data, _, _, _, _, _ = correct_onsets(midi_data, ref_onsets, min_off=0.005, max_off=0.025)

    # Step 7: global timing offset against the reference onsets.
    midi_data, _ = apply_global_offset(midi_data, ref_onsets)

    # Step 8: enforce a minimum duration and trim same-pitch overlaps.
    for inst in midi_data.instruments:
        ordered = sorted(inst.notes, key=lambda n: (n.start, n.pitch))
        for i, note in enumerate(ordered):
            if note.end - note.start < 0.10:
                note.end = note.start + 0.10
            # Trim against the next note at the same pitch, if it overlaps.
            for later in ordered[i + 1:]:
                if later.pitch == note.pitch and later.start < note.end:
                    note.end = max(note.start + 0.01, later.start - 0.005)
                    break
        inst.notes = [n for n in ordered if n.end - n.start > 0.01]

    # Shift everything back to t=0 if there was leading silence.
    if music_start > 0.1:
        for inst in midi_data.instruments:
            for note in inst.notes:
                note.start = max(0, note.start - music_start)
                note.end = max(note.start + 0.01, note.end - music_start)

    # Tag as Electric Bass so downstream consumers can identify the part.
    for inst in midi_data.instruments:
        inst.program = 33  # Electric Bass (finger)
        inst.name = "Bass"

    total = sum(len(inst.notes) for inst in midi_data.instruments)
    print(f" Bass optimization complete: {total} notes")

    midi_data.write(output_path)
    return midi_data
|
transcriber/separate.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Demucs source separation for full-song transcription."""

import subprocess
import sys
from pathlib import Path


def separate(input_path: str, output_dir: str, model: str = "htdemucs") -> dict:
    """Run Demucs source separation on an audio file.

    Args:
        input_path: Path to input audio file
        output_dir: Directory to write separated stems
        model: Demucs model name (default: htdemucs)

    Returns:
        dict mapping stem name to WAV path for 'vocals', 'drums', 'bass',
        and 'other' (the 'other' stem contains piano, guitar, synths, etc.)

    Raises:
        RuntimeError: if the Demucs subprocess exits non-zero.
        FileNotFoundError: if the expected output files are missing.
    """
    src = Path(input_path)
    out = Path(output_dir)
    out.mkdir(parents=True, exist_ok=True)

    # Invoke Demucs through the current interpreter so the same
    # environment (and its installed torch build) is used.
    cmd = [
        sys.executable, "-m", "demucs",
        "--name", model,
        "--out", str(out),
        str(src),
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if proc.returncode != 0:
        raise RuntimeError(f"Demucs failed: {proc.stderr[:500]}")

    # Demucs writes stems to: output_dir/{model}/{filename_without_ext}/
    stems_dir = out / model / src.stem
    if not stems_dir.exists():
        raise FileNotFoundError(f"Demucs output not found at {stems_dir}")

    stems = {}
    for stem_name in ("vocals", "drums", "bass", "other"):
        wav_path = stems_dir / f"{stem_name}.wav"
        if not wav_path.exists():
            raise FileNotFoundError(f"Missing stem: {wav_path}")
        stems[stem_name] = str(wav_path)

    return stems
|