Ewan committed on
Commit
f0a176a
·
1 Parent(s): 751132e

Initial commit - Mr Octopus piano tutorial app

Browse files
.dockerignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app/node_modules
2
+ app/dist
3
+ transcriber/venv
4
+ transcriber/uploads
5
+ transcriber/analyze_*.py
6
+ transcriber/compare.py
7
+ transcriber/diagnose_*.py
8
+ transcriber/simulate_*.py
9
+ __pycache__
10
+ *.pyc
11
+ .git
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ app/node_modules
2
+ app/dist
3
+ transcriber/venv
4
+ transcriber/uploads
5
+ transcriber/analyze_*.py
6
+ transcriber/compare.py
7
+ transcriber/diagnose_*.py
8
+ transcriber/simulate_*.py
9
+ __pycache__
10
+ *.pyc
Dockerfile ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:20-slim AS frontend
2
+
3
+ WORKDIR /build
4
+ COPY app/package.json app/package-lock.json* ./
5
+ RUN npm ci
6
+ COPY app/ .
7
+ RUN npm run build
8
+
9
+ # --- Python backend ---
10
+ FROM python:3.12-slim
11
+
12
+ # System deps: ffmpeg for audio processing, yt-dlp needs it too
13
+ RUN apt-get update && apt-get install -y --no-install-recommends \
14
+ ffmpeg \
15
+ && rm -rf /var/lib/apt/lists/*
16
+
17
+ WORKDIR /app
18
+
19
+ # Install Python dependencies
20
+ # basic-pitch pulls in tensorflow on Linux, but we only use ONNX runtime.
21
+ # Install it with --no-deps and manually specify what we need.
22
+ COPY api/requirements.txt /app/api/requirements.txt
23
+ RUN pip install --no-cache-dir \
24
+ fastapi uvicorn[standard] python-multipart \
25
+ onnxruntime pretty_midi librosa scipy numpy "setuptools<81" \
26
+ yt-dlp mir-eval resampy scikit-learn && \
27
+ pip install --no-cache-dir --no-deps basic-pitch
28
+
29
+ # Copy application code
30
+ COPY transcriber/ /app/transcriber/
31
+ COPY api/ /app/api/
32
+
33
+ # Copy built frontend
34
+ COPY --from=frontend /build/dist /app/app/dist
35
+
36
+ ENV PORT=7860
37
+ EXPOSE 7860
38
+
39
+ CMD ["uvicorn", "api.server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- title: Mroctopus
3
- emoji: 📈
4
  colorFrom: purple
5
- colorTo: green
6
  sdk: docker
7
- pinned: false
8
  ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Mr Octopus
3
+ emoji: 🐙
4
  colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
+ app_port: 7860
8
  ---
 
 
api/requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.115
2
+ uvicorn[standard]>=0.34
3
+ python-multipart>=0.0.18
4
+
5
+ # Transcription pipeline
6
+ basic-pitch>=0.3
7
+ onnxruntime>=1.17
8
+ pretty_midi>=0.2.10
9
+ librosa>=0.10
10
+ scipy>=1.12
11
+ numpy>=1.24
12
+ setuptools<81
13
+
14
+ # URL downloading
15
+ yt-dlp>=2024.1
api/server.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI backend for the piano tutorial transcription pipeline."""
2
+
3
+ import json
4
+ import sys
5
+ import tempfile
6
+ import uuid
7
+ from pathlib import Path
8
+
9
+ from fastapi import FastAPI, UploadFile, File, HTTPException
10
+ from fastapi.responses import FileResponse, JSONResponse
11
+ from fastapi.staticfiles import StaticFiles
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+
14
+ # Add transcriber to path
15
+ TRANSCRIBER_DIR = Path(__file__).resolve().parent.parent / "transcriber"
16
+ sys.path.insert(0, str(TRANSCRIBER_DIR))
17
+
18
+ app = FastAPI(title="Piano Tutorial API")
19
+
20
+ app.add_middleware(
21
+ CORSMiddleware,
22
+ allow_origins=["*"],
23
+ allow_methods=["*"],
24
+ allow_headers=["*"],
25
+ )
26
+
27
+ # Directory for temporary processing files
28
+ WORK_DIR = Path(tempfile.gettempdir()) / "piano-tutorial"
29
+ WORK_DIR.mkdir(exist_ok=True)
30
+
31
+
32
@app.post("/api/transcribe")
async def transcribe(
    file: UploadFile = File(...),
):
    """Transcribe an uploaded audio file to MIDI.

    Accepts a file upload (MP3, M4A, WAV, OGG, FLAC).
    Returns JSON with a job_id, MIDI download URL, and chord data.
    Raises HTTP 500 when the transcription or optimization step fails.
    """
    job_id = str(uuid.uuid4())[:8]
    job_dir = WORK_DIR / job_id
    job_dir.mkdir(exist_ok=True)

    try:
        # file.filename may be None or empty depending on the client; fall
        # back to ".m4a" rather than crashing on Path(None).
        suffix = Path(file.filename or "").suffix or ".m4a"
        audio_path = job_dir / f"upload{suffix}"
        content = await file.read()
        audio_path.write_bytes(content)

        # Run transcription. Imported lazily so the API process can start
        # even while the heavy transcriber dependencies are loading.
        from transcribe import transcribe as run_transcribe
        raw_midi_path = job_dir / "transcription_raw.mid"
        run_transcribe(str(audio_path), str(raw_midi_path))

        # Run optimization (also runs chord detection as Step 10)
        from optimize import optimize
        optimized_path = job_dir / "transcription.mid"
        optimize(str(audio_path), str(raw_midi_path), str(optimized_path))

        if not optimized_path.exists():
            raise HTTPException(500, "Optimization failed to produce output")

        # Load chord data if available (optimize writes it as a sidecar file)
        chords_path = job_dir / "transcription_chords.json"
        chord_data = None
        if chords_path.exists():
            with open(chords_path) as f:
                chord_data = json.load(f)

        return JSONResponse({
            "job_id": job_id,
            "midi_url": f"/api/jobs/{job_id}/midi",
            "chords_url": f"/api/jobs/{job_id}/chords",
            "chords": chord_data,
        })

    except HTTPException:
        raise
    except Exception as e:
        # Remove partial outputs so failed jobs don't accumulate in tmp,
        # then surface the failure with the original cause chained.
        import shutil
        shutil.rmtree(job_dir, ignore_errors=True)
        raise HTTPException(500, f"Transcription failed: {e}") from e
82
+
83
+
84
@app.get("/api/jobs/{job_id}/midi")
async def get_midi(job_id: str):
    """Download the optimized MIDI file for a completed job.

    Returns HTTP 404 when the job id is malformed or no output exists.
    """
    # job_ids are short hex strings (uuid4 prefix). Reject anything else so
    # job_id can never traverse out of WORK_DIR (e.g. job_id == "..").
    if not job_id.isalnum():
        raise HTTPException(404, f"No MIDI file found for job {job_id}")
    midi_path = WORK_DIR / job_id / "transcription.mid"
    if not midi_path.exists():
        raise HTTPException(404, f"No MIDI file found for job {job_id}")
    return FileResponse(
        midi_path,
        media_type="audio/midi",
        filename="transcription.mid",
    )
95
+
96
+
97
@app.get("/api/jobs/{job_id}/chords")
async def get_chords(job_id: str):
    """Get the detected chord data for a completed job.

    Returns HTTP 404 when the job id is malformed or no chord data exists.
    """
    # Same traversal guard as the MIDI endpoint: valid job ids are
    # alphanumeric uuid4 prefixes, so anything else (e.g. "..") is rejected.
    if not job_id.isalnum():
        raise HTTPException(404, f"No chord data found for job {job_id}")
    chords_path = WORK_DIR / job_id / "transcription_chords.json"
    if not chords_path.exists():
        raise HTTPException(404, f"No chord data found for job {job_id}")
    with open(chords_path) as f:
        chord_data = json.load(f)
    return JSONResponse(chord_data)
106
+
107
+
108
@app.get("/api/health")
async def health():
    """Liveness probe: report that the API process is up."""
    payload = {"status": "ok"}
    return payload
111
+
112
+
113
# Serve the built React frontend (in production)
DIST_DIR = Path(__file__).resolve().parent.parent / "app" / "dist"
if DIST_DIR.exists():
    # Serve static assets
    app.mount("/assets", StaticFiles(directory=str(DIST_DIR / "assets")), name="assets")

    # Serve MIDI files if they exist
    midi_dir = DIST_DIR / "midi"
    if midi_dir.exists():
        app.mount("/midi", StaticFiles(directory=str(midi_dir)), name="midi")

    # Catch-all: serve index.html for SPA routing
    @app.get("/{path:path}")
    async def serve_spa(path: str):
        """Serve a build file if it exists, else index.html (SPA fallback)."""
        # "{path:path}" may contain "/" and "..", so resolve the candidate
        # and confine it to DIST_DIR to block path traversal
        # (e.g. GET /../../etc/passwd). DIST_DIR is already resolved above.
        candidate = (DIST_DIR / path).resolve()
        if candidate.is_file() and candidate.is_relative_to(DIST_DIR):
            return FileResponse(candidate)
        return FileResponse(DIST_DIR / "index.html")
app/.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Logs
2
+ logs
3
+ *.log
4
+ npm-debug.log*
5
+ yarn-debug.log*
6
+ yarn-error.log*
7
+ pnpm-debug.log*
8
+ lerna-debug.log*
9
+
10
+ node_modules
11
+ dist
12
+ dist-ssr
13
+ *.local
14
+
15
+ # Editor directories and files
16
+ .vscode/*
17
+ !.vscode/extensions.json
18
+ .idea
19
+ .DS_Store
20
+ *.suo
21
+ *.ntvs*
22
+ *.njsproj
23
+ *.sln
24
+ *.sw?
app/README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # React + Vite
2
+
3
+ This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.
4
+
5
+ Currently, two official plugins are available:
6
+
7
+ - [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Babel](https://babeljs.io/) (or [oxc](https://oxc.rs) when used in [rolldown-vite](https://vite.dev/guide/rolldown)) for Fast Refresh
8
+ - [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/) for Fast Refresh
9
+
10
+ ## React Compiler
11
+
12
+ The React Compiler is not enabled on this template because of its impact on dev & build performances. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).
13
+
14
+ ## Expanding the ESLint configuration
15
+
16
+ If you are developing a production application, we recommend using TypeScript with type-aware lint rules enabled. Check out the [TS template](https://github.com/vitejs/vite/tree/main/packages/create-vite/template-react-ts) for information on how to integrate TypeScript and [`typescript-eslint`](https://typescript-eslint.io) in your project.
app/eslint.config.js ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import js from '@eslint/js'
2
+ import globals from 'globals'
3
+ import reactHooks from 'eslint-plugin-react-hooks'
4
+ import reactRefresh from 'eslint-plugin-react-refresh'
5
+ import { defineConfig, globalIgnores } from 'eslint/config'
6
+
7
+ export default defineConfig([
8
+ globalIgnores(['dist']),
9
+ {
10
+ files: ['**/*.{js,jsx}'],
11
+ extends: [
12
+ js.configs.recommended,
13
+ reactHooks.configs.flat.recommended,
14
+ reactRefresh.configs.vite,
15
+ ],
16
+ languageOptions: {
17
+ ecmaVersion: 2020,
18
+ globals: globals.browser,
19
+ parserOptions: {
20
+ ecmaVersion: 'latest',
21
+ ecmaFeatures: { jsx: true },
22
+ sourceType: 'module',
23
+ },
24
+ },
25
+ rules: {
26
+ 'no-unused-vars': ['error', { varsIgnorePattern: '^[A-Z_]' }],
27
+ },
28
+ },
29
+ ])
app/index.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <meta name="theme-color" content="#07070e" />
7
+ <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 48 48'%3E%3Cellipse cx='24' cy='16' rx='14' ry='12' fill='%238b5cf6'/%3E%3Ccircle cx='19' cy='14' r='2.5' fill='%2307070e'/%3E%3Ccircle cx='29' cy='14' r='2.5' fill='%2307070e'/%3E%3Ccircle cx='20' cy='13.3' r='0.9' fill='white'/%3E%3Ccircle cx='30' cy='13.3' r='0.9' fill='white'/%3E%3Crect x='7' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3Crect x='12' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3Crect x='16.5' y='26' width='2.5' height='13' rx='0.8' fill='%231e1b4b'/%3E%3Crect x='21' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3Crect x='26' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3Crect x='30.5' y='26' width='2.5' height='13' rx='0.8' fill='%231e1b4b'/%3E%3Crect x='35' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3Crect x='40' y='26' width='3' height='18' rx='1' fill='%23f0eef5'/%3E%3C/svg%3E" />
8
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
9
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
10
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet" />
11
+ <title>Mr. Octopus</title>
12
+ </head>
13
+ <body>
14
+ <div id="root"></div>
15
+ <script type="module" src="/src/main.jsx"></script>
16
+ </body>
17
+ </html>
app/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
app/package.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "app",
3
+ "private": true,
4
+ "version": "0.0.0",
5
+ "type": "module",
6
+ "scripts": {
7
+ "dev": "vite",
8
+ "build": "vite build",
9
+ "lint": "eslint .",
10
+ "preview": "vite preview"
11
+ },
12
+ "dependencies": {
13
+ "@tonejs/midi": "^2.0.28",
14
+ "react": "^19.2.0",
15
+ "react-dom": "^19.2.0",
16
+ "tone": "^15.1.22"
17
+ },
18
+ "devDependencies": {
19
+ "@eslint/js": "^9.39.1",
20
+ "@types/react": "^19.2.7",
21
+ "@types/react-dom": "^19.2.3",
22
+ "@vitejs/plugin-react": "^5.1.1",
23
+ "eslint": "^9.39.1",
24
+ "eslint-plugin-react-hooks": "^7.0.1",
25
+ "eslint-plugin-react-refresh": "^0.4.24",
26
+ "globals": "^16.5.0",
27
+ "vite": "^7.3.1"
28
+ }
29
+ }
app/public/midi/transcription.mid ADDED
Binary file (8.75 kB). View file
 
app/public/midi/transcription_chords.json ADDED
The diff for this file is too large to render. See raw diff
 
app/public/midi/transcription_raw.mid ADDED
Binary file (13.3 kB). View file
 
app/public/midi/transcription_spectral.json ADDED
@@ -0,0 +1,566 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "spectral_f1": 0.4765,
3
+ "spectral_precision": 0.7711,
4
+ "spectral_recall": 0.3448,
5
+ "spectral_similarity": 0.4061,
6
+ "per_octave": [
7
+ {
8
+ "octave": 0,
9
+ "range": "A0-A1",
10
+ "audio_energy": 154196,
11
+ "missing_energy": 152357,
12
+ "extra_energy": 156,
13
+ "matched_energy": 1839,
14
+ "coverage": 0.0119
15
+ },
16
+ {
17
+ "octave": 1,
18
+ "range": "A1-A2",
19
+ "audio_energy": 194926,
20
+ "missing_energy": 179822,
21
+ "extra_energy": 310,
22
+ "matched_energy": 15104,
23
+ "coverage": 0.0775
24
+ },
25
+ {
26
+ "octave": 2,
27
+ "range": "A2-A3",
28
+ "audio_energy": 199170,
29
+ "missing_energy": 156470,
30
+ "extra_energy": 1031,
31
+ "matched_energy": 42700,
32
+ "coverage": 0.2144
33
+ },
34
+ {
35
+ "octave": 3,
36
+ "range": "A3-A4",
37
+ "audio_energy": 203356,
38
+ "missing_energy": 124913,
39
+ "extra_energy": 929,
40
+ "matched_energy": 78443,
41
+ "coverage": 0.3857
42
+ },
43
+ {
44
+ "octave": 4,
45
+ "range": "A4-A5",
46
+ "audio_energy": 184288,
47
+ "missing_energy": 77943,
48
+ "extra_energy": 11447,
49
+ "matched_energy": 106345,
50
+ "coverage": 0.5771
51
+ },
52
+ {
53
+ "octave": 5,
54
+ "range": "A5-A6",
55
+ "audio_energy": 178926,
56
+ "missing_energy": 57570,
57
+ "extra_energy": 15304,
58
+ "matched_energy": 121356,
59
+ "coverage": 0.6782
60
+ },
61
+ {
62
+ "octave": 6,
63
+ "range": "A6-A7",
64
+ "audio_energy": 61937,
65
+ "missing_energy": 21815,
66
+ "extra_energy": 73759,
67
+ "matched_energy": 40122,
68
+ "coverage": 0.6478
69
+ },
70
+ {
71
+ "octave": 7,
72
+ "range": "A7-A8",
73
+ "audio_energy": 6945,
74
+ "missing_energy": 4709,
75
+ "extra_energy": 18200,
76
+ "matched_energy": 2236,
77
+ "coverage": 0.322
78
+ }
79
+ ],
80
+ "per_time": [
81
+ {
82
+ "time_start": 0.0,
83
+ "time_end": 6.69,
84
+ "missing": 37299,
85
+ "extra": 8429,
86
+ "matched": 15904,
87
+ "fidelity": 0.299
88
+ },
89
+ {
90
+ "time_start": 6.69,
91
+ "time_end": 13.37,
92
+ "missing": 39869,
93
+ "extra": 6624,
94
+ "matched": 20740,
95
+ "fidelity": 0.342
96
+ },
97
+ {
98
+ "time_start": 13.37,
99
+ "time_end": 20.06,
100
+ "missing": 41062,
101
+ "extra": 6050,
102
+ "matched": 20521,
103
+ "fidelity": 0.333
104
+ },
105
+ {
106
+ "time_start": 20.06,
107
+ "time_end": 26.75,
108
+ "missing": 39620,
109
+ "extra": 7666,
110
+ "matched": 21162,
111
+ "fidelity": 0.348
112
+ },
113
+ {
114
+ "time_start": 26.75,
115
+ "time_end": 33.44,
116
+ "missing": 37182,
117
+ "extra": 6304,
118
+ "matched": 24549,
119
+ "fidelity": 0.398
120
+ },
121
+ {
122
+ "time_start": 33.44,
123
+ "time_end": 40.12,
124
+ "missing": 38849,
125
+ "extra": 5103,
126
+ "matched": 23978,
127
+ "fidelity": 0.382
128
+ },
129
+ {
130
+ "time_start": 40.12,
131
+ "time_end": 46.81,
132
+ "missing": 38366,
133
+ "extra": 5501,
134
+ "matched": 23406,
135
+ "fidelity": 0.379
136
+ },
137
+ {
138
+ "time_start": 46.81,
139
+ "time_end": 53.5,
140
+ "missing": 40410,
141
+ "extra": 4906,
142
+ "matched": 21436,
143
+ "fidelity": 0.347
144
+ },
145
+ {
146
+ "time_start": 53.5,
147
+ "time_end": 60.19,
148
+ "missing": 36247,
149
+ "extra": 6550,
150
+ "matched": 22495,
151
+ "fidelity": 0.383
152
+ },
153
+ {
154
+ "time_start": 60.19,
155
+ "time_end": 66.87,
156
+ "missing": 38617,
157
+ "extra": 6128,
158
+ "matched": 22503,
159
+ "fidelity": 0.368
160
+ },
161
+ {
162
+ "time_start": 66.87,
163
+ "time_end": 73.56,
164
+ "missing": 39224,
165
+ "extra": 5250,
166
+ "matched": 22932,
167
+ "fidelity": 0.369
168
+ },
169
+ {
170
+ "time_start": 73.56,
171
+ "time_end": 80.25,
172
+ "missing": 39544,
173
+ "extra": 5728,
174
+ "matched": 22623,
175
+ "fidelity": 0.364
176
+ },
177
+ {
178
+ "time_start": 80.25,
179
+ "time_end": 86.94,
180
+ "missing": 39767,
181
+ "extra": 5753,
182
+ "matched": 23063,
183
+ "fidelity": 0.367
184
+ },
185
+ {
186
+ "time_start": 86.94,
187
+ "time_end": 93.62,
188
+ "missing": 37375,
189
+ "extra": 6104,
190
+ "matched": 24441,
191
+ "fidelity": 0.395
192
+ },
193
+ {
194
+ "time_start": 93.62,
195
+ "time_end": 100.31,
196
+ "missing": 37995,
197
+ "extra": 6150,
198
+ "matched": 21667,
199
+ "fidelity": 0.363
200
+ },
201
+ {
202
+ "time_start": 100.31,
203
+ "time_end": 107.0,
204
+ "missing": 39151,
205
+ "extra": 5272,
206
+ "matched": 19814,
207
+ "fidelity": 0.336
208
+ },
209
+ {
210
+ "time_start": 107.0,
211
+ "time_end": 113.68,
212
+ "missing": 35573,
213
+ "extra": 6465,
214
+ "matched": 20390,
215
+ "fidelity": 0.364
216
+ },
217
+ {
218
+ "time_start": 113.68,
219
+ "time_end": 120.37,
220
+ "missing": 35737,
221
+ "extra": 10055,
222
+ "matched": 14143,
223
+ "fidelity": 0.284
224
+ },
225
+ {
226
+ "time_start": 120.37,
227
+ "time_end": 127.06,
228
+ "missing": 36421,
229
+ "extra": 6311,
230
+ "matched": 19330,
231
+ "fidelity": 0.347
232
+ },
233
+ {
234
+ "time_start": 127.06,
235
+ "time_end": 133.75,
236
+ "missing": 47035,
237
+ "extra": 787,
238
+ "matched": 3048,
239
+ "fidelity": 0.061
240
+ }
241
+ ],
242
+ "missing_notes": [
243
+ {
244
+ "pitch": 35,
245
+ "note": "B1",
246
+ "time_start": 96.595,
247
+ "time_end": 100.566,
248
+ "duration": 3.971,
249
+ "energy": 0.512
250
+ },
251
+ {
252
+ "pitch": 35,
253
+ "note": "B1",
254
+ "time_start": 18.112,
255
+ "time_end": 21.246,
256
+ "duration": 3.135,
257
+ "energy": 0.611
258
+ },
259
+ {
260
+ "pitch": 62,
261
+ "note": "D4",
262
+ "time_start": 4.853,
263
+ "time_end": 7.732,
264
+ "duration": 2.879,
265
+ "energy": 0.665
266
+ },
267
+ {
268
+ "pitch": 64,
269
+ "note": "E4",
270
+ "time_start": 34.435,
271
+ "time_end": 36.827,
272
+ "duration": 2.392,
273
+ "energy": 0.71
274
+ },
275
+ {
276
+ "pitch": 43,
277
+ "note": "G2",
278
+ "time_start": 75.024,
279
+ "time_end": 77.996,
280
+ "duration": 2.972,
281
+ "energy": 0.571
282
+ },
283
+ {
284
+ "pitch": 65,
285
+ "note": "F4",
286
+ "time_start": 46.788,
287
+ "time_end": 49.041,
288
+ "duration": 2.252,
289
+ "energy": 0.73
290
+ },
291
+ {
292
+ "pitch": 65,
293
+ "note": "F4",
294
+ "time_start": 100.287,
295
+ "time_end": 102.655,
296
+ "duration": 2.368,
297
+ "energy": 0.694
298
+ },
299
+ {
300
+ "pitch": 55,
301
+ "note": "G3",
302
+ "time_start": 77.857,
303
+ "time_end": 80.573,
304
+ "duration": 2.717,
305
+ "energy": 0.578
306
+ },
307
+ {
308
+ "pitch": 59,
309
+ "note": "B3",
310
+ "time_start": 45.79,
311
+ "time_end": 48.042,
312
+ "duration": 2.252,
313
+ "energy": 0.656
314
+ },
315
+ {
316
+ "pitch": 50,
317
+ "note": "D3",
318
+ "time_start": 51.107,
319
+ "time_end": 53.359,
320
+ "duration": 2.252,
321
+ "energy": 0.649
322
+ },
323
+ {
324
+ "pitch": 45,
325
+ "note": "A2",
326
+ "time_start": 112.315,
327
+ "time_end": 114.962,
328
+ "duration": 2.647,
329
+ "energy": 0.552
330
+ },
331
+ {
332
+ "pitch": 45,
333
+ "note": "A2",
334
+ "time_start": 49.087,
335
+ "time_end": 51.432,
336
+ "duration": 2.345,
337
+ "energy": 0.617
338
+ },
339
+ {
340
+ "pitch": 68,
341
+ "note": "G#4",
342
+ "time_start": 100.566,
343
+ "time_end": 102.725,
344
+ "duration": 2.159,
345
+ "energy": 0.666
346
+ },
347
+ {
348
+ "pitch": 60,
349
+ "note": "C4",
350
+ "time_start": 129.939,
351
+ "time_end": 132.423,
352
+ "duration": 2.485,
353
+ "energy": 0.565
354
+ },
355
+ {
356
+ "pitch": 59,
357
+ "note": "B3",
358
+ "time_start": 117.656,
359
+ "time_end": 120.303,
360
+ "duration": 2.647,
361
+ "energy": 0.521
362
+ },
363
+ {
364
+ "pitch": 62,
365
+ "note": "D4",
366
+ "time_start": 100.008,
367
+ "time_end": 102.028,
368
+ "duration": 2.02,
369
+ "energy": 0.679
370
+ },
371
+ {
372
+ "pitch": 62,
373
+ "note": "D4",
374
+ "time_start": 46.208,
375
+ "time_end": 48.089,
376
+ "duration": 1.881,
377
+ "energy": 0.725
378
+ },
379
+ {
380
+ "pitch": 67,
381
+ "note": "G4",
382
+ "time_start": 128.499,
383
+ "time_end": 130.868,
384
+ "duration": 2.368,
385
+ "energy": 0.57
386
+ },
387
+ {
388
+ "pitch": 50,
389
+ "note": "D3",
390
+ "time_start": 4.714,
391
+ "time_end": 7.221,
392
+ "duration": 2.508,
393
+ "energy": 0.535
394
+ },
395
+ {
396
+ "pitch": 55,
397
+ "note": "G3",
398
+ "time_start": 125.922,
399
+ "time_end": 128.058,
400
+ "duration": 2.136,
401
+ "energy": 0.597
402
+ }
403
+ ],
404
+ "extra_notes": [
405
+ {
406
+ "pitch": 97,
407
+ "note": "C#7",
408
+ "time_start": 6.966,
409
+ "time_end": 9.218,
410
+ "duration": 2.252,
411
+ "energy": 0.651
412
+ },
413
+ {
414
+ "pitch": 103,
415
+ "note": "G7",
416
+ "time_start": 112.315,
417
+ "time_end": 114.567,
418
+ "duration": 2.252,
419
+ "energy": 0.623
420
+ },
421
+ {
422
+ "pitch": 104,
423
+ "note": "G#7",
424
+ "time_start": 6.966,
425
+ "time_end": 9.218,
426
+ "duration": 2.252,
427
+ "energy": 0.576
428
+ },
429
+ {
430
+ "pitch": 108,
431
+ "note": "C8",
432
+ "time_start": 112.315,
433
+ "time_end": 114.567,
434
+ "duration": 2.252,
435
+ "energy": 0.573
436
+ },
437
+ {
438
+ "pitch": 107,
439
+ "note": "B7",
440
+ "time_start": 47.903,
441
+ "time_end": 50.016,
442
+ "duration": 2.113,
443
+ "energy": 0.534
444
+ },
445
+ {
446
+ "pitch": 107,
447
+ "note": "B7",
448
+ "time_start": 101.378,
449
+ "time_end": 103.12,
450
+ "duration": 1.741,
451
+ "energy": 0.611
452
+ },
453
+ {
454
+ "pitch": 105,
455
+ "note": "A7",
456
+ "time_start": 116.541,
457
+ "time_end": 118.399,
458
+ "duration": 1.858,
459
+ "energy": 0.564
460
+ },
461
+ {
462
+ "pitch": 99,
463
+ "note": "D#7",
464
+ "time_start": 115.426,
465
+ "time_end": 118.027,
466
+ "duration": 2.601,
467
+ "energy": 0.391
468
+ },
469
+ {
470
+ "pitch": 103,
471
+ "note": "G7",
472
+ "time_start": 93.112,
473
+ "time_end": 95.62,
474
+ "duration": 2.508,
475
+ "energy": 0.387
476
+ },
477
+ {
478
+ "pitch": 95,
479
+ "note": "B6",
480
+ "time_start": 113.871,
481
+ "time_end": 115.659,
482
+ "duration": 1.788,
483
+ "energy": 0.539
484
+ },
485
+ {
486
+ "pitch": 86,
487
+ "note": "D6",
488
+ "time_start": 0.58,
489
+ "time_end": 2.159,
490
+ "duration": 1.579,
491
+ "energy": 0.596
492
+ },
493
+ {
494
+ "pitch": 91,
495
+ "note": "G6",
496
+ "time_start": 0.58,
497
+ "time_end": 2.159,
498
+ "duration": 1.579,
499
+ "energy": 0.558
500
+ },
501
+ {
502
+ "pitch": 107,
503
+ "note": "B7",
504
+ "time_start": 67.291,
505
+ "time_end": 68.801,
506
+ "duration": 1.509,
507
+ "energy": 0.531
508
+ },
509
+ {
510
+ "pitch": 104,
511
+ "note": "G#7",
512
+ "time_start": 62.02,
513
+ "time_end": 63.321,
514
+ "duration": 1.3,
515
+ "energy": 0.605
516
+ },
517
+ {
518
+ "pitch": 108,
519
+ "note": "C8",
520
+ "time_start": 51.13,
521
+ "time_end": 52.477,
522
+ "duration": 1.347,
523
+ "energy": 0.555
524
+ },
525
+ {
526
+ "pitch": 103,
527
+ "note": "G7",
528
+ "time_start": 40.403,
529
+ "time_end": 41.982,
530
+ "duration": 1.579,
531
+ "energy": 0.452
532
+ },
533
+ {
534
+ "pitch": 103,
535
+ "note": "G7",
536
+ "time_start": 104.536,
537
+ "time_end": 105.837,
538
+ "duration": 1.3,
539
+ "energy": 0.549
540
+ },
541
+ {
542
+ "pitch": 96,
543
+ "note": "C7",
544
+ "time_start": 118.445,
545
+ "time_end": 119.722,
546
+ "duration": 1.277,
547
+ "energy": 0.558
548
+ },
549
+ {
550
+ "pitch": 79,
551
+ "note": "G5",
552
+ "time_start": 0.58,
553
+ "time_end": 1.602,
554
+ "duration": 1.022,
555
+ "energy": 0.685
556
+ },
557
+ {
558
+ "pitch": 84,
559
+ "note": "C6",
560
+ "time_start": 0.58,
561
+ "time_end": 1.602,
562
+ "duration": 1.022,
563
+ "energy": 0.682
564
+ }
565
+ ]
566
+ }
app/src/App.jsx ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useRef, useCallback } from 'react';
2
+ import * as Tone from 'tone';
3
+ import PianoRoll from './components/PianoRoll';
4
+ import Controls from './components/Controls';
5
+ import OctopusLogo from './components/OctopusLogo';
6
+ import { useMidi } from './hooks/useMidi';
7
+ import { usePlayback } from './hooks/usePlayback';
8
+ import { buildKeyboardLayout } from './utils/midiHelpers';
9
+
10
+ const API_BASE = import.meta.env.DEV ? 'http://localhost:8000' : '';
11
+
12
+ // App states: 'upload' -> 'loading' -> 'player'
13
+
14
// Landing screen: lets the user pick or drag-drop an audio file and
// forwards the accepted File to onFileSelected. Rejects files whose
// extension is not one of the supported audio formats.
function UploadScreen({ onFileSelected }) {
  const [isDragging, setIsDragging] = useState(false);
  const [errorMsg, setErrorMsg] = useState('');
  const fileInputRef = useRef(null);

  // Validate the extension client-side before uploading; accept only the
  // formats the backend pipeline handles.
  const handleFile = useCallback((file) => {
    if (!file) return;
    const ext = file.name.split('.').pop().toLowerCase();
    if (!['mp3', 'm4a', 'wav', 'ogg', 'flac'].includes(ext)) {
      setErrorMsg('Please upload an audio file (MP3, M4A, WAV, OGG, or FLAC)');
      return;
    }
    setErrorMsg('');
    onFileSelected(file);
  }, [onFileSelected]);

  const handleDrop = useCallback((e) => {
    e.preventDefault();
    setIsDragging(false);
    handleFile(e.dataTransfer.files[0]);
  }, [handleFile]);

  // preventDefault is required so the browser doesn't navigate to the file.
  const handleDragOver = useCallback((e) => {
    e.preventDefault();
    setIsDragging(true);
  }, []);

  const handleDragLeave = useCallback(() => {
    setIsDragging(false);
  }, []);

  const handleFileSelect = useCallback((e) => {
    handleFile(e.target.files[0]);
  }, [handleFile]);

  return (
    <div className="upload-screen">
      <div className="upload-content">
        <div className="upload-logo">
          <OctopusLogo size={80} />
          <h1>Mr. Octopus</h1>
          <p className="upload-tagline">Your AI piano teacher</p>
        </div>

        <p className="upload-description">
          Drop a song and Mr. Octopus will transcribe it into a piano tutorial
          you can follow along with, note by note. Works best with clearly
          recorded solo piano pieces.
        </p>

        {/* Clicking the zone proxies to the hidden <input type="file"> below */}
        <div
          className={`drop-zone ${isDragging ? 'dragging' : ''}`}
          onDrop={handleDrop}
          onDragOver={handleDragOver}
          onDragLeave={handleDragLeave}
          onClick={() => fileInputRef.current?.click()}
        >
          <div className="drop-icon">&#9835;</div>
          <p>Drag & drop an audio file</p>
          <p className="drop-hint">MP3, M4A, WAV, OGG, FLAC</p>
          <input
            ref={fileInputRef}
            type="file"
            accept="audio/*,.m4a,.mp3,.wav,.ogg,.flac"
            onChange={handleFileSelect}
            hidden
          />
        </div>

        <div className="copyright-notice">
          Please only upload audio you have the rights to use.
        </div>

        {errorMsg && (
          <div className="upload-error">{errorMsg}</div>
        )}
      </div>
    </div>
  );
}
94
+
95
// Full-screen interstitial shown while the backend transcribes the upload;
// `status` is the human-readable progress message to display.
function LoadingScreen({ status }) {
  return (
    <div className="upload-screen">
      <div className="upload-processing">
        <div className="processing-logo">
          <OctopusLogo size={72} />
        </div>
        <h2>{status}</h2>
        <p className="loading-sub">This usually takes 5-10 seconds</p>
        <div className="loading-bar">
          <div className="loading-bar-fill" />
        </div>
      </div>
    </div>
  );
}
111
+
112
// Root component. Drives a three-state flow: 'upload' -> 'loading' -> 'player'.
// Upload posts the file to the backend, fetches the resulting MIDI + chords,
// and hands playback/rendering to usePlayback + PianoRoll.
export default function App() {
  const containerRef = useRef(null);
  const [dimensions, setDimensions] = useState({ width: 800, height: 600 });
  const [screen, setScreen] = useState('upload'); // 'upload' | 'loading' | 'player'
  const [loadingStatus, setLoadingStatus] = useState('');
  const [chords, setChords] = useState([]);

  const { notes, totalDuration, fileName, loadFromUrl, loadFromBlob } = useMidi();

  const {
    isPlaying,
    currentTimeRef,
    activeNotes,
    tempo,
    samplesLoaded,
    loopStart,
    loopEnd,
    isLooping,
    togglePlayPause,
    setTempo,
    seekTo,
    scheduleNotes,
    setLoopA,
    setLoopB,
    clearLoop,
  } = usePlayback();

  // When samples are loaded and we have notes, transition to player
  useEffect(() => {
    if (screen === 'loading' && samplesLoaded && notes.length > 0) {
      setScreen('player');
    }
  }, [screen, samplesLoaded, notes.length]);

  // Upload the file, then fetch the transcribed MIDI and chord data.
  // On any failure, fall back to the upload screen and surface the message.
  const handleFileSelected = useCallback(async (file) => {
    setScreen('loading');
    setLoadingStatus('Transcribing your song...');
    try {
      const form = new FormData();
      form.append('file', file);
      const res = await fetch(`${API_BASE}/api/transcribe`, {
        method: 'POST',
        body: form,
      });
      if (!res.ok) {
        // Backend errors are FastAPI JSON bodies with a "detail" field;
        // fall back to the HTTP status text if the body isn't JSON.
        const err = await res.json().catch(() => ({ detail: res.statusText }));
        throw new Error(err.detail || 'Transcription failed');
      }
      const data = await res.json();

      setLoadingStatus('Loading piano sounds...');
      const midiRes = await fetch(`${API_BASE}${data.midi_url}`);
      const blob = await midiRes.blob();
      loadFromBlob(blob, file.name.replace(/\.[^.]+$/, '.mid'));

      if (data.chords) {
        // The chord payload may be either {chords: [...]} or a bare array.
        const chordList = data.chords?.chords || data.chords || [];
        setChords(Array.isArray(chordList) ? chordList : []);
      }
      // Screen transition to 'player' happens via the useEffect above
      // once both samplesLoaded and notes.length > 0
    } catch (e) {
      setScreen('upload');
      alert(e.message || 'Something went wrong. Please try again.');
    }
  }, [loadFromBlob]);

  const handleNewSong = useCallback(() => {
    setScreen('upload');
    setChords([]);
  }, []);

  // Reschedule audio when notes change
  useEffect(() => {
    if (notes.length > 0) {
      scheduleNotes(notes, totalDuration);
    }
  }, [notes, totalDuration, scheduleNotes]);

  // Handle resize — re-run when `screen` changes because the canvas
  // container only exists on the player screen.
  useEffect(() => {
    const el = containerRef.current;
    if (!el) return;

    const ro = new ResizeObserver(([entry]) => {
      const { width, height } = entry.contentRect;
      if (width > 0 && height > 0) {
        setDimensions({ width, height });
      }
    });
    ro.observe(el);
    return () => ro.disconnect();
  }, [screen]);

  const keyboardLayout = buildKeyboardLayout(dimensions.width);

  // Tone.start() must run inside a user gesture before audio can play.
  const handleTogglePlay = useCallback(async () => {
    if (!samplesLoaded) return;
    await Tone.start();
    togglePlayPause();
  }, [togglePlayPause, samplesLoaded]);

  if (screen === 'upload') {
    return <UploadScreen onFileSelected={handleFileSelected} />;
  }

  if (screen === 'loading') {
    return <LoadingScreen status={loadingStatus} />;
  }

  return (
    <div className="app">
      <Controls
        isPlaying={isPlaying}
        togglePlayPause={handleTogglePlay}
        tempo={tempo}
        setTempo={setTempo}
        currentTimeRef={currentTimeRef}
        totalDuration={totalDuration}
        seekTo={seekTo}
        fileName={fileName}
        onNewSong={handleNewSong}
        loopStart={loopStart}
        loopEnd={loopEnd}
        isLooping={isLooping}
        onSetLoopA={setLoopA}
        onSetLoopB={setLoopB}
        onClearLoop={clearLoop}
      />
      <div className="canvas-container" ref={containerRef}>
        <PianoRoll
          notes={notes}
          currentTimeRef={currentTimeRef}
          activeNotes={activeNotes}
          keyboardLayout={keyboardLayout}
          width={dimensions.width}
          height={dimensions.height}
          loopStart={loopStart}
          loopEnd={loopEnd}
          chords={chords}
        />
      </div>
    </div>
  );
}
app/src/components/Controls.jsx ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useRef } from 'react';
2
+ import OctopusLogo from './OctopusLogo';
3
+
4
// Render a second count as "m:ss", e.g. 75 -> "1:15". Fractional seconds
// are floored, so 5.7 displays as "0:05".
function formatTime(s) {
  const minutes = Math.floor(s / 60);
  const seconds = Math.floor(s % 60);
  return `${minutes}:${String(seconds).padStart(2, '0')}`;
}
9
+
10
+ export default function Controls({
11
+ isPlaying,
12
+ togglePlayPause,
13
+ tempo,
14
+ setTempo,
15
+ currentTimeRef,
16
+ totalDuration,
17
+ seekTo,
18
+ fileName,
19
+ onNewSong,
20
+ loopStart,
21
+ loopEnd,
22
+ isLooping,
23
+ onSetLoopA,
24
+ onSetLoopB,
25
+ onClearLoop,
26
+ }) {
27
+ const [displayTime, setDisplayTime] = useState(0);
28
+ const intervalRef = useRef(null);
29
+
30
+ useEffect(() => {
31
+ intervalRef.current = setInterval(() => {
32
+ setDisplayTime(currentTimeRef.current);
33
+ }, 50);
34
+ return () => clearInterval(intervalRef.current);
35
+ }, [currentTimeRef]);
36
+
37
+ const progress = totalDuration > 0 ? (displayTime / totalDuration) * 100 : 0;
38
+
39
+ // Loop region markers for the timeline
40
+ const loopStartPct = loopStart !== null && totalDuration > 0
41
+ ? (loopStart / totalDuration) * 100 : null;
42
+ const loopEndPct = loopEnd !== null && totalDuration > 0
43
+ ? (loopEnd / totalDuration) * 100 : null;
44
+
45
+ // Build timeline background with loop region
46
+ let timelineBg;
47
+ if (loopStartPct !== null && loopEndPct !== null) {
48
+ timelineBg = `linear-gradient(to right,
49
+ var(--border) ${loopStartPct}%,
50
+ rgba(139, 92, 246, 0.3) ${loopStartPct}%,
51
+ var(--primary) ${Math.min(progress, loopEndPct)}%,
52
+ rgba(139, 92, 246, 0.3) ${Math.min(progress, loopEndPct)}%,
53
+ rgba(139, 92, 246, 0.3) ${loopEndPct}%,
54
+ var(--border) ${loopEndPct}%)`;
55
+ } else {
56
+ timelineBg = `linear-gradient(to right, var(--primary) ${progress}%, var(--border) ${progress}%)`;
57
+ }
58
+
59
+ return (
60
+ <div className="controls">
61
+ {/* Main controls row */}
62
+ <div className="controls-main">
63
+ <div className="controls-left">
64
+ <div className="brand-mark">
65
+ <OctopusLogo size={28} />
66
+ <span className="brand-name">Mr. Octopus</span>
67
+ </div>
68
+ {fileName && (
69
+ <span className="file-name">{fileName.replace(/\.[^.]+$/, '')}</span>
70
+ )}
71
+ </div>
72
+
73
+ <div className="controls-center">
74
+ <button
75
+ className="transport-btn"
76
+ onClick={() => seekTo(Math.max(0, displayTime - 5))}
77
+ title="Back 5s"
78
+ >
79
+ <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
80
+ <path d="M11 18V6l-8.5 6 8.5 6zm.5-6l8.5 6V6l-8.5 6z" />
81
+ </svg>
82
+ </button>
83
+
84
+ <button className="play-btn" onClick={togglePlayPause}>
85
+ {isPlaying ? (
86
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
87
+ <rect x="6" y="4" width="4" height="16" rx="1" />
88
+ <rect x="14" y="4" width="4" height="16" rx="1" />
89
+ </svg>
90
+ ) : (
91
+ <svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor">
92
+ <path d="M8 5v14l11-7z" />
93
+ </svg>
94
+ )}
95
+ </button>
96
+
97
+ <button
98
+ className="transport-btn"
99
+ onClick={() => seekTo(Math.min(totalDuration, displayTime + 5))}
100
+ title="Forward 5s"
101
+ >
102
+ <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
103
+ <path d="M4 18l8.5-6L4 6v12zm9-12v12l8.5-6L13 6z" />
104
+ </svg>
105
+ </button>
106
+ </div>
107
+
108
+ <div className="controls-right">
109
+ {/* Loop controls */}
110
+ <div className="loop-controls">
111
+ {!isLooping ? (
112
+ <>
113
+ <button
114
+ className={`btn btn-loop ${loopStart !== null ? 'active' : ''}`}
115
+ onClick={onSetLoopA}
116
+ title="Set loop start point (A)"
117
+ >
118
+ A
119
+ </button>
120
+ <button
121
+ className={`btn btn-loop ${loopEnd !== null ? 'active' : ''}`}
122
+ onClick={onSetLoopB}
123
+ disabled={loopStart === null}
124
+ title="Set loop end point (B)"
125
+ >
126
+ B
127
+ </button>
128
+ </>
129
+ ) : (
130
+ <button
131
+ className="btn btn-loop active"
132
+ onClick={onClearLoop}
133
+ title="Clear loop"
134
+ >
135
+ {formatTime(loopStart)} - {formatTime(loopEnd)}
136
+ <span className="loop-x">&times;</span>
137
+ </button>
138
+ )}
139
+ </div>
140
+
141
+ {onNewSong && (
142
+ <button className="btn btn-new" onClick={onNewSong}>
143
+ + New Song
144
+ </button>
145
+ )}
146
+
147
+ <div className="tempo-control">
148
+ <span className="tempo-label">Speed</span>
149
+ <input
150
+ type="range"
151
+ min={50}
152
+ max={200}
153
+ value={tempo}
154
+ onChange={(e) => setTempo(Number(e.target.value))}
155
+ />
156
+ <span className="tempo-value">{tempo}%</span>
157
+ </div>
158
+ </div>
159
+ </div>
160
+
161
+ {/* Timeline row */}
162
+ <div className="timeline">
163
+ <span className="timeline-time">{formatTime(displayTime)}</span>
164
+ <div className="timeline-track">
165
+ <input
166
+ type="range"
167
+ min={0}
168
+ max={totalDuration || 1}
169
+ step={0.1}
170
+ value={displayTime}
171
+ onChange={(e) => seekTo(Number(e.target.value))}
172
+ style={{ background: timelineBg }}
173
+ />
174
+ </div>
175
+ <span className="timeline-time">{formatTime(totalDuration)}</span>
176
+ </div>
177
+ </div>
178
+ );
179
+ }
app/src/components/OctopusLogo.jsx ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useId } from 'react';
2
+
3
+ export default function OctopusLogo({ size = 48 }) {
4
+ const id = useId();
5
+ const gradId = `octo${id.replace(/:/g, '')}`;
6
+
7
+ const legs = [
8
+ [-66, 'W'],
9
+ [-47, 'W'],
10
+ [-28, 'B'],
11
+ [-9, 'W'],
12
+ [9, 'W'],
13
+ [28, 'B'],
14
+ [47, 'W'],
15
+ [66, 'W'],
16
+ ];
17
+
18
+ const ox = 24, oy = 25;
19
+
20
+ return (
21
+ <svg width={size} height={size} viewBox="0 0 48 48" fill="none" xmlns="http://www.w3.org/2000/svg">
22
+ <defs>
23
+ <linearGradient id={gradId} x1="10" y1="8" x2="38" y2="44" gradientUnits="userSpaceOnUse">
24
+ <stop stopColor="#a78bfa" />
25
+ <stop offset="1" stopColor="#06b6d4" />
26
+ </linearGradient>
27
+ </defs>
28
+
29
+ {/* Tentacle legs — piano keys splayed outward */}
30
+ {legs.map(([angle, type], i) => {
31
+ const isBlack = type === 'B';
32
+ const w = isBlack ? 2.2 : 3;
33
+ const h = isBlack ? 11 : 16;
34
+ const rx = isBlack ? 0.7 : 1;
35
+ return (
36
+ <g key={i} transform={`rotate(${angle}, ${ox}, ${oy})`}>
37
+ <rect x={ox - w / 2} y={oy} width={w} height={h} rx={rx}
38
+ fill={isBlack ? '#1e1b4b' : '#f0eef5'} />
39
+ <rect x={ox - w / 2} y={oy} width={w} height={h} rx={rx}
40
+ fill={isBlack ? '#4338ca' : '#c8c5d6'}
41
+ opacity={isBlack ? 0.12 : 0.2} />
42
+ </g>
43
+ );
44
+ })}
45
+
46
+ {/* Head */}
47
+ <ellipse cx="24" cy="16" rx="14" ry="12" fill={`url(#${gradId})`} />
48
+ <circle cx="19" cy="14" r="2.5" fill="#07070e" />
49
+ <circle cx="29" cy="14" r="2.5" fill="#07070e" />
50
+ <circle cx="20" cy="13.3" r="0.9" fill="white" opacity="0.9" />
51
+ <circle cx="30" cy="13.3" r="0.9" fill="white" opacity="0.9" />
52
+ </svg>
53
+ );
54
+ }
app/src/components/PianoRoll.jsx ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useRef, useEffect } from 'react';
2
+ import { COLORS, noteColor, noteGlowColor } from '../utils/colorScheme';
3
+ import {
4
+ buildNotePositionMap,
5
+ noteXPositionFast,
6
+ getVisibleNotes,
7
+ isBlackKey,
8
+ } from '../utils/midiHelpers';
9
+
10
+ const LOOK_AHEAD_SECONDS = 4;
11
+ const KEYBOARD_HEIGHT_RATIO = 0.18; // keyboard takes 18% of canvas height
12
+ const MIN_KEYBOARD_HEIGHT = 80;
13
+ const MAX_KEYBOARD_HEIGHT = 150;
14
+
15
/**
 * Trace a rounded-rectangle path on `ctx` (path only; caller fills or
 * strokes it).
 *
 * Degenerate sizes are skipped entirely: the original guarded only
 * h < 0, but callers compute widths like `pos.width - 2`, which can go
 * negative on very narrow keys and previously produced a negative
 * clamped radius and a garbage path. The radius is also clamped to be
 * non-negative.
 */
function drawRoundedRect(ctx, x, y, w, h, r) {
  if (h < 0 || w < 0) return;
  r = Math.max(0, Math.min(r, w / 2, h / 2));
  ctx.beginPath();
  ctx.moveTo(x + r, y);
  ctx.lineTo(x + w - r, y);
  ctx.quadraticCurveTo(x + w, y, x + w, y + r);
  ctx.lineTo(x + w, y + h - r);
  ctx.quadraticCurveTo(x + w, y + h, x + w - r, y + h);
  ctx.lineTo(x + r, y + h);
  ctx.quadraticCurveTo(x, y + h, x, y + h - r);
  ctx.lineTo(x, y + r);
  ctx.quadraticCurveTo(x, y, x + r, y);
  ctx.closePath();
}
30
+
31
/**
 * Paint the falling-note rectangles above the hit line. A note reaches
 * the hit line exactly at its start time; LOOK_AHEAD_SECONDS of future
 * material is visible above it.
 */
function drawFallingNotes(ctx, notes, currentTime, hitLineY, positionMap) {
  const pxPerSec = hitLineY / LOOK_AHEAD_SECONDS;
  const upcoming = getVisibleNotes(notes, currentTime, LOOK_AHEAD_SECONDS, 0.5);

  ctx.save();

  for (const note of upcoming) {
    const bottomY = hitLineY - (note.time - currentTime) * pxPerSec;
    const topY =
      hitLineY - (note.time + note.duration - currentTime) * pxPerSec;

    // Entirely outside the scrolling strip: nothing to draw.
    if (bottomY < 0 || topY > hitLineY) continue;

    const top = Math.max(topY, 0);
    const bottom = Math.min(bottomY, hitLineY);
    const bodyHeight = bottom - top;
    if (bodyHeight < 1) continue;

    const pos = noteXPositionFast(note.midi, positionMap);
    if (!pos) continue;

    const pad = 1;
    const x = pos.x + pad;
    const w = pos.width - pad * 2;

    // Soft glow behind the note body.
    ctx.shadowColor = noteGlowColor(note.midi);
    ctx.shadowBlur = 12;

    ctx.fillStyle = noteColor(note.midi);
    drawRoundedRect(ctx, x, top, w, bodyHeight, 4);
    ctx.fill();

    // Brighter strip while the leading edge crosses the hit line.
    if (bottomY <= hitLineY && bottomY >= hitLineY - 3) {
      ctx.shadowBlur = 20;
      ctx.fillStyle = noteGlowColor(note.midi);
      ctx.fillRect(x, hitLineY - 3, w, 3);
    }
  }

  ctx.shadowBlur = 0;
  ctx.restore();
}
77
+
78
/** Draw the glowing horizontal line where falling notes meet the keys. */
function drawHitLine(ctx, y, width) {
  ctx.save();
  ctx.strokeStyle = COLORS.hitLine;
  ctx.shadowColor = COLORS.hitLine;
  ctx.shadowBlur = 8;
  ctx.lineWidth = 2;
  ctx.beginPath();
  ctx.moveTo(0, y);
  ctx.lineTo(width, y);
  ctx.stroke();
  ctx.shadowBlur = 0;
  ctx.restore();
}
91
+
92
/**
 * Render the on-screen keyboard: all white keys in a first pass, then
 * the (shorter) black keys on top. Keys whose MIDI numbers appear in
 * `activeNotes` are highlighted with a glow pass.
 */
function drawKeyboard(ctx, keyboardLayout, keyboardY, keyboardHeight, activeNotes) {
  const blackKeyHeight = keyboardHeight * 0.62;

  // Pass 1: white keys.
  for (const key of keyboardLayout) {
    if (key.isBlack) continue;
    const active = activeNotes.has(key.midiNumber);

    ctx.fillStyle = active ? COLORS.whiteKeyActive : COLORS.whiteKey;
    ctx.fillRect(key.x, keyboardY, key.width, keyboardHeight);

    ctx.strokeStyle = COLORS.keyBorder;
    ctx.lineWidth = 1;
    ctx.strokeRect(key.x, keyboardY, key.width, keyboardHeight);

    if (active) {
      // Extra inset fill with a shadow so the lit key halos outward.
      ctx.save();
      ctx.shadowColor = noteGlowColor(key.midiNumber);
      ctx.shadowBlur = 15;
      ctx.fillStyle = COLORS.whiteKeyActive;
      ctx.fillRect(key.x + 1, keyboardY, key.width - 2, keyboardHeight);
      ctx.shadowBlur = 0;
      ctx.restore();
    }
  }

  // Pass 2: black keys, drawn over the white ones.
  for (const key of keyboardLayout) {
    if (!key.isBlack) continue;
    const active = activeNotes.has(key.midiNumber);

    ctx.fillStyle = active ? COLORS.blackKeyActive : COLORS.blackKey;
    ctx.fillRect(key.x, keyboardY, key.width, blackKeyHeight);

    if (active) {
      ctx.save();
      ctx.shadowColor = noteGlowColor(key.midiNumber);
      ctx.shadowBlur = 15;
      ctx.fillRect(key.x, keyboardY, key.width, blackKeyHeight);
      ctx.shadowBlur = 0;
      ctx.restore();
    }

    ctx.strokeStyle = '#000000';
    ctx.lineWidth = 1;
    ctx.strokeRect(key.x, keyboardY, key.width, blackKeyHeight);
  }
}
141
+
142
/**
 * Overlay chord-name pills along the left edge, scrolling in sync with
 * the falling notes, plus a faint full-width divider at each chord
 * boundary. Single-note "chords" (quality === 'note') are skipped.
 */
function drawChordLabels(ctx, chords, currentTime, hitLineY, width) {
  if (!chords || chords.length === 0) return;

  const pxPerSec = hitLineY / LOOK_AHEAD_SECONDS;
  const CHORD_STRIP_HEIGHT = 28;

  ctx.save();

  for (const chord of chords) {
    const startTime = chord.start_time;
    const endTime = chord.end_time;

    // Outside the visible window entirely.
    if (endTime < currentTime - 0.5 || startTime > currentTime + LOOK_AHEAD_SECONDS) continue;

    // Ignore single-note events.
    if (chord.quality === 'note') continue;

    const yBottom = hitLineY - (startTime - currentTime) * pxPerSec;
    const yTop = hitLineY - (endTime - currentTime) * pxPerSec;
    if (yBottom < 0 || yTop > hitLineY) continue;

    const clippedTop = Math.max(yTop, 0);
    const clippedBottom = Math.min(yBottom, hitLineY);

    const stripY = clippedTop;
    const stripHeight = Math.max(CHORD_STRIP_HEIGHT, clippedBottom - clippedTop);
    const pillHeight = Math.min(CHORD_STRIP_HEIGHT, stripHeight);

    // Translucent background pill.
    ctx.fillStyle = 'rgba(139, 92, 246, 0.15)';
    drawRoundedRect(ctx, 8, stripY, 72, pillHeight, 6);
    ctx.fill();

    ctx.font = 'bold 13px -apple-system, BlinkMacSystemFont, sans-serif';
    ctx.textAlign = 'center';
    ctx.textBaseline = 'middle';

    // Minor/diminished chords get the purple tint.
    const minorish = chord.quality?.includes('minor') || chord.quality?.includes('dim');
    ctx.fillStyle = minorish ? 'rgba(167, 139, 250, 0.9)' : 'rgba(255, 255, 255, 0.9)';
    ctx.fillText(chord.chord_name, 44, stripY + pillHeight / 2);

    // Full-width boundary line at the chord's start edge, when on-screen.
    if (yBottom > 0 && yBottom < hitLineY) {
      ctx.strokeStyle = 'rgba(139, 92, 246, 0.12)';
      ctx.lineWidth = 1;
      ctx.beginPath();
      ctx.moveTo(0, yBottom);
      ctx.lineTo(width, yBottom);
      ctx.stroke();
    }
  }

  ctx.restore();
}
204
+
205
/**
 * Visualize the A/B practice loop: dashed boundary lines at the loop
 * start and end, plus a dark overlay dimming everything outside it.
 * No-op unless both loop points are set.
 */
function drawLoopMarkers(ctx, loopStart, loopEnd, currentTime, hitLineY, width) {
  if (loopStart === null || loopEnd === null) return;
  const pxPerSec = hitLineY / LOOK_AHEAD_SECONDS;
  const toY = (t) => hitLineY - (t - currentTime) * pxPerSec;

  // Dashed line for each boundary that is currently on-screen.
  for (const t of [loopStart, loopEnd]) {
    const y = toY(t);
    if (y < 0 || y > hitLineY) continue;

    ctx.save();
    ctx.setLineDash([6, 4]);
    ctx.strokeStyle = 'rgba(139, 92, 246, 0.6)';
    ctx.lineWidth = 2;
    ctx.beginPath();
    ctx.moveTo(0, y);
    ctx.lineTo(width, y);
    ctx.stroke();
    ctx.setLineDash([]);
    ctx.restore();
  }

  const startY = toY(loopStart);
  const endY = toY(loopEnd);

  ctx.save();
  ctx.fillStyle = 'rgba(0, 0, 0, 0.3)';
  // Beyond the loop end (upcoming material past the loop).
  if (endY > 0) {
    ctx.fillRect(0, 0, width, Math.min(endY, hitLineY));
  }
  // Before the loop start (material already behind the loop).
  if (startY < hitLineY) {
    const overlayTop = Math.max(startY, 0);
    ctx.fillRect(0, overlayTop, width, hitLineY - overlayTop);
  }
  ctx.restore();
}
242
+
243
+ export default function PianoRoll({
244
+ notes,
245
+ currentTimeRef,
246
+ activeNotes,
247
+ keyboardLayout,
248
+ width,
249
+ height,
250
+ loopStart,
251
+ loopEnd,
252
+ chords,
253
+ }) {
254
+ const canvasRef = useRef(null);
255
+ const positionMapRef = useRef(null);
256
+
257
+ // Rebuild position map when layout changes
258
+ useEffect(() => {
259
+ positionMapRef.current = buildNotePositionMap(keyboardLayout);
260
+ }, [keyboardLayout]);
261
+
262
+ // Main render loop
263
+ useEffect(() => {
264
+ const canvas = canvasRef.current;
265
+ if (!canvas) return;
266
+
267
+ const ctx = canvas.getContext('2d');
268
+ const dpr = window.devicePixelRatio || 1;
269
+
270
+ canvas.width = width * dpr;
271
+ canvas.height = height * dpr;
272
+ ctx.scale(dpr, dpr);
273
+
274
+ let frameId;
275
+
276
+ function render() {
277
+ const currentTime = currentTimeRef.current;
278
+ const keyboardHeight = Math.min(
279
+ MAX_KEYBOARD_HEIGHT,
280
+ Math.max(MIN_KEYBOARD_HEIGHT, height * KEYBOARD_HEIGHT_RATIO)
281
+ );
282
+ const hitLineY = height - keyboardHeight;
283
+
284
+ // Clear
285
+ ctx.fillStyle = COLORS.pianoRollBg;
286
+ ctx.fillRect(0, 0, width, height);
287
+
288
+ // Draw subtle grid lines for visual reference
289
+ ctx.strokeStyle = '#ffffff08';
290
+ ctx.lineWidth = 1;
291
+ const pixelsPerSecond = hitLineY / LOOK_AHEAD_SECONDS;
292
+ for (let s = 0; s < LOOK_AHEAD_SECONDS; s++) {
293
+ const y = hitLineY - s * pixelsPerSecond;
294
+ ctx.beginPath();
295
+ ctx.moveTo(0, y);
296
+ ctx.lineTo(width, y);
297
+ ctx.stroke();
298
+ }
299
+
300
+ // Falling notes
301
+ if (positionMapRef.current) {
302
+ drawFallingNotes(ctx, notes, currentTime, hitLineY, positionMapRef.current);
303
+ }
304
+
305
+ // Chord labels
306
+ drawChordLabels(ctx, chords, currentTime, hitLineY, width);
307
+
308
+ // Loop markers
309
+ drawLoopMarkers(ctx, loopStart, loopEnd, currentTime, hitLineY, width);
310
+
311
+ // Hit line
312
+ drawHitLine(ctx, hitLineY, width);
313
+
314
+ // Keyboard
315
+ drawKeyboard(ctx, keyboardLayout, hitLineY, keyboardHeight, activeNotes);
316
+
317
+ frameId = requestAnimationFrame(render);
318
+ }
319
+
320
+ render();
321
+
322
+ return () => cancelAnimationFrame(frameId);
323
+ }, [notes, keyboardLayout, activeNotes, width, height, currentTimeRef, loopStart, loopEnd, chords]);
324
+
325
+ return (
326
+ <canvas
327
+ ref={canvasRef}
328
+ style={{
329
+ width: `${width}px`,
330
+ height: `${height}px`,
331
+ display: 'block',
332
+ }}
333
+ />
334
+ );
335
+ }
app/src/hooks/useMidi.js ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useCallback } from 'react';
2
+ import { Midi } from '@tonejs/midi';
3
+ import { parseMidiFile } from '../utils/midiHelpers';
4
+ import { generateSampleMidi } from '../utils/generateSampleMidi';
5
+
6
/**
 * React hook owning the parsed MIDI state (notes, duration, file name)
 * and exposing loaders for URLs, File objects, Blobs, and a built-in
 * sample song.
 *
 * The original four loaders each repeated the parse/commit and
 * loading-flag/error boilerplate; that is factored into the private
 * `commit` and `runLoader` helpers. External interface is unchanged.
 */
export function useMidi() {
  const [notes, setNotes] = useState([]);
  const [totalDuration, setTotalDuration] = useState(0);
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState(null);
  const [fileName, setFileName] = useState('');

  // Parse a @tonejs/midi object and commit the result to state.
  const commit = useCallback((midi, name) => {
    const result = parseMidiFile(midi);
    setNotes(result.notes);
    setTotalDuration(result.totalDuration);
    setFileName(name);
  }, []);

  // Shared async wrapper: loading flag + error capture around a
  // producer that resolves to a Midi instance.
  const runLoader = useCallback(
    async (produce, name) => {
      setIsLoading(true);
      setError(null);
      try {
        const midi = await produce();
        commit(midi, name);
      } catch (e) {
        setError(e.message);
      } finally {
        setIsLoading(false);
      }
    },
    [commit]
  );

  // Fetch and parse a MIDI file from a URL; name is the URL's basename.
  const loadFromUrl = useCallback(
    (url) => runLoader(() => Midi.fromUrl(url), url.split('/').pop()),
    [runLoader]
  );

  // Parse a user-selected File; silently ignores a null/undefined file.
  const loadFromFile = useCallback(
    async (file) => {
      if (!file) return;
      await runLoader(async () => new Midi(await file.arrayBuffer()), file.name);
    },
    [runLoader]
  );

  // Parse a Blob (e.g. a server-side transcription result).
  const loadFromBlob = useCallback(
    (blob, name = 'transcription.mid') =>
      runLoader(async () => new Midi(await blob.arrayBuffer()), name),
    [runLoader]
  );

  // Load the bundled demo song (synchronous, cannot fail).
  const loadSample = useCallback(() => {
    commit(generateSampleMidi(), 'Twinkle Twinkle Little Star');
  }, [commit]);

  return {
    notes,
    totalDuration,
    isLoading,
    error,
    fileName,
    loadFromUrl,
    loadFromFile,
    loadFromBlob,
    loadSample,
  };
}
app/src/hooks/usePlayback.js ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useRef, useState, useCallback, useEffect } from 'react';
2
+ import * as Tone from 'tone';
3
+
4
+ // Salamander Grand Piano samples (Tone.js official CDN, interpolates between keys)
5
+ const SALAMANDER_URL = 'https://tonejs.github.io/audio/salamander/';
6
+ const SAMPLE_NOTES = [
7
+ 'A0', 'C1', 'D#1', 'F#1', 'A1', 'C2', 'D#2', 'F#2',
8
+ 'A2', 'C3', 'D#3', 'F#3', 'A3', 'C4', 'D#4', 'F#4',
9
+ 'A4', 'C5', 'D#5', 'F#5', 'A5', 'C6', 'D#6', 'F#6',
10
+ 'A6', 'C7', 'D#7', 'F#7', 'A7', 'C8',
11
+ ];
12
+
13
/**
 * Map note names to their sample file names for Tone.Sampler.
 * Sharps are encoded as "s" in the CDN file names (e.g. "D#1" -> "Ds1.ogg").
 * OGG is used because it has no encoder padding delay; MP3 adds ~50ms
 * of silence.
 *
 * Generalized to accept an arbitrary note list; defaults to the full
 * Salamander sample set, so existing zero-argument callers are unchanged.
 *
 * @param {string[]} [noteNames=SAMPLE_NOTES] - note names to include
 * @returns {Object<string, string>} note name -> sample file name
 */
function buildSampleMap(noteNames = SAMPLE_NOTES) {
  const map = {};
  for (const note of noteNames) {
    map[note] = note.replace('#', 's') + '.ogg';
  }
  return map;
}
21
+
22
/**
 * React hook driving audio playback through Tone.js: a Salamander piano
 * sampler, Transport-based note scheduling, a rAF clock published via
 * `currentTimeRef`, tempo as a percentage, seek, and an A/B practice
 * loop.
 *
 * Fix: the Tone.Sampler construction was previously duplicated verbatim
 * between the mount effect and `getSynth`; there is now a single
 * construction site (`getSynth`), and the mount effect simply calls it
 * to start sample loading eagerly. External interface is unchanged.
 */
export function usePlayback() {
  const [isPlaying, setIsPlaying] = useState(false);
  const [tempo, setTempoState] = useState(100);
  const [activeNotes, setActiveNotes] = useState(new Set());
  const [samplesLoaded, setSamplesLoaded] = useState(false);
  const [loopStart, setLoopStart] = useState(null);
  const [loopEnd, setLoopEnd] = useState(null);

  const currentTimeRef = useRef(0);      // playback clock, read by canvas/UI
  const animationRef = useRef(null);     // rAF handle for the tick loop
  const synthRef = useRef(null);         // lazily-created Tone.Sampler
  const scheduledRef = useRef(false);    // whether notes are on the Transport
  const totalDurationRef = useRef(0);    // song length for auto-stop
  const loopRef = useRef({ start: null, end: null });

  // Mirror loop state into a ref so the rAF tick can read it without
  // re-subscribing.
  useEffect(() => {
    loopRef.current = { start: loopStart, end: loopEnd };
  }, [loopStart, loopEnd]);

  // Lazily create (and cache) the Salamander sampler — the single
  // construction site for the synth.
  const getSynth = useCallback(() => {
    if (!synthRef.current) {
      synthRef.current = new Tone.Sampler({
        urls: buildSampleMap(),
        baseUrl: SALAMANDER_URL,
        release: 1.5,
        onload: () => setSamplesLoaded(true),
      }).toDestination();
    }
    return synthRef.current;
  }, []);

  // Start sample loading eagerly on mount.
  useEffect(() => {
    getSynth();
  }, [getSynth]);

  // rAF tick: publish the transport clock, wrap at the loop end, and
  // auto-stop shortly after the last note.
  const tick = useCallback(() => {
    const transport = Tone.getTransport();
    if (transport.state === 'started') {
      currentTimeRef.current = transport.seconds;

      const loop = loopRef.current;

      if (loop.start !== null && loop.end !== null && transport.seconds >= loop.end) {
        // Reached B: silence ringing notes and jump back to A.
        if (synthRef.current) synthRef.current.releaseAll();
        transport.seconds = loop.start;
        currentTimeRef.current = loop.start;
        setActiveNotes(new Set());
      } else if (
        totalDurationRef.current > 0 &&
        transport.seconds >= totalDurationRef.current + 0.5
      ) {
        // Past the end (plus a small tail): rewind and stop.
        transport.pause();
        transport.seconds = 0;
        currentTimeRef.current = 0;
        setIsPlaying(false);
        setActiveNotes(new Set());
      }
    }
    animationRef.current = requestAnimationFrame(tick);
  }, []);

  // Run the tick loop for the hook's lifetime.
  useEffect(() => {
    animationRef.current = requestAnimationFrame(tick);
    return () => {
      if (animationRef.current) {
        cancelAnimationFrame(animationRef.current);
      }
    };
  }, [tick]);

  /**
   * (Re)schedule a note list on the Transport: clears any previous
   * schedule, resets the clock, and wires key-highlight draw events
   * for each note's attack and release.
   */
  const scheduleNotes = useCallback(
    (notes, totalDuration) => {
      const transport = Tone.getTransport();
      transport.cancel();
      transport.stop();
      transport.seconds = 0;
      currentTimeRef.current = 0;
      totalDurationRef.current = totalDuration;
      scheduledRef.current = false;
      setIsPlaying(false);
      setActiveNotes(new Set());

      const synth = getSynth();

      notes.forEach((note) => {
        transport.schedule((time) => {
          const noteName = Tone.Frequency(note.midi, 'midi').toNote();
          synth.triggerAttackRelease(noteName, note.duration, time, note.velocity);

          // Key highlight on (at the note's attack).
          Tone.Draw.schedule(() => {
            setActiveNotes((prev) => {
              const next = new Set(prev);
              next.add(note.midi);
              return next;
            });
          }, time);

          // Key highlight off (at the note's release).
          Tone.Draw.schedule(() => {
            setActiveNotes((prev) => {
              const next = new Set(prev);
              next.delete(note.midi);
              return next;
            });
          }, time + note.duration);
        }, note.time);
      });

      scheduledRef.current = true;
    },
    [getSynth]
  );

  const play = useCallback(async () => {
    await Tone.start(); // resume the AudioContext (user-gesture requirement)
    const transport = Tone.getTransport();
    transport.start();
    setIsPlaying(true);
  }, []);

  const pause = useCallback(() => {
    Tone.getTransport().pause();
    setIsPlaying(false);
  }, []);

  const togglePlayPause = useCallback(async () => {
    if (isPlaying) {
      pause();
    } else {
      await play();
    }
  }, [isPlaying, play, pause]);

  // Tempo is a percentage of original speed (100 = normal).
  const setTempo = useCallback((percent) => {
    setTempoState(percent);
    Tone.getTransport().playbackRate = percent / 100;
  }, []);

  // Jump to an absolute time, cutting any notes still sounding.
  const seekTo = useCallback(
    (timeInSeconds) => {
      const synth = getSynth();
      synth.releaseAll();
      Tone.getTransport().seconds = timeInSeconds;
      currentTimeRef.current = timeInSeconds;
      setActiveNotes(new Set());
    },
    [getSynth]
  );

  // Set loop A (start) at the current position; an end point at or
  // before the new start is discarded.
  const setLoopA = useCallback(() => {
    const t = currentTimeRef.current;
    setLoopStart(t);
    if (loopEnd !== null && loopEnd <= t) {
      setLoopEnd(null);
    }
  }, [loopEnd]);

  // Set loop B (end) — only valid once A exists and B lies after it.
  const setLoopB = useCallback(() => {
    const t = currentTimeRef.current;
    if (loopStart !== null && t > loopStart) {
      setLoopEnd(t);
    }
  }, [loopStart]);

  // Remove both loop points.
  const clearLoop = useCallback(() => {
    setLoopStart(null);
    setLoopEnd(null);
  }, []);

  const isLooping = loopStart !== null && loopEnd !== null;

  // Tear down the Transport schedule and dispose the sampler on unmount.
  useEffect(() => {
    return () => {
      Tone.getTransport().cancel();
      Tone.getTransport().stop();
      if (synthRef.current) {
        synthRef.current.dispose();
        synthRef.current = null;
      }
    };
  }, []);

  return {
    isPlaying,
    currentTimeRef,
    activeNotes,
    tempo,
    samplesLoaded,
    loopStart,
    loopEnd,
    isLooping,
    play,
    pause,
    togglePlayPause,
    setTempo,
    seekTo,
    scheduleNotes,
    setLoopA,
    setLoopB,
    clearLoop,
  };
}
app/src/index.css ADDED
@@ -0,0 +1,552 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ :root {
2
+ --bg: #07070e;
3
+ --surface: #0f0f1a;
4
+ --surface-2: #161628;
5
+ --surface-3: #1c1c36;
6
+ --border: #1e1e3a;
7
+ --border-hover: #2e2e52;
8
+ --primary: #8b5cf6;
9
+ --primary-hover: #a78bfa;
10
+ --primary-dim: #7c3aed;
11
+ --primary-glow: rgba(139, 92, 246, 0.25);
12
+ --accent: #06b6d4;
13
+ --text: #f1f5f9;
14
+ --text-muted: #94a3b8;
15
+ --text-subtle: #525280;
16
+ --danger: #ef4444;
17
+ --danger-bg: rgba(239, 68, 68, 0.1);
18
+ --radius: 10px;
19
+ --radius-lg: 14px;
20
+ }
21
+
22
/* Global reset */
* { margin: 0; padding: 0; box-sizing: border-box; }

body {
  background: var(--bg);
  color: var(--text);
  font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
  overflow: hidden;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

#root { width: 100vw; height: 100vh; }

.app { width: 100%; height: 100%; display: flex; flex-direction: column; }

/* ========================================
   Upload Screen
   ======================================== */

.upload-screen {
  width: 100vw;
  height: 100vh;
  display: flex;
  align-items: center;
  justify-content: center;
  background: var(--bg);
  background-image:
    radial-gradient(ellipse at 50% 30%, rgba(139, 92, 246, 0.08) 0%, transparent 60%),
    radial-gradient(ellipse at 50% 60%, rgba(6, 182, 212, 0.04) 0%, transparent 50%);
}

.upload-content { width: 100%; max-width: 520px; padding: 0 24px; }

.upload-logo {
  display: flex;
  flex-direction: column;
  align-items: center;
  margin-bottom: 48px;
}

/* Gradient-filled wordmark */
.upload-logo h1 {
  font-size: 36px;
  font-weight: 700;
  letter-spacing: -1px;
  background: linear-gradient(135deg, #a78bfa 0%, #06b6d4 100%);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
  margin-top: 14px;
}

.upload-tagline { color: var(--text-muted); font-size: 16px; margin-top: 8px; }

.upload-description {
  color: var(--text-muted);
  font-size: 14px;
  line-height: 1.6;
  text-align: center;
  margin-bottom: 28px;
}

/* Copyright notice */
.copyright-notice {
  margin-top: 20px;
  padding: 12px 16px;
  font-size: 12px;
  line-height: 1.5;
  color: var(--text-subtle);
  text-align: center;
  border-top: 1px solid var(--border);
}

/* Drop zone */
.drop-zone {
  border: 2px dashed var(--border);
  border-radius: var(--radius-lg);
  padding: 36px 24px;
  cursor: pointer;
  transition: all 0.2s;
  text-align: center;
}

.drop-zone:hover,
.drop-zone.dragging {
  border-color: var(--primary);
  background: rgba(139, 92, 246, 0.05);
  box-shadow: inset 0 0 30px rgba(139, 92, 246, 0.03);
}

.drop-icon { font-size: 32px; margin-bottom: 10px; opacity: 0.4; }

.drop-zone p { color: var(--text-muted); font-size: 14px; font-weight: 500; }

.drop-hint {
  font-size: 12px !important;
  color: var(--text-subtle) !important;
  font-weight: 400 !important;
  margin-top: 6px;
}

/* Error message */
.upload-error {
  margin-top: 20px;
  color: var(--danger);
  font-size: 13px;
  font-weight: 500;
  background: var(--danger-bg);
  padding: 12px 16px;
  border-radius: var(--radius);
  border: 1px solid rgba(239, 68, 68, 0.2);
}

/* ========================================
   Processing / Loading Screen
   ======================================== */

.upload-processing { text-align: center; }

.processing-logo { animation: pulse 2s ease-in-out infinite; margin-bottom: 24px; }

.upload-processing h2 { font-size: 24px; font-weight: 700; color: var(--text); margin-bottom: 8px; }

.upload-processing p { color: var(--text-muted); font-size: 15px; }

.loading-sub { margin-bottom: 28px; }

.loading-bar {
  width: 200px;
  height: 4px;
  background: var(--border);
  border-radius: 2px;
  margin: 0 auto;
  overflow: hidden;
}

/* Indeterminate progress: a 40%-wide fill sliding left-to-right */
.loading-bar-fill {
  width: 40%;
  height: 100%;
  background: linear-gradient(90deg, var(--primary), var(--accent));
  border-radius: 2px;
  animation: loading-slide 1.5s ease-in-out infinite;
}

@keyframes loading-slide {
  0% { transform: translateX(-100%); }
  100% { transform: translateX(350%); }
}

@keyframes pulse {
  0%, 100% { transform: scale(1); opacity: 1; }
  50% { transform: scale(1.08); opacity: 0.7; }
}

/* ========================================
   Controls Bar (Player)
   ======================================== */

.controls {
  background: var(--surface);
  border-bottom: 1px solid var(--border);
  flex-shrink: 0;
  display: flex;
  flex-direction: column;
}

.controls-main {
  height: 56px;
  display: flex;
  align-items: center;
  justify-content: space-between;
  padding: 0 20px;
  gap: 16px;
}

.controls-left { display: flex; align-items: center; gap: 14px; min-width: 0; flex: 1; }

.brand-mark { display: flex; align-items: center; gap: 10px; flex-shrink: 0; }

.brand-name {
  font-size: 15px;
  font-weight: 700;
  background: linear-gradient(135deg, #a78bfa, #06b6d4);
  -webkit-background-clip: text;
  -webkit-text-fill-color: transparent;
  background-clip: text;
  white-space: nowrap;
  letter-spacing: -0.3px;
}

.file-name {
  font-size: 13px;
  color: var(--text-muted);
  white-space: nowrap;
  overflow: hidden;
  text-overflow: ellipsis;
  max-width: 200px;
  padding-left: 14px;
  border-left: 1.5px solid var(--border);
  font-weight: 500;
}

.controls-center { display: flex; align-items: center; gap: 6px; flex-shrink: 0; }

.controls-right { display: flex; align-items: center; gap: 16px; flex: 1; justify-content: flex-end; }

/* Transport buttons */
.transport-btn {
  width: 36px;
  height: 36px;
  border-radius: 8px;
  border: none;
  background: var(--surface-2);
  color: var(--text-muted);
  cursor: pointer;
  display: flex;
  align-items: center;
  justify-content: center;
  transition: all 0.15s;
}

.transport-btn:hover { background: var(--surface-3); color: var(--text); }

/* Play button — bold and prominent */
.play-btn {
  width: 48px;
  height: 48px;
  border-radius: 50%;
  border: none;
  background: var(--primary);
  color: white;
  font-size: 18px;
  cursor: pointer;
  transition: all 0.2s;
  display: flex;
  align-items: center;
  justify-content: center;
  box-shadow: 0 0 20px var(--primary-glow);
}

.play-btn:hover {
  background: var(--primary-hover);
  box-shadow: 0 0 30px var(--primary-glow);
  transform: scale(1.05);
}

.play-btn:active { transform: scale(0.97); }

/* + New Song button */
.btn {
  background: var(--surface-2);
  color: var(--text-muted);
  border: 1.5px solid var(--border);
  border-radius: 8px;
  padding: 7px 16px;
  font-size: 12px;
  font-weight: 600;
  font-family: inherit;
  cursor: pointer;
  transition: all 0.15s;
  white-space: nowrap;
  letter-spacing: 0.2px;
}

.btn:hover { background: var(--surface-3); color: var(--text); border-color: var(--border-hover); }

.btn-new { border-color: var(--primary-dim); color: var(--primary-hover); }

.btn-new:hover {
  background: rgba(139, 92, 246, 0.1);
  border-color: var(--primary);
  color: var(--primary-hover);
}

/* Tempo control */
.tempo-control {
  display: flex;
  align-items: center;
  gap: 8px;
  background: var(--surface-2);
  padding: 6px 14px;
  border-radius: 8px;
  border: 1px solid var(--border);
}

.tempo-label {
  font-size: 11px;
  font-weight: 600;
  color: var(--text-subtle);
  text-transform: uppercase;
  letter-spacing: 0.5px;
  white-space: nowrap;
}

.tempo-value {
  font-size: 13px;
  font-weight: 600;
  color: var(--text-muted);
  min-width: 36px;
  text-align: right;
  font-variant-numeric: tabular-nums;
}

.tempo-control input[type='range'] { width: 80px; }

/* ========================================
   Timeline / Progress Bar
   ======================================== */

.timeline { display: flex; align-items: center; gap: 12px; padding: 0 20px 10px; }

.timeline-time {
  font-size: 12px;
  font-weight: 600;
  color: var(--text-muted);
  font-variant-numeric: tabular-nums;
  min-width: 36px;
}

.timeline-time:last-child { text-align: right; }

.timeline-track { flex: 1; position: relative; }

.timeline-track input[type='range'] {
  width: 100%;
  height: 6px;
  border-radius: 3px;
  -webkit-appearance: none;
  appearance: none;
  outline: none;
  cursor: pointer;
  transition: height 0.15s;
}

.timeline-track input[type='range']:hover { height: 8px; }

.timeline-track input[type='range']::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 14px;
  height: 14px;
  border-radius: 50%;
  background: var(--primary-hover);
  cursor: pointer;
  border: 2px solid white;
  box-shadow: 0 0 8px var(--primary-glow);
  transition: transform 0.1s;
}

.timeline-track input[type='range']::-webkit-slider-thumb:hover { transform: scale(1.25); }

.timeline-track input[type='range']::-moz-range-thumb {
  width: 14px;
  height: 14px;
  border-radius: 50%;
  background: var(--primary-hover);
  cursor: pointer;
  border: 2px solid white;
  box-shadow: 0 0 8px var(--primary-glow);
}

/* General range sliders (for tempo) */
input[type='range'] {
  -webkit-appearance: none;
  appearance: none;
  background: var(--border);
  height: 4px;
  border-radius: 2px;
  outline: none;
  cursor: pointer;
}

input[type='range']::-webkit-slider-thumb {
  -webkit-appearance: none;
  appearance: none;
  width: 14px;
  height: 14px;
  border-radius: 50%;
  background: var(--primary);
  cursor: pointer;
  border: none;
  transition: transform 0.1s;
}

input[type='range']::-webkit-slider-thumb:hover { transform: scale(1.2); }

input[type='range']::-moz-range-thumb {
  width: 14px;
  height: 14px;
  border-radius: 50%;
  background: var(--primary);
  cursor: pointer;
  border: none;
}

/* Loop controls */
.loop-controls { display: flex; align-items: center; gap: 4px; }

.btn-loop {
  min-width: 32px;
  text-align: center;
  font-weight: 700;
  font-size: 12px;
  padding: 6px 10px;
  border-radius: 6px;
  font-family: inherit;
  letter-spacing: 0.3px;
}

.btn-loop.active {
  background: rgba(139, 92, 246, 0.15);
  border-color: var(--primary);
  color: var(--primary-hover);
}

.btn-loop:disabled { opacity: 0.3; cursor: not-allowed; }

.loop-x { margin-left: 6px; font-size: 14px; opacity: 0.6; }

/* Canvas area */
.canvas-container { flex: 1; position: relative; overflow: hidden; width: 100%; }
app/src/main.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
// Application entry point: mounts the React app into the #root element.
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './index.css'
import App from './App.jsx'

// StrictMode double-invokes renders/effects in development to surface
// unsafe side effects; it is a no-op in production builds.
createRoot(document.getElementById('root')).render(
  <StrictMode>
    <App />
  </StrictMode>,
)
app/src/utils/colorScheme.js ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Central color palette for the canvas renderer and UI.
// Plain hex / 8-digit-hex (last byte = alpha) strings consumed directly
// as canvas fill/stroke styles.
export const COLORS = {
  background: '#07070e',
  pianoRollBg: '#0a0a14',
  hitLine: '#ffffff33', // 8-digit hex: trailing byte is alpha
  hitLineGlow: '#ffffff18',

  // Note colors — brand-aligned
  leftHand: '#8b5cf6',
  leftHandGlow: '#a78bfa',
  rightHand: '#06b6d4',
  rightHandGlow: '#22d3ee',

  // Piano key colors
  whiteKey: '#e8e8e8',
  whiteKeyActive: '#c4b5fd',
  blackKey: '#1a1a2e',
  blackKeyActive: '#7c3aed',
  keyBorder: '#2a2a40',

  // UI
  text: '#f1f5f9',
  textMuted: '#94a3b8',
  controlsBg: '#0a0a14',
  controlsBorder: '#1e1e3a',
};

// Pitch boundary between the "left hand" and "right hand" color groups:
// notes strictly below this are treated as left-hand.
export const MIDI_SPLIT_POINT = 60; // Middle C (C4)
28
+
29
/** Body color for a falling note, split by hand at MIDI_SPLIT_POINT. */
export function noteColor(midiNumber) {
  if (midiNumber >= MIDI_SPLIT_POINT) return COLORS.rightHand;
  return COLORS.leftHand;
}
32
+
33
/** Glow color for a falling note, split by hand at MIDI_SPLIT_POINT. */
export function noteGlowColor(midiNumber) {
  if (midiNumber >= MIDI_SPLIT_POINT) return COLORS.rightHandGlow;
  return COLORS.leftHandGlow;
}
app/src/utils/generateSampleMidi.js ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Midi } from '@tonejs/midi';
2
+
3
/**
 * Generate a sample "Twinkle Twinkle Little Star" MIDI with both hands.
 *
 * Builds a two-track @tonejs/midi object: a right-hand melody track
 * (all pitches >= 60 / middle C) and a left-hand chord track (all
 * pitches < 60), so the app's hand-split coloring has both sides.
 *
 * @returns {Midi} an in-memory @tonejs/midi Midi object
 */
export function generateSampleMidi() {
  const midi = new Midi();

  // Right hand melody (MIDI >= 60)
  const rhTrack = midi.addTrack();
  rhTrack.name = 'Right Hand';

  // C C G G A A G - F F E E D D C
  // Then: G G F F E E D - G G F F E E D
  const melody = [
    // Phrase 1
    60, 60, 67, 67, 69, 69, 67,
    // Phrase 2
    65, 65, 64, 64, 62, 62, 60,
    // Phrase 3
    67, 67, 65, 65, 64, 64, 62,
    // Phrase 4
    67, 67, 65, 65, 64, 64, 62,
    // Phrase 5 (repeat phrase 1)
    60, 60, 67, 67, 69, 69, 67,
    // Phrase 6 (repeat phrase 2)
    65, 65, 64, 64, 62, 62, 60,
  ];
  // One duration per melody entry: six 0.5-length notes and a 1-length
  // note closing each phrase (units per @tonejs/midi `time`/`duration`).
  const durations = [
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
    0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 1,
  ];

  // Lay the melody end-to-end; each note sounds for 90% of its slot so
  // repeated pitches are visually/aurally separated.
  let t = 0;
  melody.forEach((note, i) => {
    rhTrack.addNote({
      midi: note,
      time: t,
      duration: durations[i] * 0.9,
      velocity: 0.8,
    });
    t += durations[i];
  });

  // Left hand accompaniment (MIDI < 60)
  const lhTrack = midi.addTrack();
  lhTrack.name = 'Left Hand';

  // Block chords with absolute start times aligned to the melody phrases.
  const chords = [
    // Phrase 1: C major
    { notes: [48, 52, 55], time: 0, dur: 2 },
    { notes: [48, 52, 55], time: 2, dur: 1 },
    // Phrase 2: F major -> C major
    { notes: [41, 45, 48], time: 3, dur: 2 },
    { notes: [48, 52, 55], time: 5, dur: 1 },
    // Phrase 3: C -> G -> Am -> F
    { notes: [48, 52], time: 7, dur: 1 },
    { notes: [43, 47], time: 8, dur: 1 },
    { notes: [45, 48], time: 9, dur: 1 },
    // Phrase 4: same
    { notes: [41, 45], time: 10, dur: 1 },
    { notes: [48, 52], time: 11, dur: 1 },
    { notes: [43, 47], time: 12, dur: 1 },
    { notes: [45, 48], time: 13, dur: 1 },
    // Phrase 5
    { notes: [48, 52, 55], time: 14, dur: 2 },
    { notes: [48, 52, 55], time: 16, dur: 1 },
    // Phrase 6
    { notes: [41, 45, 48], time: 17, dur: 2 },
    { notes: [48, 52, 55], time: 19, dur: 2 },
  ];

  chords.forEach((chord) => {
    chord.notes.forEach((note) => {
      lhTrack.addNote({
        midi: note,
        time: chord.time,
        duration: chord.dur * 0.9, // same 90% articulation gap as the melody
        velocity: 0.6, // accompaniment softer than the melody (0.8)
      });
    });
  });

  return midi;
}
app/src/utils/midiHelpers.js ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { MIDI_SPLIT_POINT } from './colorScheme';
2
+
3
// Piano range: C2 (36) to C7 (96)
export const LOWEST_NOTE = 36;
export const HIGHEST_NOTE = 96;

// Semitone offsets within an octave that land on black keys (C#=1, D#=3, ...).
const BLACK_KEY_OFFSETS = new Set([1, 3, 6, 8, 10]);

/** True when the given MIDI note number is a black key. */
export function isBlackKey(midiNumber) {
  const pitchClass = midiNumber % 12;
  return BLACK_KEY_OFFSETS.has(pitchClass);
}
12
+
13
/**
 * Build an array of key objects with pixel positions for a given canvas width.
 * Returns [{ midiNumber, x, width, isBlack }]
 */
export function buildKeyboardLayout(canvasWidth) {
  // All white keys in range, ascending; they evenly divide the canvas width.
  const whites = [];
  for (let m = LOWEST_NOTE; m <= HIGHEST_NOTE; m++) {
    if (!isBlackKey(m)) whites.push(m);
  }

  const whiteWidth = canvasWidth / whites.length;
  const blackWidth = whiteWidth * 0.6;

  // Index white keys by MIDI number with their computed pixel positions.
  const byMidi = new Map(
    whites.map((m, idx) => [
      m,
      { midiNumber: m, x: idx * whiteWidth, width: whiteWidth, isBlack: false },
    ])
  );

  // Each black key straddles the right edge of the white key just below it.
  for (let m = LOWEST_NOTE; m <= HIGHEST_NOTE; m++) {
    if (!isBlackKey(m)) continue;
    const below = byMidi.get(m - 1);
    if (below) {
      byMidi.set(m, {
        midiNumber: m,
        x: below.x + below.width - blackWidth / 2,
        width: blackWidth,
        isBlack: true,
      });
    }
  }

  // Emit in ascending MIDI order.
  const layout = [];
  for (let m = LOWEST_NOTE; m <= HIGHEST_NOTE; m++) {
    const key = byMidi.get(m);
    if (key) layout.push(key);
  }
  return layout;
}
66
+
67
/**
 * Get the x position and width for a falling note block.
 * Linear scan of the layout; see noteXPositionFast for the per-frame path.
 */
export function noteXPosition(midiNumber, keyboardLayout) {
  for (const key of keyboardLayout) {
    if (key.midiNumber === midiNumber) {
      return { x: key.x, width: key.width };
    }
  }

  // Out-of-range pitches clamp to the nearest edge key.
  const edge =
    midiNumber < LOWEST_NOTE
      ? keyboardLayout[0]
      : keyboardLayout[keyboardLayout.length - 1];
  return { x: edge.x, width: edge.width };
}
82
+
83
// Build a fast lookup map for noteXPosition (avoids .find() per note per frame)
export function buildNotePositionMap(keyboardLayout) {
  return new Map(
    keyboardLayout.map((key) => [key.midiNumber, { x: key.x, width: key.width }])
  );
}
91
+
92
/** O(1) variant of noteXPosition backed by a precomputed position map. */
export function noteXPositionFast(midiNumber, positionMap) {
  const hit = positionMap.get(midiNumber);
  if (hit !== undefined) return hit;

  // Out-of-range pitches clamp to the edges of the keyboard.
  return midiNumber < LOWEST_NOTE
    ? positionMap.get(LOWEST_NOTE)
    : positionMap.get(HIGHEST_NOTE);
}
100
+
101
/**
 * Parse a Midi object (from @tonejs/midi) into our note format.
 * Flattens all tracks, tags each note with its hand via MIDI_SPLIT_POINT,
 * and returns the notes sorted by onset plus the overall duration.
 */
export function parseMidiFile(midiObject) {
  const notes = midiObject.tracks.flatMap((track) =>
    track.notes.map((note) => ({
      midi: note.midi,
      name: note.name,
      time: note.time,
      duration: note.duration,
      velocity: note.velocity,
      hand: note.midi < MIDI_SPLIT_POINT ? 'left' : 'right',
    }))
  );

  // Chronological order by onset.
  notes.sort((a, b) => a.time - b.time);

  // Total duration = latest note-off, 0 for an empty file.
  let totalDuration = 0;
  for (const n of notes) {
    const off = n.time + n.duration;
    if (off > totalDuration) totalDuration = off;
  }

  return { notes, totalDuration };
}
130
+
131
// Per-array cache of the longest note duration. Keyed weakly so we never
// write an expando property onto the caller's array (the previous version
// stashed `notes._maxDur` on the array itself, mutating caller data and
// breaking on frozen arrays) and never retain arrays after the caller drops them.
const maxDurationCache = new WeakMap();

/**
 * Get notes visible in the current time window using binary search.
 * Notes array must be sorted by `time` (start time).
 *
 * @param {Array<{time: number, duration: number}>} notes - sorted by onset
 * @param {number} currentTime - playhead position
 * @param {number} lookAheadSeconds - window extent ahead of the playhead
 * @param {number} [maxPastSeconds=1] - window extent behind the playhead
 * @returns {Array} subset of `notes` overlapping the window
 */
export function getVisibleNotes(
  notes,
  currentTime,
  lookAheadSeconds,
  maxPastSeconds = 1
) {
  const endTime = currentTime + lookAheadSeconds;

  // The longest note duration bounds how far back a still-visible,
  // long-held note can have started. Computed once per array.
  // NOTE: like the original `_maxDur` cache, this goes stale if the
  // caller mutates the array contents in place.
  let maxDur = maxDurationCache.get(notes);
  if (maxDur === undefined) {
    maxDur = 0;
    for (const n of notes) {
      if (n.duration > maxDur) maxDur = n.duration;
    }
    maxDurationCache.set(notes, maxDur);
  }
  const searchBack = maxPastSeconds + maxDur;

  // Binary search on `time` (which IS sorted) for the earliest note that
  // could possibly still be visible.
  const earliest = currentTime - searchBack;
  let lo = 0;
  let hi = notes.length;
  while (lo < hi) {
    const mid = (lo + hi) >> 1;
    if (notes[mid].time < earliest) {
      lo = mid + 1;
    } else {
      hi = mid;
    }
  }

  // Collect notes that start before the window ends and have not fully
  // ended before the visible window begins.
  const cutoff = currentTime - maxPastSeconds;
  const result = [];
  for (let i = lo; i < notes.length && notes[i].time < endTime; i++) {
    if (notes[i].time + notes[i].duration >= cutoff) {
      result.push(notes[i]);
    }
  }
  return result;
}
app/vite.config.js ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

// https://vite.dev/config/
// Dev-server only: forwards /api/* to the backend on port 8000 so the SPA
// and API share an origin during development. (Production serving is
// handled by the backend container instead.)
export default defineConfig({
  plugins: [react()],
  server: {
    proxy: {
      '/api': 'http://localhost:8000',
    },
  },
})
fly.toml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ app = "mroctopus"
2
+ primary_region = "lhr"
3
+
4
+ [build]
5
+
6
+ [http_service]
7
+ internal_port = 7860
8
+ force_https = true
9
+ auto_stop_machines = "stop"
10
+ auto_start_machines = true
11
+ min_machines_running = 0
12
+
13
+ [vm]
14
+ memory = "2gb"
15
+ cpu_kind = "shared"
16
+ cpus = 2
transcriber/chords.py ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Chord detection from MIDI files using template-matching music theory.
2
+
3
+ Analyzes a MIDI file to detect chords at each note onset, producing a
4
+ time-stamped list of chord events with root, quality, and constituent notes.
5
+ Designed for the Mr. Octopus piano tutorial pipeline.
6
+ """
7
+
8
+ import json
9
+ from pathlib import Path
10
+ from collections import defaultdict
11
+
12
+ import pretty_midi
13
+ import numpy as np
14
+
15
+
16
+ # ---------------------------------------------------------------------------
17
+ # Music theory constants
18
+ # ---------------------------------------------------------------------------
19
+
20
# Pitch-class names indexed 0-11, sharps only; user-facing spelling comes
# from ENHARMONIC_DISPLAY below.
NOTE_NAMES = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

# Enharmonic display preferences: use flats for certain roots to match
# standard music notation (e.g. Bb major, not A# major).
ENHARMONIC_DISPLAY = {
    "C": "C", "C#": "Db", "D": "D", "D#": "Eb", "E": "E", "F": "F",
    "F#": "F#", "G": "G", "G#": "Ab", "A": "A", "A#": "Bb", "B": "B",
}

# Chord templates: quality name -> set of semitone intervals from root.
# Each template is a frozenset of pitch-class intervals (0 = root).
# NOTE: dict insertion order matters — match_chord iterates this dict and
# keeps the FIRST best-scoring candidate (strict `>` comparison), so
# earlier entries win exact score ties.
CHORD_TEMPLATES = {
    # Triads
    "major": frozenset([0, 4, 7]),
    "minor": frozenset([0, 3, 7]),
    "diminished": frozenset([0, 3, 6]),
    "augmented": frozenset([0, 4, 8]),

    # Suspended
    "sus2": frozenset([0, 2, 7]),
    "sus4": frozenset([0, 5, 7]),

    # Seventh chords
    "dominant 7": frozenset([0, 4, 7, 10]),
    "major 7": frozenset([0, 4, 7, 11]),
    "minor 7": frozenset([0, 3, 7, 10]),
    "diminished 7": frozenset([0, 3, 6, 9]),
    "half-dim 7": frozenset([0, 3, 6, 10]),
    "min/maj 7": frozenset([0, 3, 7, 11]),
    "augmented 7": frozenset([0, 4, 8, 10]),

    # Extended / added-tone
    "add9": frozenset([0, 2, 4, 7]),
    "minor add9": frozenset([0, 2, 3, 7]),
    "6": frozenset([0, 4, 7, 9]),
    "minor 6": frozenset([0, 3, 7, 9]),
}

# Short suffix for display (e.g. "Cm7", "Gdim", "Fsus4")
QUALITY_SUFFIX = {
    "major": "",
    "minor": "m",
    "diminished": "dim",
    "augmented": "aug",
    "sus2": "sus2",
    "sus4": "sus4",
    "dominant 7": "7",
    "major 7": "maj7",
    "minor 7": "m7",
    "diminished 7": "dim7",
    "half-dim 7": "m7b5",
    "min/maj 7": "m(maj7)",
    "augmented 7": "aug7",
    "add9": "add9",
    "minor add9": "madd9",
    "6": "6",
    "minor 6": "m6",
}

# Priority ordering for tie-breaking when multiple templates match equally.
# Lower index = preferred. Triads > sevenths > extended > suspended.
# Consumed by match_chord as a small score penalty (index * 0.05).
QUALITY_PRIORITY = [
    "major", "minor", "dominant 7", "minor 7", "major 7",
    "diminished", "augmented", "half-dim 7", "diminished 7",
    "6", "minor 6", "sus4", "sus2", "add9", "minor add9",
    "min/maj 7", "augmented 7",
]
87
+
88
+
89
+ # ---------------------------------------------------------------------------
90
+ # Frame extraction
91
+ # ---------------------------------------------------------------------------
92
+
93
def extract_note_frames(midi_data, onset_tolerance=0.05):
    """Group MIDI notes into simultaneous frames (chords / single notes).

    Notes whose onsets fall within ``onset_tolerance`` seconds of the first
    note in the current group are pooled into the same frame. Returns a list
    of dicts::

        {
            "start": float,       # earliest onset in the group
            "end": float,         # latest note-off in the group
            "pitches": [int],     # MIDI pitch numbers
            "velocities": [int],  # corresponding velocities
        }

    sorted by start time.
    """
    # Flatten all instruments (typically just one for piano) and order by onset.
    ordered = sorted(
        (note for inst in midi_data.instruments for note in inst.notes),
        key=lambda n: n.start,
    )
    if not ordered:
        return []

    frames = []
    group = [ordered[0]]
    for note in ordered[1:]:
        # Tolerance is measured against the group's FIRST onset, so a long
        # run of slightly-staggered notes does not chain into one frame.
        if note.start - group[0].start <= onset_tolerance:
            group.append(note)
        else:
            frames.append(_group_to_frame(group))
            group = [note]
    frames.append(_group_to_frame(group))

    return frames
128
+
129
+
130
+ def _group_to_frame(notes):
131
+ """Convert a group of pretty_midi Note objects into a frame dict."""
132
+ return {
133
+ "start": min(n.start for n in notes),
134
+ "end": max(n.end for n in notes),
135
+ "pitches": [n.pitch for n in notes],
136
+ "velocities": [n.velocity for n in notes],
137
+ }
138
+
139
+
140
+ # ---------------------------------------------------------------------------
141
+ # Template matching
142
+ # ---------------------------------------------------------------------------
143
+
144
def pitch_class_set(pitches):
    """Convert a list of MIDI pitches to a set of pitch classes (0-11)."""
    return {pitch % 12 for pitch in pitches}
147
+
148
+
149
def match_chord(pitches, velocities=None):
    """Identify a chord from a set of MIDI pitches.

    Uses a template-matching approach that tests every possible root (0-11)
    against every chord template. Scoring:

    1. Count how many template tones are present in the pitch-class set
       (weighted by velocity when available).
    2. Penalize extra notes not in the template.
    3. Prefer templates that explain more notes.
    4. Handle inversions: the bass note does not need to be the root.

    Ties on score keep the first candidate in (root, template) iteration
    order (strict ``>`` comparison).

    Returns a dict::

        {
            "root": int,            # pitch class 0-11
            "root_name": str,       # e.g. "C", "Db"
            "quality": str,         # e.g. "minor 7"
            "chord_name": str,      # e.g. "Cm7"
            "notes": [str],         # constituent note names
            "midi_pitches": [int],  # original MIDI pitches
        }

    or a single-note result / None if fewer than 2 distinct pitch classes.
    """
    pcs = pitch_class_set(pitches)
    if len(pcs) < 2:
        return _single_note_result(pitches) if pitches else None

    # Build a velocity weight map (pitch class -> total velocity)
    pc_weight = defaultdict(float)
    if velocities and len(velocities) == len(pitches):
        for p, v in zip(pitches, velocities):
            pc_weight[p % 12] += v
    else:
        for p in pitches:
            pc_weight[p % 12] += 80  # default velocity

    # Normalize weights so the max is 1.0
    max_w = max(pc_weight.values()) if pc_weight else 1.0
    for pc in pc_weight:
        pc_weight[pc] /= max_w

    # Determine the bass note (lowest pitch) for inversion bonus
    bass_pc = min(pitches) % 12

    # Hoisted out of the 12 x len(CHORD_TEMPLATES) loop: O(1) rank lookup
    # instead of an O(n) QUALITY_PRIORITY.index() per candidate.
    priority_rank = {q: i for i, q in enumerate(QUALITY_PRIORITY)}
    default_rank = len(QUALITY_PRIORITY)

    best_score = -999
    best_result = None

    for root in range(12):
        for quality, template in CHORD_TEMPLATES.items():
            # Transpose template to this root
            transposed = frozenset((root + interval) % 12 for interval in template)

            # Weighted count of template tones present in the input
            matched_weight = 0.0
            matched_count = 0
            for pc in transposed:
                if pc in pcs:
                    matched_weight += pc_weight.get(pc, 0.5)
                    matched_count += 1

            # A template must match at least 2 pitch classes to be viable.
            # (Checked before scoring; the original computed the full score
            # first and then discarded it.)
            if matched_count < 2:
                continue

            # Input pitch classes the template does not explain
            extra_notes = len(pcs - transposed)
            # Template tones absent from the input
            missing = len(transposed) - matched_count

            # Base score: reward matches, penalize misses and extras
            score = matched_weight * 2.0 - missing * 1.5 - extra_notes * 0.5

            # Bonus if this template perfectly covers all input notes
            if extra_notes == 0 and missing == 0:
                score += 3.0

            # Bonus if root is the bass note (root position)
            if root == bass_pc:
                score += 0.8

            # Bonus for root having high velocity
            score += pc_weight.get(root, 0) * 0.3

            # Smaller bonus for simpler chord types (triads over 7ths)
            score -= priority_rank.get(quality, default_rank) * 0.05

            if score > best_score:
                best_score = score
                root_name = ENHARMONIC_DISPLAY[NOTE_NAMES[root]]
                suffix = QUALITY_SUFFIX.get(quality, quality)
                best_result = {
                    "root": root,
                    "root_name": root_name,
                    "quality": quality,
                    "chord_name": f"{root_name}{suffix}",
                    "notes": sorted(ENHARMONIC_DISPLAY[NOTE_NAMES[pc]] for pc in transposed),
                    "midi_pitches": sorted(pitches),
                }

    # If no template matched well enough, fall back to describing the bass + interval
    if best_result is None:
        return _fallback_chord(pitches)

    return best_result
257
+
258
+
259
def _single_note_result(pitches):
    """Return a result for a single note (no chord)."""
    if not pitches:
        return None

    pitch_class = pitches[0] % 12
    label = ENHARMONIC_DISPLAY[NOTE_NAMES[pitch_class]]
    return {
        "root": pitch_class,
        "root_name": label,
        "quality": "note",
        "chord_name": label,
        "notes": [label],
        "midi_pitches": sorted(pitches),
    }
273
+
274
+
275
def _fallback_chord(pitches):
    """Produce a best-effort label for unrecognized pitch combinations."""
    pcs = pitch_class_set(pitches)
    bass = min(pitches) % 12
    bass_name = ENHARMONIC_DISPLAY[NOTE_NAMES[bass]]

    # Describe the cluster as the bass plus its interval content, e.g. "C(2,5)".
    intervals = sorted((pc - bass) % 12 for pc in pcs if pc != bass)
    interval_str = ",".join(str(i) for i in intervals)

    return {
        "root": bass,
        "root_name": bass_name,
        "quality": "unknown",
        "chord_name": f"{bass_name}({interval_str})",
        "notes": sorted(ENHARMONIC_DISPLAY[NOTE_NAMES[pc]] for pc in pcs),
        "midi_pitches": sorted(pitches),
    }
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # Smoothing
297
+ # ---------------------------------------------------------------------------
298
+
299
def smooth_chords(chord_events, min_duration=0.1):
    """Remove very short chord changes and merge consecutive identical chords.

    If an intermediate event lasts less than `min_duration`, its labels are
    overwritten with the preceding event's so it is absorbed into the
    surrounding chord. Consecutive events that then share a chord name are
    merged into a single event spanning their combined time range, with
    their `midi_pitches` unioned.

    Fix over the previous version: the function now works on per-event
    copies, so the caller's event dicts are never mutated in place.

    Parameters
    ----------
    chord_events : list[dict]
        Chord event dicts with at least "start_time", "end_time",
        "chord_name", "quality", "root_note", "notes", "midi_pitches".
    min_duration : float
        Events shorter than this (seconds) are absorbed into the previous.

    Returns
    -------
    list[dict]
        New event dicts; the input list and its dicts are left unmodified.
    """
    if not chord_events:
        return chord_events

    # Work on shallow copies of each event so callers keep their data intact.
    filtered = [dict(event) for event in chord_events]

    # Pass 1: absorb transient chords (< min_duration) into the previous
    # chord by overwriting their labels; the merge pass then fuses them.
    # First and last events are never absorbed.
    for i in range(1, len(filtered) - 1):
        duration = filtered[i]["end_time"] - filtered[i]["start_time"]
        if duration < min_duration:
            filtered[i]["chord_name"] = filtered[i - 1]["chord_name"]
            filtered[i]["quality"] = filtered[i - 1]["quality"]
            filtered[i]["root_note"] = filtered[i - 1]["root_note"]
            filtered[i]["notes"] = filtered[i - 1]["notes"]

    # Pass 2: merge consecutive events with the same chord name.
    merged = [filtered[0]]
    for event in filtered[1:]:
        if event["chord_name"] == merged[-1]["chord_name"]:
            # Extend the previous event and union its MIDI pitches.
            merged[-1]["end_time"] = event["end_time"]
            pitches = set(merged[-1].get("midi_pitches", []))
            pitches.update(event.get("midi_pitches", []))
            merged[-1]["midi_pitches"] = sorted(pitches)
        else:
            merged.append(event)

    return merged
335
+
336
+
337
+ # ---------------------------------------------------------------------------
338
+ # Main detection pipeline
339
+ # ---------------------------------------------------------------------------
340
+
341
def detect_chords(midi_path, output_path=None, onset_tolerance=0.05,
                  min_chord_duration=0.1):
    """Detect chords from a MIDI file and save results as JSON.

    Parameters
    ----------
    midi_path : str or Path
        Path to the input MIDI file.
    output_path : str or Path, optional
        Path for the output JSON file. Defaults to the MIDI filename
        with "_chords.json" suffix.
    onset_tolerance : float
        Maximum time difference (seconds) to group notes into the same frame.
    min_chord_duration : float
        Minimum duration for a chord event; shorter events get smoothed away.

    Returns
    -------
    list[dict]
        Chord event dicts with keys: start_time, end_time, chord_name,
        root_note, quality, notes, midi_pitches.
    """
    midi_path = Path(midi_path)
    if output_path is None:
        output_path = midi_path.with_name(midi_path.stem + "_chords.json")
    else:
        output_path = Path(output_path)

    print(f"\nChord detection: {midi_path.name}")

    # Load MIDI and group notes into onset frames.
    midi_data = pretty_midi.PrettyMIDI(str(midi_path))
    frames = extract_note_frames(midi_data, onset_tolerance=onset_tolerance)
    print(f" Extracted {len(frames)} note frames")

    if not frames:
        _write_json([], output_path)
        return []

    # Label each frame with its best-matching chord.
    raw_events = []
    for frame in frames:
        match = match_chord(frame["pitches"], frame["velocities"])
        if match is None:
            continue
        raw_events.append({
            "start_time": round(frame["start"], 4),
            "end_time": round(frame["end"], 4),
            "chord_name": match["chord_name"],
            "root_note": match["root_name"],
            "quality": match["quality"],
            "notes": match["notes"],
            "midi_pitches": match["midi_pitches"],
        })

    print(f" Identified {len(raw_events)} raw chord events")

    # Smooth out transient chord changes.
    smoothed = smooth_chords(raw_events, min_duration=min_chord_duration)
    print(f" After smoothing: {len(smoothed)} chord events")

    # Round all times for clean output.
    for event in smoothed:
        event["start_time"] = round(event["start_time"], 4)
        event["end_time"] = round(event["end_time"], 4)

    # Summary of the distinct chords found.
    unique_chords = set(e["chord_name"] for e in smoothed)
    print(f" Unique chords: {len(unique_chords)} ({', '.join(sorted(unique_chords))})")

    _write_json(smoothed, output_path)
    print(f" Saved to {output_path}")

    return smoothed
428
+
429
+
430
+ def _write_json(data, path):
431
+ """Write chord data to a JSON file."""
432
+ output = {
433
+ "version": 1,
434
+ "description": "Chord detection output from Mr. Octopus piano tutorial pipeline",
435
+ "chord_count": len(data),
436
+ "chords": data,
437
+ }
438
+ with open(path, "w") as f:
439
+ json.dump(output, f, indent=2)
440
+
441
+
442
+ # ---------------------------------------------------------------------------
443
+ # CLI entry point
444
+ # ---------------------------------------------------------------------------
445
+
446
if __name__ == "__main__":
    import sys

    # Require at least the MIDI file argument; print usage otherwise.
    args = sys.argv[1:]
    if not args:
        print("Usage: python chords.py <midi_file> [output.json]")
        print()
        print("Analyzes a MIDI file and detects chords at each note onset.")
        print("Outputs a JSON file with timestamped chord events.")
        sys.exit(1)

    midi_file = args[0]
    out_file = args[1] if len(args) > 1 else None
    events = detect_chords(midi_file, out_file)
    print(f"\nDetected {len(events)} chord events")
transcriber/optimize.py ADDED
@@ -0,0 +1,1356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Optimize MIDI transcription by correcting onsets, cleaning artifacts, and
2
+ ensuring rhythmic accuracy against the original audio."""
3
+
4
+ import copy
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ import pretty_midi
9
+ import librosa
10
+ from collections import Counter
11
+
12
+
13
+ def remove_leading_silence_notes(midi_data, y, sr):
14
+ """Remove notes that appear during silence/noise before the music starts.
15
+
16
+ Finds the first moment of real musical energy and removes any MIDI notes
17
+ before that point (typically microphone rumble / low-freq noise artifacts).
18
+ """
19
+ midi_out = copy.deepcopy(midi_data)
20
+
21
+ # Compute RMS in 50ms windows
22
+ hop = int(0.05 * sr)
23
+ rms = np.array([
24
+ np.sqrt(np.mean(y[i * hop:(i + 1) * hop] ** 2))
25
+ for i in range(len(y) // hop)
26
+ ])
27
+
28
+ if len(rms) == 0:
29
+ return midi_out, 0, 0.0
30
+
31
+ # Music starts when RMS first exceeds 10% of the peak energy
32
+ max_rms = np.max(rms)
33
+ music_start = 0.0
34
+ for i, r in enumerate(rms):
35
+ if r > max_rms * 0.1:
36
+ music_start = i * 0.05
37
+ break
38
+
39
+ if music_start < 0.1:
40
+ return midi_out, 0, music_start
41
+
42
+ removed = 0
43
+ for instrument in midi_out.instruments:
44
+ filtered = []
45
+ for note in instrument.notes:
46
+ if note.start < music_start:
47
+ removed += 1
48
+ else:
49
+ filtered.append(note)
50
+ instrument.notes = filtered
51
+
52
+ return midi_out, removed, music_start
53
+
54
+
55
+ def remove_trailing_silence_notes(midi_data, y, sr):
56
+ """Remove notes that appear during the audio fade-out/silence at the end."""
57
+ midi_out = copy.deepcopy(midi_data)
58
+
59
+ hop = int(0.05 * sr)
60
+ rms = np.array([
61
+ np.sqrt(np.mean(y[i * hop:(i + 1) * hop] ** 2))
62
+ for i in range(len(y) // hop)
63
+ ])
64
+ if len(rms) == 0:
65
+ return midi_out, 0, len(y) / sr
66
+
67
+ max_rms = np.max(rms)
68
+
69
+ # Find the last moment where RMS exceeds 5% of peak (searching backwards)
70
+ music_end = len(y) / sr
71
+ for i in range(len(rms) - 1, -1, -1):
72
+ if rms[i] > max_rms * 0.05:
73
+ music_end = (i + 1) * 0.05
74
+ break
75
+
76
+ removed = 0
77
+ for instrument in midi_out.instruments:
78
+ filtered = []
79
+ for note in instrument.notes:
80
+ if note.start > music_end:
81
+ removed += 1
82
+ else:
83
+ filtered.append(note)
84
+ instrument.notes = filtered
85
+
86
+ return midi_out, removed, music_end
87
+
88
+
89
+ def remove_low_energy_notes(midi_data, y, sr, hop_length=512):
90
+ """Remove notes whose onsets don't correspond to real audio energy.
91
+
92
+ Computes the onset strength envelope and removes notes at times
93
+ where the audio shows no significant onset energy. This catches
94
+ basic-pitch hallucinations that appear at normal pitches but have
95
+ no corresponding audio event.
96
+
97
+ Uses an adaptive threshold based on the recording's onset strength
98
+ distribution (15th percentile), so it works equally well on loud
99
+ and quiet recordings.
100
+ """
101
+ midi_out = copy.deepcopy(midi_data)
102
+
103
+ onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
104
+ onset_times = librosa.frames_to_time(
105
+ np.arange(len(onset_env)), sr=sr, hop_length=hop_length
106
+ )
107
+
108
+ removed = 0
109
+ for instrument in midi_out.instruments:
110
+ # First pass: measure strength per note
111
+ note_strengths = []
112
+ for note in instrument.notes:
113
+ frame = np.argmin(np.abs(onset_times - note.start))
114
+ lo = max(0, frame - 2)
115
+ hi = min(len(onset_env), frame + 3)
116
+ strength = float(np.max(onset_env[lo:hi]))
117
+ note_strengths.append(strength)
118
+
119
+ if not note_strengths:
120
+ continue
121
+
122
+ # Adaptive threshold: 15th percentile of note onset strengths
123
+ # This adapts to the recording's volume — quiet recordings get
124
+ # a lower threshold, loud recordings get a higher one.
125
+ # Floor at 0.5 to always catch clearly silent hallucinations.
126
+ strength_threshold = max(0.5, float(np.percentile(note_strengths, 15)))
127
+
128
+ filtered = []
129
+ for idx, note in enumerate(instrument.notes):
130
+ if note_strengths[idx] >= strength_threshold:
131
+ filtered.append(note)
132
+ else:
133
+ # Keep notes that are part of a chord with a strong onset
134
+ chord_has_energy = False
135
+ for other_idx, other in enumerate(instrument.notes):
136
+ if other is note:
137
+ continue
138
+ if abs(other.start - note.start) < 0.03 and note_strengths[other_idx] >= strength_threshold:
139
+ chord_has_energy = True
140
+ break
141
+ if chord_has_energy:
142
+ filtered.append(note)
143
+ else:
144
+ removed += 1
145
+ instrument.notes = filtered
146
+
147
+ return midi_out, removed
148
+
149
+
150
+ def remove_harmonic_ghosts(midi_data, y=None, sr=22050, hop_length=512):
151
+ """Remove notes that are harmonic doublings of louder lower notes.
152
+
153
+ Pairwise detector: for notes at harmonic intervals (7, 12, 19, 24
154
+ semitones), remove the upper note if it's clearly a harmonic ghost.
155
+
156
+ Uses CQT energy to protect strong notes: if the CQT shows the note
157
+ has strong energy (> -10dB), it's a real played note regardless of
158
+ velocity ratio. This prevents removing notes like C6 that happen to
159
+ co-occur with C5 but are genuinely played.
160
+ """
161
+ midi_out = copy.deepcopy(midi_data)
162
+ removed = 0
163
+
164
+ harmonic_intervals = {7, 12, 19, 24}
165
+
166
+ # Compute CQT for energy verification if audio provided
167
+ C_db = None
168
+ if y is not None:
169
+ N_BINS = 88 * 3
170
+ FMIN = librosa.note_to_hz('A0')
171
+ C = np.abs(librosa.cqt(
172
+ y, sr=sr, hop_length=hop_length,
173
+ fmin=FMIN, n_bins=N_BINS, bins_per_octave=36,
174
+ ))
175
+ C_db = librosa.amplitude_to_db(C, ref=np.max(C))
176
+
177
+ for instrument in midi_out.instruments:
178
+ notes = sorted(instrument.notes, key=lambda n: n.start)
179
+ to_remove = set()
180
+
181
+ for i, note in enumerate(notes):
182
+ if i in to_remove:
183
+ continue
184
+ if note.pitch < 48:
185
+ continue
186
+
187
+ # Check CQT energy — protect notes with moderate+ energy
188
+ if C_db is not None:
189
+ fund_bin = (note.pitch - 21) * 3 + 1
190
+ if 0 <= fund_bin < C_db.shape[0]:
191
+ start_frame = max(0, int(note.start * sr / hop_length))
192
+ end_frame = min(C_db.shape[1], start_frame + max(1, int(0.2 * sr / hop_length)))
193
+ lo = max(0, fund_bin - 1)
194
+ hi = min(C_db.shape[0], fund_bin + 2)
195
+ onset_db = float(np.max(C_db[lo:hi, start_frame:end_frame]))
196
+ if onset_db > -12.0:
197
+ # Real CQT energy present — keep this note
198
+ continue
199
+
200
+ for j, other in enumerate(notes):
201
+ if i == j or j in to_remove:
202
+ continue
203
+ if abs(other.start - note.start) > 0.10:
204
+ continue
205
+ diff = note.pitch - other.pitch
206
+ if diff in harmonic_intervals and diff > 0:
207
+ ratio = note.velocity / max(1, other.velocity)
208
+
209
+ if note.pitch >= 72:
210
+ # C5+: remove if noticeably quieter
211
+ if ratio < 0.75:
212
+ to_remove.add(i)
213
+ break
214
+ elif other.pitch < 48:
215
+ if ratio < 0.95:
216
+ to_remove.add(i)
217
+ break
218
+ else:
219
+ if ratio < 0.75:
220
+ to_remove.add(i)
221
+ break
222
+
223
+ instrument.notes = [n for k, n in enumerate(notes) if k not in to_remove]
224
+ removed += len(to_remove)
225
+
226
+ return midi_out, removed
227
+
228
+
229
+ def remove_phantom_notes(midi_data, max_pitch=None):
230
+ """Remove high-register notes that are likely harmonic artifacts.
231
+
232
+ Uses multiple factors to distinguish real notes from phantoms:
233
+ - Must be above the 95th percentile pitch
234
+ - Must be rare (< 3 occurrences at that exact pitch)
235
+ - Must have low velocity (< 40)
236
+ - Must be isolated (no other notes within 2 semitones and 500ms)
237
+ """
238
+ midi_out = copy.deepcopy(midi_data)
239
+ all_notes = [(n, i) for i, inst in enumerate(midi_out.instruments) for n in inst.notes]
240
+ all_pitches = [n.pitch for n, _ in all_notes]
241
+ if not all_pitches:
242
+ return midi_out, 0
243
+
244
+ if max_pitch is None:
245
+ max_pitch = int(np.percentile(all_pitches, 95))
246
+
247
+ pitch_counts = Counter(all_pitches)
248
+
249
+ # Build a time-sorted list for neighbor checking
250
+ time_sorted = sorted(all_notes, key=lambda x: x[0].start)
251
+
252
+ def is_isolated(note, all_sorted):
253
+ """Check if a note has no other notes nearby (within 100ms).
254
+
255
+ A note in a chord or musical event is not isolated, regardless
256
+ of the pitch of its neighbors. This prevents falsely removing
257
+ high notes that are part of chords with lower-pitched notes.
258
+ """
259
+ for other, _ in all_sorted:
260
+ if other is note:
261
+ continue
262
+ if other.start > note.start + 0.1:
263
+ break
264
+ if abs(other.start - note.start) < 0.1:
265
+ return False
266
+ return True
267
+
268
+ removed = 0
269
+ for instrument in midi_out.instruments:
270
+ filtered = []
271
+ for note in instrument.notes:
272
+ if note.pitch > max_pitch:
273
+ count = pitch_counts[note.pitch]
274
+ duration = note.end - note.start
275
+ # Higher velocity threshold for very high notes (above MIDI 80)
276
+ vel_thresh = 55 if note.pitch > 80 else 40
277
+ # Only remove if MULTIPLE indicators suggest it's a phantom:
278
+ # Very rare AND (low velocity OR very short OR isolated)
279
+ if count < 3 and (note.velocity < vel_thresh or duration < 0.08 or
280
+ is_isolated(note, time_sorted)):
281
+ removed += 1
282
+ continue
283
+ filtered.append(note)
284
+ instrument.notes = filtered
285
+
286
+ return midi_out, removed
287
+
288
+
289
+ def remove_spurious_onsets(midi_data, y, sr, ref_onsets, hop_length=512):
290
+ """Remove MIDI notes that form false-positive onsets not backed by audio.
291
+
292
+ Analysis shows 37 extra MIDI onsets cause the biggest F1 drag (precision=0.918).
293
+ This filter targets three categories of false positives:
294
+
295
+ 1. Chord fragments: notes that basic-pitch split from a real chord, creating
296
+ a separate onset within 60ms of a matched onset. These should have been
297
+ grouped with the chord.
298
+ 2. Isolated ghost onsets: single-note, low-strength onsets far from any
299
+ audio onset -- pure hallucinations.
300
+ 3. Short+quiet artifacts: onsets where every note is both short (<200ms)
301
+ and quiet (velocity < 50).
302
+
303
+ The filter first identifies which MIDI onsets already match audio onsets,
304
+ then only removes unmatched onsets meeting the above criteria.
305
+ """
306
+ midi_out = copy.deepcopy(midi_data)
307
+ tolerance = 0.05
308
+
309
+ onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
310
+ onset_times = librosa.frames_to_time(
311
+ np.arange(len(onset_env)), sr=sr, hop_length=hop_length
312
+ )
313
+
314
+ # Collect all notes and compute unique onsets
315
+ all_notes = sorted(
316
+ [n for inst in midi_out.instruments for n in inst.notes],
317
+ key=lambda n: n.start
318
+ )
319
+ midi_onsets = sorted(set(round(n.start, 4) for n in all_notes))
320
+ midi_onsets_arr = np.array(midi_onsets)
321
+
322
+ # Identify which MIDI onsets are already matched to audio onsets
323
+ matched_est = set()
324
+ for r in ref_onsets:
325
+ diffs = np.abs(midi_onsets_arr - r)
326
+ best = np.argmin(diffs)
327
+ if diffs[best] <= tolerance and best not in matched_est:
328
+ matched_est.add(best)
329
+
330
+ # For each unmatched onset, check removal criteria
331
+ onsets_to_remove = set()
332
+ for j, mo in enumerate(midi_onsets_arr):
333
+ if j in matched_est:
334
+ continue
335
+
336
+ # Get notes at this onset
337
+ onset_notes = [n for n in all_notes if abs(n.start - mo) < 0.03]
338
+ if not onset_notes:
339
+ continue
340
+
341
+ # Compute onset strength at this time
342
+ frame = np.argmin(np.abs(onset_times - mo))
343
+ lo = max(0, frame - 2)
344
+ hi = min(len(onset_env), frame + 3)
345
+ strength = float(np.max(onset_env[lo:hi]))
346
+
347
+ # Distance to nearest audio onset
348
+ diffs_audio = np.abs(ref_onsets - mo)
349
+ nearest_audio_ms = float(np.min(diffs_audio)) * 1000
350
+
351
+ # Check if near a matched MIDI onset (chord fragment)
352
+ near_matched = any(
353
+ abs(midi_onsets_arr[k] - mo) < 0.060
354
+ for k in matched_est
355
+ )
356
+
357
+ # Category 1: Chord fragment -- near a matched onset, but only if
358
+ # the onset has weak audio energy. Strong onsets near chords may be
359
+ # real grace notes or arpeggios.
360
+ if near_matched and strength < 2.0:
361
+ onsets_to_remove.add(j)
362
+ continue
363
+
364
+ # Category 2: Isolated ghost -- single note, low strength or far from audio
365
+ if len(onset_notes) == 1 and (strength < 1.5 or nearest_audio_ms > 100):
366
+ onsets_to_remove.add(j)
367
+ continue
368
+
369
+ # Category 3: Short+quiet artifact
370
+ if all(n.end - n.start < 0.2 and n.velocity < 50 for n in onset_notes):
371
+ onsets_to_remove.add(j)
372
+ continue
373
+
374
+ # Category 4: Low-velocity bass ghost -- single bass note (< MIDI 40),
375
+ # low velocity (< 35), far from audio onset. These are rumble artifacts
376
+ # that survive the energy filter.
377
+ if (len(onset_notes) == 1 and onset_notes[0].pitch < 40
378
+ and onset_notes[0].velocity < 35 and nearest_audio_ms > 60):
379
+ onsets_to_remove.add(j)
380
+ continue
381
+
382
+ # Category 5: Multi-note onset very far from any audio onset (> 200ms)
383
+ # with weak onset strength. These are chord-split artifacts or
384
+ # hallucinated events with no audio support.
385
+ if nearest_audio_ms > 200 and strength < 2.0:
386
+ onsets_to_remove.add(j)
387
+ continue
388
+
389
+ # Remove notes belonging to spurious onsets
390
+ times_to_remove = set(midi_onsets_arr[j] for j in onsets_to_remove)
391
+ removed = 0
392
+ for instrument in midi_out.instruments:
393
+ filtered = []
394
+ for note in instrument.notes:
395
+ note_onset = round(note.start, 4)
396
+ if any(abs(note_onset - t) < 0.03 for t in times_to_remove):
397
+ removed += 1
398
+ else:
399
+ filtered.append(note)
400
+ instrument.notes = filtered
401
+
402
+ return midi_out, removed, len(onsets_to_remove)
403
+
404
+
405
+ def remove_pitch_unconfirmed_notes(midi_data, y, sr, hop_length=512):
406
+ """Remove notes where the CQT has no energy at their fundamental pitch.
407
+
408
+ Checks the onset region (first 200ms) of each note for CQT energy,
409
+ not the full duration. This prevents CQT-extended notes from being
410
+ falsely removed due to low average energy over their extended tail.
411
+
412
+ Targets two ranges where hallucinations concentrate:
413
+ - Sub-bass (< MIDI 40): rumble artifacts
414
+ - Upper register (> MIDI 72): harmonic doublings
415
+ Core piano range (MIDI 40-72 / E2-C5) is reliable from basic-pitch.
416
+ """
417
+ midi_out = copy.deepcopy(midi_data)
418
+
419
+ N_BINS = 88 * 3
420
+ FMIN = librosa.note_to_hz('A0')
421
+ C = np.abs(librosa.cqt(
422
+ y, sr=sr, hop_length=hop_length,
423
+ fmin=FMIN, n_bins=N_BINS, bins_per_octave=36,
424
+ ))
425
+ C_db = librosa.amplitude_to_db(C, ref=np.max(C))
426
+
427
+ # Collect all notes for chord checking
428
+ all_notes = sorted(
429
+ [n for inst in midi_out.instruments for n in inst.notes],
430
+ key=lambda n: n.start
431
+ )
432
+
433
+ # Onset region: check max energy in first 200ms
434
+ onset_frames = max(1, int(0.2 * sr / hop_length))
435
+
436
+ removed = 0
437
+ for instrument in midi_out.instruments:
438
+ filtered = []
439
+ for note in instrument.notes:
440
+ # Only filter sub-bass and upper register — core range is reliable
441
+ if 40 <= note.pitch <= 72:
442
+ filtered.append(note)
443
+ continue
444
+
445
+ fund_bin = (note.pitch - 21) * 3 + 1
446
+ if fund_bin < 0 or fund_bin >= N_BINS:
447
+ filtered.append(note)
448
+ continue
449
+
450
+ start_frame = max(0, int(note.start * sr / hop_length))
451
+ check_end = min(C.shape[1], start_frame + onset_frames)
452
+ if start_frame >= check_end:
453
+ filtered.append(note)
454
+ continue
455
+
456
+ lo = max(0, fund_bin - 1)
457
+ hi = min(N_BINS, fund_bin + 2)
458
+ # Use max energy in onset region, not average over full duration
459
+ onset_db = float(np.max(C_db[lo:hi, start_frame:check_end]))
460
+
461
+ if note.pitch < 40:
462
+ thresh = -42.0
463
+ else: # > 72, upper register
464
+ thresh = -20.0
465
+
466
+ if onset_db < thresh:
467
+ # Remove if weak CQT evidence regardless of context
468
+ # Very weak = always remove; moderate weak = check isolation
469
+ if onset_db < thresh - 10:
470
+ # Extremely weak: always remove
471
+ removed += 1
472
+ continue
473
+ concurrent = sum(1 for o in all_notes
474
+ if abs(o.start - note.start) < 0.05 and o is not note)
475
+ if concurrent <= 3 or note.velocity < 55:
476
+ removed += 1
477
+ else:
478
+ filtered.append(note)
479
+ else:
480
+ filtered.append(note)
481
+ instrument.notes = filtered
482
+
483
+ return midi_out, removed
484
+
485
+
486
+ def apply_pitch_ceiling(midi_data, max_pitch=96):
487
+ """Remove notes above a hard pitch ceiling (C7 / MIDI 96).
488
+
489
+ Only truly extreme high notes are blanket-removed. Notes between C6-C7
490
+ are kept and handled by the CQT energy filter instead, since some
491
+ (like C6, D6) can be legitimate played notes.
492
+ """
493
+ midi_out = copy.deepcopy(midi_data)
494
+ removed = 0
495
+
496
+ for instrument in midi_out.instruments:
497
+ filtered = []
498
+ for note in instrument.notes:
499
+ if note.pitch >= max_pitch:
500
+ removed += 1
501
+ else:
502
+ filtered.append(note)
503
+ instrument.notes = filtered
504
+
505
+ return midi_out, removed
506
+
507
+
508
+ def limit_concurrent_notes(midi_data, max_per_hand=4, hand_split=60):
509
+ """Limit notes per chord to max_per_hand per hand.
510
+
511
+ Groups notes by onset time (within 30ms) and splits into left/right hand.
512
+ Removes excess notes — protects melody (highest RH pitch) and bass
513
+ (lowest LH pitch), then removes lowest velocity.
514
+ """
515
+ midi_out = copy.deepcopy(midi_data)
516
+ removed = 0
517
+
518
+ for instrument in midi_out.instruments:
519
+ notes = sorted(instrument.notes, key=lambda n: n.start)
520
+ if not notes:
521
+ continue
522
+
523
+ chords = []
524
+ current_chord = [0]
525
+ for i in range(1, len(notes)):
526
+ if notes[i].start - notes[current_chord[0]].start < 0.03:
527
+ current_chord.append(i)
528
+ else:
529
+ chords.append(current_chord)
530
+ current_chord = [i]
531
+ chords.append(current_chord)
532
+
533
+ to_remove = set()
534
+ for chord_indices in chords:
535
+ left = [idx for idx in chord_indices if notes[idx].pitch < hand_split]
536
+ right = [idx for idx in chord_indices if notes[idx].pitch >= hand_split]
537
+
538
+ for is_right, hand_indices in [(True, right), (False, left)]:
539
+ if len(hand_indices) <= max_per_hand:
540
+ continue
541
+
542
+ # Protect melody (highest RH) or bass (lowest LH)
543
+ if is_right:
544
+ protected = max(hand_indices, key=lambda idx: notes[idx].pitch)
545
+ else:
546
+ protected = min(hand_indices, key=lambda idx: notes[idx].pitch)
547
+
548
+ trimmable = [idx for idx in hand_indices if idx != protected]
549
+ scored = [(notes[idx].velocity, idx) for idx in trimmable]
550
+ scored.sort()
551
+
552
+ excess = len(hand_indices) - max_per_hand
553
+ for _, idx in scored[:excess]:
554
+ to_remove.add(idx)
555
+
556
+ instrument.notes = [n for k, n in enumerate(notes) if k not in to_remove]
557
+ removed += len(to_remove)
558
+
559
+ return midi_out, removed
560
+
561
+
562
+ def limit_total_concurrent(midi_data, max_per_hand=4, hand_split=60):
563
+ """Limit concurrent sounding notes to max_per_hand per hand.
564
+
565
+ Splits notes into left hand (< hand_split) and right hand (>= hand_split).
566
+ At each note onset, count concurrent notes in that hand. If > max_per_hand,
567
+ trim sustained notes — but protect the melody (highest RH pitch) and bass
568
+ (lowest LH pitch). Among the rest, trim lowest velocity first.
569
+ """
570
+ midi_out = copy.deepcopy(midi_data)
571
+ trimmed = 0
572
+
573
+ for instrument in midi_out.instruments:
574
+ notes = sorted(instrument.notes, key=lambda n: n.start)
575
+ if not notes:
576
+ continue
577
+
578
+ for i, note in enumerate(notes):
579
+ is_right = note.pitch >= hand_split
580
+
581
+ # Find all notes in the same hand currently sounding
582
+ sounding = []
583
+ for j in range(i):
584
+ if notes[j].end > note.start:
585
+ same_hand = (notes[j].pitch >= hand_split) == is_right
586
+ if same_hand:
587
+ sounding.append(j)
588
+
589
+ if len(sounding) + 1 > max_per_hand:
590
+ excess = len(sounding) + 1 - max_per_hand
591
+ # All indices including the current note
592
+ all_indices = sounding + [i]
593
+
594
+ if is_right:
595
+ # Protect highest pitch (melody)
596
+ protected = max(all_indices, key=lambda j: notes[j].pitch)
597
+ else:
598
+ # Protect lowest pitch (bass)
599
+ protected = min(all_indices, key=lambda j: notes[j].pitch)
600
+
601
+ # Among the sustained (not the new note), trim lowest velocity
602
+ # but never trim the protected note
603
+ trimmable = [j for j in sounding if j != protected]
604
+ scored = [(notes[j].velocity, j) for j in trimmable]
605
+ scored.sort() # lowest velocity trimmed first
606
+ for _, j in scored[:excess]:
607
+ notes[j].end = note.start
608
+ trimmed += 1
609
+
610
+ instrument.notes = [n for n in notes if n.end - n.start > 0.01]
611
+
612
+ return midi_out, trimmed
613
+
614
+
615
+ def extend_note_durations(midi_data, y, sr, hop_length=512, max_per_hand=4, hand_split=60):
616
+ """Extend MIDI note durations to match audio CQT energy decay.
617
+
618
+ Basic-pitch systematically underestimates note durations. This uses
619
+ the CQT spectrogram to find where the audio energy actually decays
620
+ and extends each note to match, dramatically improving spectral recall.
621
+
622
+ Concurrent-aware: won't extend a note past the point where doing so
623
+ would exceed max_per_hand concurrent notes in the same hand. This
624
+ prevents the downstream concurrent limiter from having to trim hundreds
625
+ of over-extended notes.
626
+ """
627
+ midi_out = copy.deepcopy(midi_data)
628
+
629
+ N_BINS = 88 * 3
630
+ FMIN = librosa.note_to_hz('A0')
631
+ C = np.abs(librosa.cqt(
632
+ y, sr=sr, hop_length=hop_length,
633
+ fmin=FMIN, n_bins=N_BINS, bins_per_octave=36,
634
+ ))
635
+ C_db = librosa.amplitude_to_db(C, ref=np.max(C))
636
+ C_norm = np.maximum(C_db, -80.0)
637
+ C_norm = (C_norm + 80.0) / 80.0
638
+ n_frames = C.shape[1]
639
+
640
+ # Pre-compute per-frame concurrent counts per hand (fast O(1) lookup)
641
+ right_count = np.zeros(n_frames, dtype=int)
642
+ left_count = np.zeros(n_frames, dtype=int)
643
+ for inst in midi_out.instruments:
644
+ for n in inst.notes:
645
+ sf = max(0, int(n.start * sr / hop_length))
646
+ ef = min(n_frames, int(n.end * sr / hop_length))
647
+ if n.pitch >= hand_split:
648
+ right_count[sf:ef] += 1
649
+ else:
650
+ left_count[sf:ef] += 1
651
+
652
+ extended = 0
653
+ for inst in midi_out.instruments:
654
+ # Sort notes by start time for overlap checking
655
+ notes_sorted = sorted(inst.notes, key=lambda n: (n.pitch, n.start))
656
+
657
+ for idx, note in enumerate(notes_sorted):
658
+ fund_bin = (note.pitch - 21) * 3 + 1
659
+ if fund_bin < 0 or fund_bin >= N_BINS:
660
+ continue
661
+
662
+ end_frame = min(n_frames, int(note.end * sr / hop_length))
663
+ # Max extension: 2 seconds beyond current end
664
+ max_extend = min(n_frames, end_frame + int(2.0 * sr / hop_length))
665
+
666
+ # Don't extend into the next note at the same pitch
667
+ next_start_frame = max_extend
668
+ for other in notes_sorted[idx + 1:]:
669
+ if other.pitch == note.pitch:
670
+ next_start_frame = min(next_start_frame, int(other.start * sr / hop_length) - 1)
671
+ break
672
+
673
+ is_right = note.pitch >= hand_split
674
+ hand_count = right_count if is_right else left_count
675
+
676
+ actual_end = end_frame
677
+ for f in range(end_frame, min(max_extend, next_start_frame)):
678
+ lo = max(0, fund_bin - 1)
679
+ hi = min(N_BINS, fund_bin + 2)
680
+ if np.mean(C_norm[lo:hi, f]) > 0.20:
681
+ # Check concurrent: this note isn't counted in hand_count
682
+ # beyond end_frame, so hand_count[f] >= max_per_hand means
683
+ # extending here would create max_per_hand + 1 concurrent
684
+ if hand_count[f] >= max_per_hand:
685
+ break
686
+ actual_end = f
687
+ else:
688
+ break
689
+
690
+ new_end = actual_end * hop_length / sr
691
+ if new_end > note.end + 0.05:
692
+ # Update the concurrent count array for the extended region
693
+ old_end_frame = end_frame
694
+ new_end_frame = min(n_frames, int(new_end * sr / hop_length))
695
+ if new_end_frame > old_end_frame:
696
+ hand_count[old_end_frame:new_end_frame] += 1
697
+ note.end = new_end
698
+ extended += 1
699
+
700
+ return midi_out, extended
701
+
702
+
703
def align_chords(midi_data, threshold=0.02):
    """Snap notes within a chord to the exact same onset time.

    basic-pitch's ~12ms frame resolution can make notes in the same chord
    start at slightly different times, causing a 'flammy' sound. Notes whose
    onsets fall within ``threshold`` seconds of a cluster's first note are
    all moved to the cluster's median onset (durations preserved).

    Returns:
        (midi_out, aligned) — a deep copy with aligned notes, and the
        number of notes whose onset was moved.
    """
    result = copy.deepcopy(midi_data)
    moved = 0

    for instrument in result.instruments:
        by_onset = sorted(instrument.notes, key=lambda n: n.start)
        total = len(by_onset)
        anchor = 0
        while anchor < total:
            # Gather every note within `threshold` of the anchor's onset.
            cursor = anchor + 1
            while cursor < total and by_onset[cursor].start - by_onset[anchor].start < threshold:
                cursor += 1
            cluster = by_onset[anchor:cursor]

            if len(cluster) > 1:
                target = float(np.median([n.start for n in cluster]))
                for member in cluster:
                    if member.start != target:
                        length = member.end - member.start
                        member.start = target
                        member.end = target + length
                        moved += 1

            anchor = cursor

    return result, moved
734
+
735
+
736
def quantize_to_beat_grid(midi_data, y, sr, hop_length=512, strength=0.5):
    """Quantize note onsets to a detected beat grid.

    Tracks tempo and beat positions with librosa, subdivides each beat
    interval into four 16th-note slots (extrapolating one beat past the
    last detected beat), and pulls each onset toward its nearest slot by
    ``strength`` (0..1). Durations are preserved.

    Returns:
        (midi_out, quantized, tempo) — modified copy, count of onsets that
        were moved more than 5ms, and the detected tempo in BPM.
    """
    result = copy.deepcopy(midi_data)

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
    # Some librosa versions return tempo as a length-1 array.
    if hasattr(tempo, '__len__'):
        tempo = float(tempo[0])
    beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=hop_length)

    if len(beat_times) < 2:
        print(" Could not detect beats, skipping quantization")
        return result, 0, tempo

    # 16th-note grid: four subdivisions of every beat interval, plus four
    # extrapolated slots after the final beat (reusing the last interval).
    grid_points = []
    for b0, b1 in zip(beat_times[:-1], beat_times[1:]):
        step = (b1 - b0) / 4
        grid_points.extend(b0 + k * step for k in range(4))
    step = (beat_times[-1] - beat_times[-2]) / 4
    grid_points.extend(beat_times[-1] + k * step for k in range(4))

    grid_points = np.array(grid_points)
    snapped = 0

    for instrument in result.instruments:
        for note in instrument.notes:
            offsets = grid_points - note.start
            deviation = offsets[np.argmin(np.abs(offsets))]

            # Only quantize onsets already near the grid (< 60ms off).
            if abs(deviation) < 0.06:
                length = note.end - note.start
                note.start = note.start + deviation * strength
                note.end = note.start + length
                if abs(deviation) > 0.005:
                    snapped += 1

    return result, snapped, tempo
785
+
786
+
787
def correct_onsets(midi_data, ref_onsets, min_off=0.02, max_off=0.15):
    """Correct chord onsets that are clearly misaligned with audio onsets.

    Groups notes (across all instruments) into chords by onset proximity
    (< 30ms from the group's first note), then for each chord checks if
    there's a closer audio onset. Only corrects if the gap is between
    min_off and max_off AND no adjacent chord is a better match for that
    audio onset.

    Args:
        midi_data: pretty_midi.PrettyMIDI-like object; not mutated (a deep
            copy is returned).
        ref_onsets: 1-D np.ndarray of detected audio onset times (seconds).
        min_off: minimum misalignment (s) worth correcting.
        max_off: maximum misalignment (s) still considered the same event.

    Returns:
        (midi_out, corrections, total_shift, n_chords, initial_f1, final_f1)
        where the F1 values are onset_f1 scores before/after correction.
    """
    midi_out = copy.deepcopy(midi_data)

    # Flatten to (note, instrument_index) pairs sorted by onset. The
    # instrument index is carried along but currently unused below.
    all_notes = sorted(
        [(n, inst_idx) for inst_idx, inst in enumerate(midi_out.instruments)
         for n in inst.notes],
        key=lambda x: x[0].start
    )

    # Cluster into chords: each note within 30ms of its group's FIRST note.
    chord_groups = []
    if all_notes:
        current_group = [all_notes[0]]
        for item in all_notes[1:]:
            if item[0].start - current_group[0][0].start < 0.03:
                current_group.append(item)
            else:
                chord_groups.append(current_group)
                current_group = [item]
        chord_groups.append(current_group)

    # Snapshot of pre-correction onsets: the adjacency checks below
    # deliberately compare against these original positions.
    chord_onsets = np.array([g[0][0].start for g in chord_groups])
    corrections = 0
    total_shift = 0.0

    for group_idx, group in enumerate(chord_groups):
        chord_onset = chord_onsets[group_idx]
        diffs = ref_onsets - chord_onset
        abs_diffs = np.abs(diffs)
        nearest_idx = np.argmin(abs_diffs)
        nearest_diff = diffs[nearest_idx]
        abs_diff = abs_diffs[nearest_idx]

        if min_off < abs_diff < max_off:
            # Verify no adjacent chord is a better match
            if group_idx > 0:
                prev_onset = chord_onsets[group_idx - 1]
                if abs(ref_onsets[nearest_idx] - prev_onset) < abs_diff:
                    continue
            if group_idx < len(chord_onsets) - 1:
                next_onset = chord_onsets[group_idx + 1]
                if abs(ref_onsets[nearest_idx] - next_onset) < abs_diff:
                    continue

            # Shift the whole chord onto the audio onset, keeping durations.
            for note, inst_idx in group:
                duration = note.end - note.start
                note.start = max(0, note.start + nearest_diff)
                note.end = note.start + duration

            corrections += 1
            total_shift += abs(nearest_diff)

    # F1 before (snapshot) vs. after (re-read from the mutated notes).
    initial_f1 = onset_f1(ref_onsets, chord_onsets)
    corrected_onsets = np.array([g[0][0].start for g in chord_groups])
    final_f1 = onset_f1(ref_onsets, corrected_onsets)

    return midi_out, corrections, total_shift, len(chord_groups), initial_f1, final_f1
849
+
850
+
851
def apply_global_offset(midi_data, ref_onsets):
    """Measure and remove a systematic timing bias against audio onsets.

    Every MIDI onset within 100ms of some audio onset contributes its
    signed gap to the closest one; the median of those gaps is the bias.
    All notes are shifted by that amount unless it is below 5ms.

    Returns:
        (midi_out, offset) — modified deep copy and the applied offset in
        seconds (0.0 when no match or the bias was negligible).
    """
    result = copy.deepcopy(midi_data)
    midi_onsets = sorted({note.start for inst in result.instruments for note in inst.notes})

    gaps = []
    for onset in midi_onsets:
        dists = np.abs(ref_onsets - onset)
        best = np.argmin(dists)
        if dists[best] < 0.10:
            gaps.append(ref_onsets[best] - onset)  # positive = MIDI is early, negative = late

    if not gaps:
        return result, 0.0

    bias = float(np.median(gaps))

    # Only apply if the offset is meaningful (> 5ms)
    if abs(bias) < 0.005:
        return result, 0.0

    for inst in result.instruments:
        for note in inst.notes:
            length = note.end - note.start
            note.start = max(0, note.start + bias)
            note.end = note.start + length

    return result, bias
883
+
884
+
885
def fix_note_overlap(midi_data, hand_split=60, min_duration=0.10):
    """Trim overlapping right-hand notes so each releases cleanly.

    For each right-hand note (pitch >= hand_split), looks ahead at up to
    seven later notes; on the first later onset it overlaps by more than
    50ms, the note is cut 10ms before that onset, but never below 70% of
    its original length or below min_duration. A second pass then enforces
    min_duration on ALL notes.

    Returns:
        (midi_out, trimmed, enforced) — modified deep copy, number of
        overlaps trimmed, and number of minimum durations enforced.
    """
    result = copy.deepcopy(midi_data)
    trimmed = 0

    for instrument in result.instruments:
        right_hand = sorted(
            (n for n in instrument.notes if n.pitch >= hand_split),
            key=lambda n: (n.start, n.pitch),
        )

        for pos, current in enumerate(right_hand):
            for upcoming in right_hand[pos + 1:pos + 8]:
                if upcoming.start <= current.start:
                    continue

                if current.end - upcoming.start > 0.05:  # >50ms overlap
                    span = current.end - current.start
                    release = upcoming.start - 0.01
                    # Never shorten more than 30% of the original duration.
                    floor = current.start + span * 0.7
                    current.end = max(release, floor)
                    if current.end - current.start < min_duration:
                        current.end = current.start + min_duration
                    trimmed += 1
                    break

    # Enforce minimum duration on ALL notes (catches any collapsed durations)
    enforced = 0
    for instrument in result.instruments:
        for note in instrument.notes:
            if note.end - note.start < min_duration:
                note.end = note.start + min_duration
                enforced += 1

    return result, trimmed, enforced
926
+
927
+
928
def recover_missing_notes(midi_data, y, sr, hop_length=512, snap_onsets=None):
    """Recover strong notes the transcriber missed using CQT analysis.

    Scans the audio CQT for pitch energy that isn't represented in the MIDI.
    When a pitch has strong, sustained energy but no corresponding MIDI note,
    synthesize one. Targets upper register (>= C4) where basic-pitch can
    under-detect, especially when harmonics cause false removal.

    If snap_onsets is provided, recovered notes are snapped to the nearest
    existing onset for rhythmic alignment.

    Should be run AFTER all removal filters so the coverage map reflects
    what actually survived.

    Args:
        midi_data: pretty_midi.PrettyMIDI object; not mutated (a deep copy
            is returned). Recovered notes are appended to instruments[0],
            which is assumed to exist.
        y: mono audio signal.
        sr: sample rate of ``y``.
        hop_length: CQT hop length in samples.
        snap_onsets: optional sequence of existing onset times (seconds).

    Returns:
        (midi_out, recovered) — the modified copy and how many notes were
        synthesized.
    """
    midi_out = copy.deepcopy(midi_data)

    # 3 CQT bins per semitone over the 88 piano keys, starting at A0.
    N_BINS = 88 * 3
    FMIN = librosa.note_to_hz('A0')
    C = np.abs(librosa.cqt(
        y, sr=sr, hop_length=hop_length,
        fmin=FMIN, n_bins=N_BINS, bins_per_octave=36,
    ))
    C_db = librosa.amplitude_to_db(C, ref=np.max(C))

    times = librosa.frames_to_time(np.arange(C.shape[1]), sr=sr, hop_length=hop_length)

    # Build a set of existing note coverage: (pitch, frame) pairs
    existing = set()
    for inst in midi_out.instruments:
        for note in inst.notes:
            start_frame = max(0, int(note.start * sr / hop_length))
            end_frame = min(C.shape[1], int(note.end * sr / hop_length))
            for f in range(start_frame, end_frame):
                existing.add((note.pitch, f))

    # Scan C4 (60) to B6 (95) for uncovered energy
    recovered = 0
    min_energy = -10.0  # dB threshold — only recover notes with strong CQT energy
    min_duration_s = 0.05  # ~50ms minimum
    gap_tolerance = 3  # allow 3-frame dips without breaking a note

    for midi_pitch in range(60, 96):
        # Center bin of this pitch's 3-bin semitone group (A0 = pitch 21).
        fund_bin = (midi_pitch - 21) * 3 + 1
        if fund_bin < 0 or fund_bin >= N_BINS:
            continue

        # Harmonic check: skip if an octave-below note is much louder
        # (this note is likely a harmonic, not a real played note).
        # NOTE: compares whole-clip peak energies, not per-frame ones.
        lower_pitch = midi_pitch - 12
        if lower_pitch >= 21:
            lower_bin = (lower_pitch - 21) * 3 + 1
            if 0 <= lower_bin < N_BINS:
                lower_lo = max(0, lower_bin - 1)
                lower_hi = min(N_BINS, lower_bin + 2)
                upper_energy = float(np.max(C_db[max(0, fund_bin - 1):min(N_BINS, fund_bin + 2), :]))
                lower_energy = float(np.max(C_db[lower_lo:lower_hi, :]))
                if lower_energy - upper_energy > 12:
                    # Octave below is 12+ dB louder — likely a harmonic
                    continue

        lo = max(0, fund_bin - 1)
        hi = min(N_BINS, fund_bin + 2)

        # Get energy and coverage for this pitch
        pitch_energy = np.max(C_db[lo:hi, :], axis=0)

        # Find uncovered regions with strong energy
        strong_uncovered = np.array([
            pitch_energy[f] >= min_energy and (midi_pitch, f) not in existing
            for f in range(len(pitch_energy))
        ])

        # Close small gaps (morphological closing): a dipped frame joins its
        # neighbors if it still has near-threshold energy.
        for f in range(1, len(strong_uncovered) - 1):
            if not strong_uncovered[f] and pitch_energy[f] >= min_energy - 5:
                before = any(strong_uncovered[max(0, f - gap_tolerance):f])
                after = any(strong_uncovered[f + 1:min(len(strong_uncovered), f + gap_tolerance + 1)])
                if before and after:
                    strong_uncovered[f] = True

        # Extract contiguous True runs as candidate (start, end) frame regions.
        regions = []
        in_region = False
        start_f = 0
        for f in range(len(strong_uncovered)):
            if strong_uncovered[f] and not in_region:
                start_f = f
                in_region = True
            elif not strong_uncovered[f] and in_region:
                regions.append((start_f, f))
                in_region = False
        if in_region:
            regions.append((start_f, len(strong_uncovered)))

        for start_f, end_f in regions:
            t_start = times[start_f]
            t_end = times[min(end_f, len(times) - 1)]
            if t_end - t_start < min_duration_s:
                continue

            # Map average dB energy to a modest velocity in [35, 75].
            avg_energy = float(np.mean(pitch_energy[start_f:end_f]))
            velocity = min(75, max(35, int(55 + avg_energy * 1.5)))

            # Snap to nearest existing onset for rhythmic alignment
            note_start = t_start
            note_end = t_end
            if snap_onsets is not None and len(snap_onsets) > 0:
                snap_arr = np.array(snap_onsets)
                diffs = np.abs(snap_arr - t_start)
                nearest_idx = np.argmin(diffs)
                if diffs[nearest_idx] < 0.06:
                    dur = t_end - t_start
                    note_start = snap_arr[nearest_idx]
                    note_end = note_start + dur

            new_note = pretty_midi.Note(
                velocity=velocity,
                pitch=midi_pitch,
                start=note_start,
                end=note_end,
            )
            midi_out.instruments[0].notes.append(new_note)
            recovered += 1

    return midi_out, recovered
1053
+
1054
+
1055
def optimize(original_audio_path, midi_path, output_path=None):
    """Full optimization pipeline: clean up a transcribed piano MIDI file.

    Runs, in order: silence trimming, hallucination/ghost removal, chord
    alignment, beat-grid quantization, multi-pass onset correction against
    detected audio onsets, overlap/duration repair, CQT-based duration
    extension and note recovery, and playability limits — then writes the
    result and optional spectral / chord reports to disk.

    Args:
        original_audio_path: path to the source audio (anything librosa
            can load).
        midi_path: path to the MIDI transcription to optimize.
        output_path: where to write the optimized MIDI; defaults to
            overwriting midi_path.

    Returns:
        The optimized pretty_midi.PrettyMIDI object (also written to
        output_path).
    """
    if output_path is None:
        output_path = midi_path

    sr = 22050
    hop_length = 512

    # Load audio and detect onsets
    print(f"Analyzing audio: {original_audio_path}")
    y, _ = librosa.load(original_audio_path, sr=sr, mono=True)
    audio_duration = len(y) / sr

    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    # Use backtrack=False: basic-pitch onsets align with energy peaks, not
    # the earlier rise points that backtrack finds (~50ms earlier).
    # Use delta=0.04 for higher sensitivity — detects ~15% more onsets,
    # reducing unmatched MIDI onsets from 116 to 80.
    ref_onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, hop_length=hop_length,
        backtrack=False, delta=0.04
    )
    ref_onsets = librosa.frames_to_time(ref_onset_frames, sr=sr, hop_length=hop_length)
    print(f" {audio_duration:.1f}s, {len(ref_onsets)} audio onsets")

    # Load MIDI
    midi_data = pretty_midi.PrettyMIDI(str(midi_path))
    total_notes = sum(len(inst.notes) for inst in midi_data.instruments)
    print(f" {total_notes} MIDI notes")

    # Step 0: Remove notes in leading silence (mic rumble artifacts)
    print("\nStep 0: Removing notes in leading silence...")
    midi_data, silence_removed, music_start = remove_leading_silence_notes(midi_data, y, sr)
    if silence_removed:
        print(f" Music starts at {music_start:.2f}s, removed {silence_removed} noise notes")
    else:
        print(f" No leading silence detected")

    # Step 0b: Remove notes in trailing silence
    print("\nStep 0b: Removing notes in trailing silence...")
    midi_data, trail_removed, music_end = remove_trailing_silence_notes(midi_data, y, sr)
    if trail_removed:
        print(f" Music ends at {music_end:.2f}s, removed {trail_removed} trailing notes")
    else:
        print(f" No trailing silence notes detected")

    # Step 0c: Remove low-energy hallucinations
    print("\nStep 0c: Removing low-energy hallucinations...")
    midi_data, energy_removed = remove_low_energy_notes(midi_data, y, sr, hop_length)
    print(f" Removed {energy_removed} notes with no audio onset energy")

    # Step 0d: Remove harmonic ghost notes (CQT-aware)
    print("\nStep 0d: Removing harmonic ghost notes...")
    midi_data, ghosts_removed = remove_harmonic_ghosts(midi_data, y, sr, hop_length)
    print(f" Removed {ghosts_removed} octave-harmonic ghosts")

    # Step 1: Remove phantom high notes (conservative)
    print("\nStep 1: Removing phantom high notes...")
    midi_data, phantoms_removed = remove_phantom_notes(midi_data)
    print(f" Removed {phantoms_removed} phantom notes")

    # Step 1b: Hard pitch ceiling at C7 (MIDI 96) — extreme highs only
    print("\nStep 1b: Applying pitch ceiling (C7 / MIDI 96)...")
    midi_data, ceiling_removed = apply_pitch_ceiling(midi_data, max_pitch=96)
    print(f" Removed {ceiling_removed} notes above C7")

    # Step 2: Align chord notes to single onset
    print("\nStep 2: Aligning chord notes...")
    midi_data, chords_aligned = align_chords(midi_data)
    print(f" Aligned {chords_aligned} notes within chords")

    # Step 3: Full beat-grid quantization
    print("\nStep 3: Quantizing to beat grid...")
    midi_data, notes_quantized, detected_tempo = quantize_to_beat_grid(
        midi_data, y, sr, hop_length, strength=1.0
    )
    print(f" Detected tempo: {detected_tempo:.0f} BPM")
    print(f" Quantized {notes_quantized} notes (full snap)")

    # Step 4: Targeted onset correction against audio
    print("\nStep 4: Correcting onsets against audio...")
    midi_data, corrections, total_shift, n_chords, pre_f1, post_f1 = \
        correct_onsets(midi_data, ref_onsets)
    avg_shift = (total_shift / corrections * 1000) if corrections > 0 else 0
    print(f" Corrected {corrections}/{n_chords} (avg {avg_shift:.0f}ms)")
    print(f" Onset F1: {pre_f1:.4f} -> {post_f1:.4f}")

    # Step 5: Tight second correction pass (10-60ms window)
    print("\nStep 5: Fine-tuning onsets (tight pass)...")
    midi_data, corrections2, total_shift2, n_chords2, _, post_f1_2 = \
        correct_onsets(midi_data, ref_onsets, min_off=0.01, max_off=0.06)
    avg_shift2 = (total_shift2 / corrections2 * 1000) if corrections2 > 0 else 0
    print(f" Fine-tuned {corrections2}/{n_chords2} (avg {avg_shift2:.0f}ms)")
    print(f" Onset F1: {post_f1:.4f} -> {post_f1_2:.4f}")

    # Step 6: Micro-correction pass (5-25ms window)
    print("\nStep 6: Micro-correcting onsets...")
    midi_data, corrections3, total_shift3, n_chords3, _, post_f1_3 = \
        correct_onsets(midi_data, ref_onsets, min_off=0.005, max_off=0.025)
    avg_shift3 = (total_shift3 / corrections3 * 1000) if corrections3 > 0 else 0
    print(f" Micro-corrected {corrections3}/{n_chords3} (avg {avg_shift3:.0f}ms)")
    print(f" Onset F1: {post_f1_2:.4f} -> {post_f1_3:.4f}")

    # Step 6b: Remove spurious false-positive onsets
    print("\nStep 6b: Removing spurious onsets (false positive cleanup)...")
    midi_data, spurious_notes, spurious_onsets = remove_spurious_onsets(
        midi_data, y, sr, ref_onsets, hop_length
    )
    print(f" Removed {spurious_notes} notes across {spurious_onsets} spurious onsets")

    # Step 6c: Wide onset recovery pass (50-120ms window) to rescue false negatives
    print("\nStep 6c: Wide onset recovery (rescuing false negatives)...")
    midi_data, corrections_wide, total_shift_wide, n_chords_wide, _, post_f1_wide = \
        correct_onsets(midi_data, ref_onsets, min_off=0.04, max_off=0.12)
    avg_shift_wide = (total_shift_wide / corrections_wide * 1000) if corrections_wide > 0 else 0
    print(f" Recovered {corrections_wide}/{n_chords_wide} (avg {avg_shift_wide:.0f}ms)")
    print(f" Onset F1: {post_f1_3:.4f} -> {post_f1_wide:.4f}")

    # Step 7: Global offset correction
    print("\nStep 7: Correcting systematic offset...")
    midi_data, offset = apply_global_offset(midi_data, ref_onsets)
    print(f" Applied {offset*1000:+.1f}ms global offset")

    # Step 8: Fix overlaps and enforce min duration (LAST — after all position changes)
    print("\nStep 8: Fixing overlaps and enforcing min duration...")
    midi_data, notes_trimmed, durations_enforced = fix_note_overlap(midi_data)
    print(f" Trimmed {notes_trimmed} overlapping notes")
    print(f" Enforced min duration on {durations_enforced} notes")

    # Step 8b: CQT-based duration extension
    print("\nStep 8b: Extending note durations to match audio decay...")
    midi_data, notes_extended = extend_note_durations(midi_data, y, sr, hop_length)
    print(f" Extended {notes_extended} notes to match audio CQT decay")

    # Step 8c: Re-enforce minimum duration after CQT extension
    min_dur_enforced_2 = 0
    for instrument in midi_data.instruments:
        for note in instrument.notes:
            if note.end - note.start < 0.10:
                note.end = note.start + 0.10
                min_dur_enforced_2 += 1
    if min_dur_enforced_2:
        print(f"\nStep 8c: Re-enforced min duration on {min_dur_enforced_2} notes after CQT extension")

    # Step 8d: CQT pitch-specific energy filter (remove bass hallucinations)
    print("\nStep 8d: Removing pitch-unconfirmed bass notes...")
    midi_data, cqt_removed = remove_pitch_unconfirmed_notes(midi_data, y, sr, hop_length)
    print(f" Removed {cqt_removed} notes with no CQT energy at their pitch")

    # Step 8e: Recover missing notes from CQT energy
    # Runs late so the coverage map reflects what actually survived all filters.
    # Recovered notes won't be touched by phantom/spurious/pitch filters.
    print("\nStep 8e: Recovering missing notes from CQT analysis...")
    # Collect existing onset times to snap recovered notes to
    existing_onsets = sorted(set(
        round(n.start, 4) for inst in midi_data.instruments for n in inst.notes
    ))
    midi_data, notes_recovered = recover_missing_notes(
        midi_data, y, sr, hop_length, snap_onsets=existing_onsets
    )
    print(f" Recovered {notes_recovered} notes from CQT energy")

    # Step 8f: Playability filter — limit per-onset chord size (4 per hand)
    print("\nStep 8f: Playability filter (max 4 notes per hand per chord)...")
    midi_data, playability_removed = limit_concurrent_notes(midi_data, max_per_hand=4)
    print(f" Removed {playability_removed} excess chord notes")

    # Step 8g: Limit total concurrent sounding notes (4 per hand)
    print("\nStep 8g: Concurrent sounding limit (max 4 per hand)...")
    midi_data, sustain_trimmed = limit_total_concurrent(midi_data, max_per_hand=4)
    print(f" Trimmed {sustain_trimmed} sustained notes to reduce pileup")

    # Final metrics
    final_onsets = []
    for inst in midi_data.instruments:
        for n in inst.notes:
            final_onsets.append(n.start)
    final_onsets = np.unique(np.round(np.sort(final_onsets), 4))
    final_f1 = onset_f1(ref_onsets, final_onsets)
    final_notes = sum(len(inst.notes) for inst in midi_data.instruments)

    # Duration sanity check
    all_durs = [n.end - n.start for inst in midi_data.instruments for n in inst.notes]
    min_dur = min(all_durs) * 1000 if all_durs else 0

    print(f"\nDone:")
    print(f" Phantoms removed: {phantoms_removed}")
    print(f" Pitch ceiling removed: {ceiling_removed}")
    print(f" Playability filter: {playability_removed} chord / {sustain_trimmed} sustain")
    print(f" Chords aligned: {chords_aligned}")
    print(f" Notes quantized: {notes_quantized} ({detected_tempo:.0f} BPM)")
    print(f" Onsets corrected: {corrections}/{n_chords}")
    print(f" Spurious onsets removed: {spurious_onsets} ({spurious_notes} notes)")
    print(f" FN recovery corrections: {corrections_wide}")
    print(f" Global offset: {offset*1000:+.1f}ms")
    print(f" Overlaps trimmed: {notes_trimmed}")
    print(f" Min durations enforced: {durations_enforced}")
    print(f" Notes extended (CQT decay): {notes_extended}")
    # Playability check: max concurrent notes per hand
    # (O(n^2) scan over final notes — diagnostic only, not a filter)
    all_final = sorted(
        [n for inst in midi_data.instruments for n in inst.notes],
        key=lambda n: n.start
    )
    max_left = 0
    max_right = 0
    for i, note in enumerate(all_final):
        is_right = note.pitch >= 60
        concurrent = sum(1 for o in all_final
                         if o.start <= note.start < o.end
                         and (o.pitch >= 60) == is_right)
        if is_right:
            max_right = max(max_right, concurrent)
        else:
            max_left = max(max_left, concurrent)

    print(f" Final onset F1: {final_f1:.4f}")
    print(f" Min note duration: {min_dur:.0f}ms")
    print(f" Max concurrent: L={max_left} R={max_right}")
    print(f" Notes: {total_notes} -> {final_notes}")

    # Final step: shift all notes so music starts at t=0
    # (must be AFTER all audio-aligned processing like onset detection, CQT filters)
    if music_start > 0.1:
        print(f"\nShifting all notes by -{music_start:.2f}s so music starts at t=0...")
        for instrument in midi_data.instruments:
            for note in instrument.notes:
                note.start = max(0, note.start - music_start)
                note.end = max(note.start + 0.01, note.end - music_start)

    midi_data.write(str(output_path))
    print(f" Written to {output_path}")

    # Step 9: Spectral fidelity analysis (CQT comparison)
    print("\nStep 9: Spectral fidelity analysis (CQT comparison)...")
    try:
        from spectral import spectral_fidelity
        spec_results = spectral_fidelity(y, sr, midi_data, hop_length)
        print(f" Spectral F1: {spec_results['spectral_f1']:.4f}")
        print(f" Spectral Precision: {spec_results['spectral_precision']:.4f}")
        print(f" Spectral Recall: {spec_results['spectral_recall']:.4f}")
        print(f" Spectral Similarity: {spec_results['spectral_similarity']:.4f}")

        # Save spectral report alongside MIDI
        import json
        report_path = str(output_path).replace('.mid', '_spectral.json')
        Path(report_path).write_text(json.dumps(spec_results, indent=2))
        print(f" Report saved to {report_path}")
    except Exception as e:
        # Best-effort: a failed report never blocks the pipeline.
        print(f" Spectral analysis failed: {e}")

    # Step 10: Chord detection
    print("\nStep 10: Detecting chords...")
    try:
        from chords import detect_chords
        chords_json_path = str(Path(output_path).with_name(
            Path(output_path).stem + "_chords.json"
        ))
        chord_events = detect_chords(str(output_path), chords_json_path)
        print(f" Detected {len(chord_events)} chord regions")
    except Exception as e:
        # Best-effort: chord data is optional for downstream consumers.
        print(f" Chord detection failed: {e}")
        chord_events = []

    return midi_data
1319
+
1320
+
1321
def onset_f1(ref_onsets, est_onsets, tolerance=0.05):
    """Compute onset detection F1 score.

    Greedy one-to-one matching: each reference onset claims its nearest
    estimated onset when within ``tolerance`` seconds and not yet taken.

    Returns:
        F1 in [0, 1]; 1.0 when both sets are empty, 0.0 when only one is.
    """
    n_ref = len(ref_onsets)
    n_est = len(est_onsets)
    if n_ref == 0 and n_est == 0:
        return 1.0
    if n_ref == 0 or n_est == 0:
        return 0.0

    claimed = set()
    hits = 0
    for ref_time in ref_onsets:
        gaps = np.abs(est_onsets - ref_time)
        candidate = np.argmin(gaps)
        if gaps[candidate] <= tolerance and candidate not in claimed:
            claimed.add(candidate)
            hits += 1

    precision = hits / n_est
    recall = hits / n_ref

    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)
1344
+
1345
+
1346
if __name__ == "__main__":
    import sys

    # CLI entry point: optimize.py <original_audio> <midi_file> [output.mid]
    args = sys.argv[1:]
    if len(args) < 2:
        print("Usage: python optimize.py <original_audio> <midi_file> [output.mid]")
        sys.exit(1)

    audio_path, midi_path = args[0], args[1]
    out_path = args[2] if len(args) > 2 else None
    optimize(audio_path, midi_path, out_path)
transcriber/spectral.py ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Spectral fidelity comparison: CQT of original audio vs MIDI piano-roll.
2
+
3
+ Compares a CQT spectrogram of the original audio against an ideal
4
+ spectrogram synthesized from the MIDI note positions, including harmonic
5
+ modeling. This gives a holistic fidelity metric that captures pitch,
6
+ timing, duration, and velocity accuracy simultaneously.
7
+
8
+ Piano notes produce rich harmonics (2nd, 3rd, 4th... partials). The MIDI
9
+ pianoroll models these harmonics so the comparison is fair against the
10
+ real audio CQT which contains them naturally.
11
+ """
12
+
13
+ import json
14
+ import numpy as np
15
+ import librosa
16
+ import pretty_midi
17
+ from pathlib import Path
18
+
19
+
20
# CQT parameters — 88 piano keys, 3 bins per semitone for smoothness.
FMIN = librosa.note_to_hz('A0')  # 27.5 Hz — lowest piano key
N_BINS = 88 * 3                  # 264 bins covering A0–C8
BINS_PER_OCTAVE = 12 * 3         # 36 (3 bins per semitone)

# Piano harmonic model: partial number -> semitones above fundamental, dB attenuation.
# Based on typical grand piano harmonic spectrum. The semitone offsets are the
# equal-tempered positions of the harmonic frequency ratios (e.g. ratio 3 is
# 19.02 semitones, an octave plus a just fifth); attenuations are rough
# per-partial level drops used when synthesizing the ideal MIDI spectrogram.
HARMONICS = [
    # (freq_ratio, semitone_offset, dB_attenuation)
    (2, 12.0, -6),    # 2nd partial: octave above
    (3, 19.02, -12),  # 3rd partial: octave + fifth
    (4, 24.0, -16),   # 4th partial: 2 octaves
    (5, 27.86, -20),  # 5th partial: 2 octaves + major 3rd
    (6, 31.02, -22),  # 6th partial: 2 octaves + fifth
    (7, 33.69, -26),  # 7th partial: ~2 octaves + minor 7th
    (8, 36.0, -28),   # 8th partial: 3 octaves
]
37
+
38
+
39
def audio_cqt(y, sr, hop_length=512):
    """Compute magnitude CQT of an audio signal in dB.

    Spans the 88 piano keys from A0 upward at 3 bins per semitone,
    referenced so the loudest bin is 0 dB, and floored at -80 dB.
    """
    magnitude = np.abs(librosa.cqt(
        y,
        sr=sr,
        hop_length=hop_length,
        fmin=FMIN,
        n_bins=N_BINS,
        bins_per_octave=BINS_PER_OCTAVE,
    ))
    db = librosa.amplitude_to_db(magnitude, ref=np.max(magnitude))
    return np.maximum(db, -80.0)
48
+
49
+
50
def midi_to_pianoroll_cqt(midi_data, duration, sr=22050, hop_length=512):
    """Build a harmonic-aware CQT-like spectrogram from MIDI notes.

    For each MIDI note, places energy at the fundamental AND its
    harmonics (partials 2-8) with appropriate attenuation, matching
    how a real piano sounds in the CQT domain. Returns an (N_BINS,
    n_frames) array of dB values floored at -80.
    """
    n_frames = int(np.ceil(duration * sr / hop_length))
    roll = np.full((N_BINS, n_frames), -80.0)

    all_notes = (note for inst in midi_data.instruments for note in inst.notes)
    for note in all_notes:
        # Map MIDI pitch to CQT bin (A0 = MIDI 21, 3 bins/semitone).
        # NOTE(review): the +1 biases the center 1/3 semitone above the
        # nominal bin; presumably intentional so the ±1-bin spread covers
        # the semitone band — confirm against audio_cqt alignment.
        fund_bin = (note.pitch - 21) * 3 + 1
        if not (0 <= fund_bin < N_BINS):
            continue

        first = max(0, int(note.start * sr / hop_length))
        last = min(n_frames, int(note.end * sr / hop_length))
        if first >= last:
            continue  # note too short to occupy even one frame

        # Velocity 0..127 maps linearly onto -30..0 dB.
        level_db = -30.0 + (note.velocity / 127.0) * 30.0

        # Fundamental with ±1 bin spread.
        _place_energy(roll, fund_bin, first, last, level_db)

        # Overtones per the HARMONICS model.
        for _, semitone_offset, attenuation in HARMONICS:
            overtone_bin = fund_bin + int(round(semitone_offset * 3))
            if overtone_bin >= N_BINS:
                break  # higher partials only go further off the top
            overtone_db = level_db + attenuation
            if overtone_db < -70:  # skip inaudible harmonics
                continue
            _place_energy(roll, overtone_bin, first, last, overtone_db)

    return roll
88
+
89
+
90
def _place_energy(pianoroll, center_bin, start, end, db_level):
    """Place energy in the pianoroll at center ± 1 bin.

    The center row gets *db_level*; its two neighbours get it attenuated
    by 6 dB. Existing values are only ever raised (element-wise max),
    and out-of-range rows are ignored.
    """
    spread = (
        (center_bin - 1, db_level - 6),
        (center_bin, db_level),
        (center_bin + 1, db_level - 6),
    )
    n_rows = pianoroll.shape[0]
    for row, level in spread:
        if 0 <= row < n_rows:
            np.maximum(
                pianoroll[row, start:end], level,
                out=pianoroll[row, start:end],
            )
98
+
99
+
100
def spectral_fidelity(y, sr, midi_data, hop_length=512):
    """Compute spectral fidelity: how well MIDI matches the original audio.

    Compares the CQT of the real audio against the harmonic pianoroll
    synthesized from *midi_data* and returns dict with scores and
    detailed diagnostics (global F1/precision/recall/similarity,
    per-octave coverage, per-time-segment fidelity, and the top
    missing/extra note regions).
    """
    duration = len(y) / sr

    # Real spectrum vs. ideal spectrum rendered from the MIDI notes.
    audio_spec = audio_cqt(y, sr, hop_length)
    midi_spec = midi_to_pianoroll_cqt(midi_data, duration, sr, hop_length)

    # Normalize to 0-1 range (both spectrograms are dB floored at -80).
    audio_norm = (audio_spec + 80.0) / 80.0
    midi_norm = (midi_spec + 80.0) / 80.0

    # Active energy thresholds
    audio_active = audio_norm > 0.25  # > -60dB
    midi_active = midi_norm > 0.25

    # Cell-wise confusion counts over the whole time-frequency plane.
    tp = np.sum(audio_active & midi_active)
    fn = np.sum(audio_active & ~midi_active)  # audio energy the MIDI lacks
    fp = np.sum(~audio_active & midi_active)  # MIDI energy absent from audio

    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0

    # Weighted spectral similarity (MSE on active regions).
    # The *4 factor maps an MSE of 0.25 down to similarity 0.
    active_mask = audio_active | midi_active
    if np.sum(active_mask) > 0:
        mse = np.mean((audio_norm[active_mask] - midi_norm[active_mask]) ** 2)
        spectral_similarity = max(0, 1.0 - mse * 4)
    else:
        spectral_similarity = 0

    # Per-octave analysis (36 CQT bins per octave, starting at A0).
    octave_analysis = []
    for octave in range(8):
        b0 = octave * 36
        b1 = min((octave + 1) * 36, N_BINS)
        oct_audio = audio_active[b0:b1]
        oct_midi = midi_active[b0:b1]
        oct_tp = int(np.sum(oct_audio & oct_midi))
        oct_fn = int(np.sum(oct_audio & ~oct_midi))
        oct_fp = int(np.sum(~oct_audio & oct_midi))
        oct_total = int(np.sum(oct_audio))
        octave_analysis.append({
            'octave': octave,
            'range': f"A{octave}-A{octave+1}",
            'audio_energy': oct_total,
            'missing_energy': oct_fn,
            'extra_energy': oct_fp,
            'matched_energy': oct_tp,
            # A silent octave counts as fully covered (1.0).
            'coverage': round(float(oct_tp / oct_total) if oct_total > 0 else 1.0, 4),
        })

    # Per-time analysis (20 segments).
    # NOTE(review): window = n_frames // 20 drops up to 19 trailing frames
    # from the last segment when n_frames is not divisible by 20 — confirm
    # this is acceptable for the coarse report.
    n_frames = audio_spec.shape[1]
    window = max(1, n_frames // 20)
    time_analysis = []
    for seg in range(20):
        f0 = seg * window
        f1_t = min((seg + 1) * window, n_frames)  # _t suffix avoids shadowing the f1 score
        seg_audio = audio_active[:, f0:f1_t]
        seg_midi = midi_active[:, f0:f1_t]
        seg_tp = int(np.sum(seg_audio & seg_midi))
        seg_fn = int(np.sum(seg_audio & ~seg_midi))
        seg_fp = int(np.sum(~seg_audio & seg_midi))
        seg_total = int(np.sum(seg_audio))
        t0 = librosa.frames_to_time(f0, sr=sr, hop_length=hop_length)
        t1 = librosa.frames_to_time(f1_t, sr=sr, hop_length=hop_length)
        time_analysis.append({
            'time_start': round(float(t0), 2),
            'time_end': round(float(t1), 2),
            'missing': seg_fn,
            'extra': seg_fp,
            'matched': seg_tp,
            'fidelity': round(float(seg_tp / seg_total) if seg_total > 0 else 1.0, 3),
        })

    # Find specific missing and extra note regions
    missing_notes = _find_note_gaps(audio_spec, midi_spec, sr, hop_length, mode='missing')
    extra_notes = _find_note_gaps(audio_spec, midi_spec, sr, hop_length, mode='extra')

    return {
        'spectral_f1': round(f1, 4),
        'spectral_precision': round(precision, 4),
        'spectral_recall': round(recall, 4),
        'spectral_similarity': round(spectral_similarity, 4),
        'per_octave': octave_analysis,
        'per_time': time_analysis,
        # Cap report size to the 20 most significant regions each.
        'missing_notes': missing_notes[:20],
        'extra_notes': extra_notes[:20],
    }
193
+
194
+
195
def _find_note_gaps(audio_spec, midi_spec, sr, hop_length, mode='missing'):
    """Find time-frequency regions with energy in one but not the other.

    mode='missing': audio has energy, MIDI doesn't (notes basic-pitch missed)
    mode='extra': MIDI has energy, audio doesn't (hallucinations)

    Scans one row per semitone (every 3rd CQT bin), collects contiguous
    gap runs of at least 3 frames, and returns them as dicts sorted by
    energy * duration, most significant first.
    """
    audio_norm = (audio_spec + 80.0) / 80.0
    midi_norm = (midi_spec + 80.0) / 80.0

    if mode == 'missing':
        gap = (audio_norm > 0.5) & (midi_norm < 0.25)
        energy_source = audio_norm
    else:
        gap = (midi_norm > 0.5) & (audio_norm < 0.25)
        energy_source = midi_norm

    n_bins, n_frames = audio_spec.shape
    regions = []

    for b in range(0, n_bins, 3):
        row = gap[b]
        f = 0
        while f < n_frames:
            if not row[f]:
                f += 1
                continue
            # Consume one contiguous run of gap frames.
            run_start = f
            while f < n_frames and row[f]:
                f += 1
            run_end = f
            if run_end - run_start < 3:
                continue  # too short to be a meaningful note region

            pitch = 21 + b // 3  # bin row back to MIDI pitch (A0 = 21)
            t0 = librosa.frames_to_time(run_start, sr=sr, hop_length=hop_length)
            t1 = librosa.frames_to_time(run_end, sr=sr, hop_length=hop_length)
            mean_energy = float(np.mean(energy_source[b:b + 3, run_start:run_end]))

            regions.append({
                'pitch': pitch,
                'note': pretty_midi.note_number_to_name(pitch),
                'time_start': round(float(t0), 3),
                'time_end': round(float(t1), 3),
                'duration': round(float(t1 - t0), 3),
                'energy': round(mean_energy, 3),
            })

    regions.sort(key=lambda r: r['energy'] * r['duration'], reverse=True)
    return regions
244
+
245
+
246
def compare(audio_path, midi_path, output_json=None):
    """Run full spectral comparison and print report.

    Loads the audio (22.05 kHz mono) and the MIDI transcription, computes
    the fidelity metrics, prints a human-readable summary, and optionally
    writes the full results dict to *output_json*.
    """
    samples, rate = librosa.load(str(audio_path), sr=22050, mono=True)
    transcription = pretty_midi.PrettyMIDI(str(midi_path))

    report = spectral_fidelity(samples, rate, transcription)

    print("\nSpectral Fidelity Report:")
    for label, key in (
        ("Spectral F1", 'spectral_f1'),
        ("Spectral Precision", 'spectral_precision'),
        ("Spectral Recall", 'spectral_recall'),
        ("Spectral Similarity", 'spectral_similarity'),
    ):
        print(f" {label}: {report[key]:.4f}")

    print("\n Per-octave coverage:")
    for band in report['per_octave']:
        if band['audio_energy'] > 0:
            print(f" {band['range']}: {band['coverage']:.1%} "
                  f"(missing: {band['missing_energy']}, extra: {band['extra_energy']})")

    print("\n Worst time segments:")
    for seg in sorted(report['per_time'], key=lambda s: s['fidelity'])[:5]:
        print(f" {seg['time_start']:.1f}-{seg['time_end']:.1f}s: "
              f"fidelity={seg['fidelity']:.1%} "
              f"(missing: {seg['missing']}, extra: {seg['extra']})")

    # Missing and extra note regions share the same print shape.
    for kind, key in (("missing", 'missing_notes'), ("extra", 'extra_notes')):
        regions = report[key]
        if regions:
            print(f"\n Top {kind} notes:")
            for region in regions[:10]:
                print(f" {region['note']} at "
                      f"{region['time_start']:.2f}-{region['time_end']:.2f}s "
                      f"(energy: {region['energy']:.2f})")

    if output_json:
        Path(output_json).write_text(json.dumps(report, indent=2))
        print(f"\n Report saved to {output_json}")

    return report
289
+
290
+
291
if __name__ == "__main__":
    import sys

    # CLI: audio file + MIDI file, optional JSON report path.
    argv = sys.argv
    if len(argv) < 3:
        print("Usage: python spectral.py <audio_file> <midi_file> [output.json]")
        sys.exit(1)

    report_path = argv[3] if len(argv) > 3 else None
    compare(argv[1], argv[2], report_path)
transcriber/transcribe.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""Transcribe an audio file to MIDI using basic-pitch."""

import sys
from pathlib import Path

# Patch scipy.signal.gaussian (removed in scipy >=1.12, basic-pitch hasn't
# updated): re-expose the window function at its old location so basic-pitch's
# imports keep working. Must run before importing basic_pitch below.
import scipy.signal
if not hasattr(scipy.signal, "gaussian"):
    from scipy.signal.windows import gaussian
    scipy.signal.gaussian = gaussian

from basic_pitch.inference import predict
import basic_pitch

# Locate the ICASSP-2022 ONNX model bundled inside the installed
# basic_pitch package (the Dockerfile installs basic-pitch with --no-deps
# and runs it via onnxruntime only).
_MODELS_DIR = Path(basic_pitch.__file__).parent / "saved_models" / "icassp_2022"
ONNX_MODEL_PATH = _MODELS_DIR / "nmp.onnx"
+
18
def transcribe(input_path: str, output_path: str | None = None):
    """Run basic-pitch on *input_path* and write the resulting MIDI.

    When *output_path* is omitted, the MIDI lands next to the input with
    a ``.mid`` suffix. Exits the process with status 1 if the input file
    does not exist. Returns the output path as a ``Path``.
    """
    src = Path(input_path)
    if not src.exists():
        print(f"Error: {src} not found")
        sys.exit(1)

    dest = src.with_suffix(".mid") if output_path is None else Path(output_path)

    print(f"Transcribing {src}...")
    # Thresholds tuned for piano material; model output itself is unused.
    _, midi_data, note_events = predict(
        str(src),
        ONNX_MODEL_PATH,
        onset_threshold=0.33,
        frame_threshold=0.20,
        minimum_note_length=100.0,
    )

    midi_data.write(str(dest))
    print(f"MIDI written to {dest}")
    print(f"Found {len(note_events)} note events")
    return dest
42
+
43
if __name__ == "__main__":
    # CLI: audio file, optional MIDI output path.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python transcribe.py <audio_file> [output.mid]")
        sys.exit(1)

    transcribe(cli_args[0], cli_args[1] if len(cli_args) > 1 else None)