Initial HF Space deployment
Browse files- Dockerfile +30 -0
- README.md +18 -4
- debug_audio.py +42 -0
- main.py +200 -0
- requirements.txt +11 -0
- services/__pycache__/audio_separator.cpython-310.pyc +0 -0
- services/__pycache__/audio_separator.cpython-312.pyc +0 -0
- services/__pycache__/beat_tracker.cpython-310.pyc +0 -0
- services/__pycache__/chord_analyzer.cpython-310.pyc +0 -0
- services/__pycache__/chord_analyzer.cpython-312.pyc +0 -0
- services/__pycache__/midi_converter.cpython-310.pyc +0 -0
- services/audio_separator.py +233 -0
- services/beat_tracker.py +37 -0
- services/chord_analyzer.py +178 -0
- services/midi_converter.py +61 -0
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Python 3.10 slim base image.
# NOTE(review): despite the original comment, python:3.10-slim has no CUDA
# runtime — GPU use would require an nvidia/cuda base image. CPU-only here.
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies: ffmpeg (audio decode), libsndfile1 (soundfile),
# git (pip installs from VCS); clear the apt cache to keep the layer small.
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create directories for uploads and processed files
RUN mkdir -p uploads processed

# Expose port 7860 (HF Spaces default)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
|
@@ -1,11 +1,25 @@
|
|
| 1 |
---
|
| 2 |
title: Tunebase
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Tunebase
|
| 3 |
+
emoji: 🎵
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Tunebase - AI Audio Stem Separation
|
| 12 |
+
|
| 13 |
+
High-fidelity stem separation for musicians using Demucs.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- 2-stem (Vocals + Instruments)
|
| 18 |
+
- 4-stem (Vocals, Drums, Bass, Other)
|
| 19 |
+
- 6-stem (+ Guitar, Piano split)
|
| 20 |
+
|
| 21 |
+
## API Endpoints
|
| 22 |
+
|
| 23 |
+
- `POST /upload` - Upload audio file
|
| 24 |
+
- `POST /process/{file_id}?mode=4stem` - Process with specified mode
|
| 25 |
+
- `GET /status/{file_id}` - Check processing status
|
debug_audio.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Diagnostic script: verify torchaudio's soundfile backend can round-trip audio."""
import torch
import torchaudio
import soundfile
import os

print(f"Torch version: {torch.__version__}")
print(f"Torchaudio version: {torchaudio.__version__}")
print(f"Soundfile version: {soundfile.__version__}")

print("\nChecking available backends:")
# Narrow catches: a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
try:
    print(f"List audio backends: {torchaudio.list_audio_backends()}")
except Exception:
    print("torchaudio.list_audio_backends() not available")

try:
    # Removed in recent torchaudio releases; absence is not an error here.
    print(f"Get audio backend: {torchaudio.get_audio_backend()}")
except Exception:
    pass

print("\nTest writing and reading:")
test_file = "test_audio.wav"
try:
    # Generate one second of random mono audio at 16 kHz.
    waveform = torch.rand(1, 16000)
    sample_rate = 16000

    print(f"Saving {test_file} with backend='soundfile'...")
    torchaudio.save(test_file, waveform, sample_rate, backend="soundfile")
    print("Save success.")

    print(f"Loading {test_file} with backend='soundfile'...")
    loaded_wav, loaded_sr = torchaudio.load(test_file, backend="soundfile")
    print(f"Load success. Shape: {loaded_wav.shape}")

except Exception as e:
    import traceback
    traceback.print_exc()
    print(f"Test failed: {e}")
finally:
    # Always remove the scratch file, even when the round-trip failed.
    if os.path.exists(test_file):
        os.remove(test_file)
main.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import shutil
import os
import uuid
from services.audio_separator import AudioSeparator
from services.chord_analyzer import ChordAnalyzer
from services.midi_converter import MidiConverter
from services.beat_tracker import BeatTracker

app = FastAPI(title="Tunebase AI Engine", description="High-performance audio separation for Math Rock", version="1.0.0")

# Setup CORS
origins = ["*"]  # Allow all for dev
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directories (relative to the working dir; created on import if missing)
UPLOAD_DIR = "uploads"
PROCESSED_DIR = "processed"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Mount static files so the frontend can stream the separated audio results.
app.mount("/processed", StaticFiles(directory=PROCESSED_DIR), name="processed")
# Mount uploads for verification if needed, but risky. Processed is enough.

# Initialize services at import time.
# Warning: loading models takes time and RAM. On failure, every service is
# set to None and endpoints respond with 503 instead of crashing the app.
try:
    separator = AudioSeparator()
    analyzer = ChordAnalyzer()
    midi_converter = MidiConverter()
    beat_tracker = BeatTracker()
except Exception as e:
    print(f"Warning: Failed to load models on startup. {e}")
    import traceback
    traceback.print_exc()
    separator = None
    analyzer = None
    midi_converter = None
    beat_tracker = None
@app.get("/")
def read_root():
    """Health-check endpoint confirming the engine is up."""
    greeting = {"message": "Tunebase AI Engine Ready 🎸"}
    return greeting
@app.post("/upload")
async def upload_audio(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """
    Upload an audio file and store it under a freshly generated UUID.

    Returns the id to use with /process/{file_id}. Processing itself is
    triggered separately.
    """
    file_id = str(uuid.uuid4())
    # Derive a safe extension. os.path.basename discards any path components a
    # hostile client put in the filename, and splitext handles names with no
    # dot at all (the original `split(".")[-1]` returned the whole name then).
    _, ext = os.path.splitext(os.path.basename(file.filename or ""))
    file_ext = ext.lstrip(".") or "bin"
    file_path = os.path.join(UPLOAD_DIR, f"{file_id}.{file_ext}")

    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    finally:
        # Release the spooled temp file backing the upload.
        file.file.close()

    return {"id": file_id, "filename": file.filename, "status": "uploaded"}
# Cleanup Task
import time

def cleanup_old_files():
    """
    Delete session folders in PROCESSED_DIR older than one hour to save
    disk space on Hugging Face Spaces.
    """
    print("Running cleanup task...")
    cutoff = time.time() - 3600  # keep results for 1 hour
    for folder in os.listdir(PROCESSED_DIR):
        folder_path = os.path.join(PROCESSED_DIR, folder)
        if not os.path.isdir(folder_path):
            continue
        try:
            # Compare against the folder's last-modification time.
            if os.path.getmtime(folder_path) < cutoff:
                print(f"Deleting old session: {folder}")
                # shutil is already imported at the top of the file; the
                # original redundantly re-imported it here.
                shutil.rmtree(folder_path)
        except Exception as e:
            # Best-effort: one bad folder must not abort the whole sweep.
            print(f"Error cleaning {folder}: {e}")
# Global Progress Store (Simple in-memory)
processing_status = {}

@app.get("/status/{file_id}")
async def get_status(file_id: str):
    """Look up the current progress record for *file_id* (placeholder if unknown)."""
    fallback = {"status": "unknown", "progress": 0, "step": "Waiting"}
    return processing_status.get(file_id, fallback)

def update_progress(file_id, step, progress, status="processing", data=None):
    """Store a progress snapshot for *file_id*, merging any extra payload dict."""
    snapshot = {"status": status, "progress": progress, "step": step}
    if data:
        snapshot.update(data)
    processing_status[file_id] = snapshot
@app.post("/process/{file_id}")
def process_audio(file_id: str, background_tasks: BackgroundTasks, mode: str = "4stem"):
    """
    Run stem separation and analysis for a previously uploaded file.
    Modes: 2stem (vocals+instruments), 4stem (default), 6stem (full)

    Runs synchronously and returns the final result; progress is also
    published via update_progress for the /status endpoint.
    """
    if not separator:
        raise HTTPException(status_code=503, detail="AI Engine not initialized")

    # Locate the uploaded file by its id prefix (the extension is not known here).
    input_path = None
    for f in os.listdir(UPLOAD_DIR):
        if f.startswith(file_id):
            input_path = os.path.join(UPLOAD_DIR, f)
            break

    if not input_path:
        raise HTTPException(status_code=404, detail="File not found")

    output_dir = os.path.join(PROCESSED_DIR, file_id)

    # Dynamic base URL for HF Space vs localhost
    space_host = os.environ.get("SPACE_HOST")
    if space_host:
        base_url = f"https://{space_host}/processed"
    else:
        base_url = "http://localhost:8000/processed"

    try:
        update_progress(file_id, "Separating Audio Stems...", 10)

        # 1. Separate audio with the requested mode.
        def progress_cb(step, prog):
            update_progress(file_id, step, prog)

        stems = separator.separate(input_path, output_dir, callback=progress_cb, mode=mode)

        # 2. Convert melodic/harmonic stems to MIDI.
        update_progress(file_id, "Converting to MIDI...", 70)
        midi_files = {}
        if midi_converter:
            # Basic Pitch works best on monophonic/polyphonic instruments, less on drums
            target_stems = ['piano', 'bass', 'guitar_rhythm', 'guitar_lead', 'vocals']
            for stem_name in target_stems:
                if stem_name in stems:
                    stem_path = stems[stem_name]
                    midi_out = os.path.join(output_dir, f"{stem_name}.mid")
                    if midi_converter.convert(stem_path, midi_out):
                        midi_files[stem_name] = f"{base_url}/{file_id}/{stem_name}.mid"

        # 3. Beat / tempo analysis.
        update_progress(file_id, "Analyzing Rhythm...", 85)
        bpm = 0
        beats = []
        if beat_tracker:
            # Use Drums for beat tracking if available, otherwise 'other' or input
            beat_source = stems.get('drums') or stems.get('other') or input_path
            rhythm_data = beat_tracker.track(beat_source)
            bpm = rhythm_data['bpm']
            beats = rhythm_data['beats']

        # Fixed: this status update was previously emitted twice in a row.
        update_progress(file_id, "Finalizing...", 95)

        # Construct full URLs for frontend
        stems_url = {k: f"{base_url}/{file_id}/{os.path.basename(v)}" for k, v in stems.items()}

        final_data = {
            "stems": stems_url,
            "midi": midi_files,
            "bpm": bpm,
            "beats": beats
        }
        print(f"Final Data for {file_id}: {final_data}")  # Debug
        update_progress(file_id, "Completed", 100, status="completed", data=final_data)

        return {
            "status": "completed",
            **final_data
        }

    except Exception as e:
        import traceback
        traceback.print_exc()
        processing_status[file_id] = {"status": "error", "error": str(e)}
        print(f"Error processing: {e}")
        raise HTTPException(status_code=500, detail=str(e))
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
torch
|
| 5 |
+
torchaudio
|
| 6 |
+
demucs
|
| 7 |
+
librosa
|
| 8 |
+
numpy
|
| 9 |
+
scipy
|
| 10 |
+
soundfile
|
| 11 |
+
basic-pitch
|
services/__pycache__/audio_separator.cpython-310.pyc
ADDED
|
Binary file (5.41 kB). View file
|
|
|
services/__pycache__/audio_separator.cpython-312.pyc
ADDED
|
Binary file (4.71 kB). View file
|
|
|
services/__pycache__/beat_tracker.cpython-310.pyc
ADDED
|
Binary file (1.37 kB). View file
|
|
|
services/__pycache__/chord_analyzer.cpython-310.pyc
ADDED
|
Binary file (3.85 kB). View file
|
|
|
services/__pycache__/chord_analyzer.cpython-312.pyc
ADDED
|
Binary file (4.32 kB). View file
|
|
|
services/__pycache__/midi_converter.cpython-310.pyc
ADDED
|
Binary file (1.68 kB). View file
|
|
|
services/audio_separator.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchaudio

# Force the backend to soundfile to avoid the TorchCodec requirement on Windows.
# Catch Exception (not bare except) so interpreter-exit signals pass through.
try:
    torchaudio.set_audio_backend("soundfile")
except Exception:
    pass  # In newer versions it might be automatic or different, but this is a safe attempt

from demucs.apply import apply_model
from demucs.pretrained import get_model
import os
import pathlib

# Model configuration: map each separation mode to a Demucs model name.
# Support multiple models for different separation modes.
MODELS = {
    "2stem": "htdemucs",  # Will merge to 2 stems after
    "4stem": "htdemucs",  # Default 4 stem (drums, bass, vocals, other)
    "6stem": "htdemucs_6s",  # Full 6 stem with guitar and piano
}
class AudioSeparator:
    """Wraps pretrained Demucs models for 2/4/6-stem audio source separation."""

    def __init__(self):
        # Load all models on startup (slow and RAM-heavy; done once).
        self.models = {}
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load unique models only (2stem and 4stem share "htdemucs").
        unique_models = set(MODELS.values())
        for model_name in unique_models:
            print(f"Loading Demucs Model: {model_name}...")
            model = get_model(model_name)
            model.to(self.device)
            self.models[model_name] = model
        print(f"All models loaded on {self.device}")

    def separate(self, audio_path: str, output_dir: str, callback=None, mode="4stem"):
        """
        Split an audio file into stems.
        Modes: 2stem, 4stem, 6stem

        Returns a dict mapping stem name -> saved file path.
        callback(step_text, progress_int) is invoked at coarse milestones.
        """
        # Select model based on mode (unknown modes fall back to 4-stem).
        model_name = MODELS.get(mode, "htdemucs")
        model = self.models[model_name]

        # Load audio using soundfile directly (bypass torchaudio backend issues)
        import soundfile as sf
        wav_np, sr = sf.read(audio_path)

        # Convert to tensor; sf.read yields (samples,) or (samples, channels).
        wav = torch.from_numpy(wav_np).float()
        if wav.ndim == 1:
            wav = wav.unsqueeze(0)
        else:
            wav = wav.t()  # -> (channels, samples)

        # Resample to Demucs' expected 44.1 kHz if needed.
        if sr != 44100:
            if callback: callback("Resampling audio...", 15)
            resampler = torchaudio.transforms.Resample(sr, 44100)
            wav = resampler(wav)
            sr = 44100

        wav = wav.unsqueeze(0).to(self.device)

        # Normalize: zero mean / unit std of the mono reference, as Demucs expects.
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()

        if callback: callback("Running Demucs Inference...", 20)
        print(f"Starting separation with {model_name} (mode: {mode})...")

        sources = apply_model(model, wav, shifts=1, split=True, overlap=0.25, progress=True)[0]
        source_names = model.sources

        results = {}
        os.makedirs(output_dir, exist_ok=True)

        total_sources = len(source_names)
        source_tensors = {name: source for name, source in zip(source_names, sources)}

        # Handle different modes
        if mode == "2stem":
            # Merge everything that is not vocals into one "instruments" stem.
            if callback: callback("Merging to 2 stems...", 45)

            vocals = source_tensors.get('vocals')
            instruments = None
            for name, src in source_tensors.items():
                if name != 'vocals':
                    if instruments is None:
                        instruments = src.clone()
                    else:
                        instruments += src

            if vocals is not None:
                self._save_audio(vocals, sr, os.path.join(output_dir, "vocals.mp3"))
                results['vocals'] = os.path.join(output_dir, "vocals.mp3")
            if instruments is not None:
                self._save_audio(instruments, sr, os.path.join(output_dir, "instruments.mp3"))
                results['instruments'] = os.path.join(output_dir, "instruments.mp3")

        elif mode == "6stem":
            # Full separation, with extra lead/rhythm post-processing for guitar.
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                if name == 'guitar':
                    results.update(self._process_guitar(source, sr, output_dir))
                else:
                    stem_path = os.path.join(output_dir, f"{name}.mp3")
                    self._save_audio(source, sr, stem_path)
                    results[name] = stem_path
        else:
            # 4stem - standard separation
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                stem_path = os.path.join(output_dir, f"{name}.mp3")
                self._save_audio(source, sr, stem_path)
                results[name] = stem_path

        return results

    def _process_guitar(self, source, sr, output_dir):
        """
        Split the guitar stem into Lead and Rhythm via Mid-Side processing.
        - Mid (center) = rhythm (power chords / strumming usually sit center)
        - Side (stereo difference) = lead (often panned or stereo-effected)
        """
        # source shape: (2, samples)

        # Mono input cannot be mid-side split; return the raw stem for both keys.
        if source.shape[0] < 2:
            print("Warning: Guitar stem is Mono. Cannot split Rhythm/Lead.")
            path = os.path.join(output_dir, "guitar.mp3")
            self._save_audio(source, sr, path)
            return {"guitar_rhythm": path, "guitar_lead": path}

        # MID-SIDE PROCESSING
        # Mid = (L + R) / 2 -> center content; Side = (L - R) / 2 -> stereo diff.
        left = source[0:1, :]   # (1, samples)
        right = source[1:2, :]  # (1, samples)

        mid = (left + right) / 2.0   # Center content -> Rhythm
        side = (left - right) / 2.0  # Stereo diff -> Lead

        # Subtle band-pass filtering for better separation:
        # rhythm emphasizes 80-4000 Hz, lead emphasizes 200-8000 Hz.
        try:
            import scipy.signal as signal

            nyquist = sr / 2
            b_rhythm, a_rhythm = signal.butter(
                4, [80 / nyquist, 4000 / nyquist], btype='band')
            b_lead, a_lead = signal.butter(
                4, [200 / nyquist, 8000 / nyquist], btype='band')

            mid_filtered = signal.filtfilt(b_rhythm, a_rhythm, mid.numpy())
            side_filtered = signal.filtfilt(b_lead, a_lead, side.numpy())

            mid = torch.from_numpy(mid_filtered).float()
            side = torch.from_numpy(side_filtered).float()
        except Exception as e:
            print(f"Warning: Frequency filtering failed ({e}), using raw Mid-Side")

        # SWAPPED: Side = Rhythm (strumming often panned wide), Mid = Lead (melody often center)
        rhythm_stereo = torch.cat([side, side], dim=0)
        lead_stereo = torch.cat([mid, mid], dim=0)

        # If the side signal is too quiet the mix is essentially mono.
        side_rms = torch.sqrt(torch.mean(side ** 2))
        mid_rms = torch.sqrt(torch.mean(mid ** 2))

        if side_rms < mid_rms * 0.1:  # side is <10% of mid -> almost mono mix
            print("Notice: Audio appears to be mostly mono. Rhythm separation may be limited.")
            # Create pseudo-separation using low frequencies for rhythm.
            try:
                # Import and nyquist are redone locally so this fallback also
                # works when the filtering block above failed before defining
                # `signal`/`nyquist` (previously a silent NameError).
                import scipy.signal as signal
                nyquist = sr / 2
                b_lp, a_lp = signal.butter(4, 2000 / nyquist, btype='low')
                rhythm_from_mid = signal.filtfilt(b_lp, a_lp, mid.numpy())
                rhythm_stereo = torch.from_numpy(rhythm_from_mid).float()
                rhythm_stereo = torch.cat([rhythm_stereo, rhythm_stereo], dim=0)
            except Exception:
                pass

        # Normalize to -3 dB peak to prevent clipping.
        def normalize(tensor):
            peak = tensor.abs().max()
            if peak > 0:
                target_peak = 0.707  # -3dB
                return tensor * (target_peak / peak)
            return tensor

        rhythm_stereo = normalize(rhythm_stereo)
        lead_stereo = normalize(lead_stereo)

        # Save files
        path_rhythm = os.path.join(output_dir, "guitar_rhythm.mp3")
        path_lead = os.path.join(output_dir, "guitar_lead.mp3")

        self._save_audio(rhythm_stereo, sr, path_rhythm)
        self._save_audio(lead_stereo, sr, path_lead)

        return {
            "guitar_rhythm": path_rhythm,
            "guitar_lead": path_lead
        }

    def _save_audio(self, source, sr, path):
        """Write a (channels, samples) tensor to *path* via soundfile."""
        import soundfile as sf
        # Single CPU transfer (the original called .cpu() twice in a row).
        source = source.cpu()
        # sf.write expects (samples, channels), hence the transpose.
        sf.write(path, source.t().numpy(), sr)
services/beat_tracker.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
class BeatTracker:
    """Estimates tempo (BPM) and beat positions using librosa."""

    def __init__(self):
        # Stateless; librosa does all the work per call.
        pass

    def track(self, audio_path: str):
        """
        Track beats and tempo from audio file.
        Returns: {
            "bpm": float,
            "beats": list[float] # timestamps
        }
        """
        try:
            print(f"Tracking beats for {audio_path}...")
            samples, rate = librosa.load(audio_path)

            # Joint tempo + beat-frame estimation.
            tempo, frames = librosa.beat.beat_track(y=samples, sr=rate)

            # Some librosa versions return tempo as a one-element array.
            if isinstance(tempo, np.ndarray):
                tempo = tempo[0]

            stamps = librosa.frames_to_time(frames, sr=rate)
            return {
                "bpm": round(float(tempo), 2),
                "beats": [round(float(s), 2) for s in stamps],
            }

        except Exception as e:
            # Any failure (unreadable file, missing deps) degrades gracefully.
            print(f"Error tracking beats: {e}")
            return {"bpm": 0, "beats": []}
services/chord_analyzer.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
class ChordAnalyzer:
|
    def __init__(self):
        # Precompute one chroma template per (root, chord-quality) pair;
        # basic major/minor triads are given extra matching priority.
        self.templates = self._generate_chord_templates()
| 8 |
+
|
| 9 |
+
def _generate_chord_templates(self):
|
| 10 |
+
"""
|
| 11 |
+
Membuat template chroma untuk berbagai jenis chord.
|
| 12 |
+
12 Nada: C, C#, D, D#, E, F, F#, G, G#, A, A#, B
|
| 13 |
+
PRIORITIZED: Basic Major/Minor triads have higher matching priority
|
| 14 |
+
"""
|
| 15 |
+
templates = {}
|
| 16 |
+
roots = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
|
| 17 |
+
|
| 18 |
+
# Define chord qualities with PRIORITY WEIGHTS
|
| 19 |
+
# Higher weight = more likely to be matched
|
| 20 |
+
# Format: (intervals, priority_boost)
|
| 21 |
+
qualities = {
|
| 22 |
+
# PRIORITY 1: Basic triads (most common, highest priority)
|
| 23 |
+
'': ([0, 4, 7], 1.3), # Major - HIGHEST priority
|
| 24 |
+
'm': ([0, 3, 7], 1.3), # Minor - HIGHEST priority
|
| 25 |
+
|
| 26 |
+
# PRIORITY 2: Power chord & Suspended
|
| 27 |
+
'5': ([0, 7], 1.1), # Power chord
|
| 28 |
+
'sus4': ([0, 5, 7], 1.0),
|
| 29 |
+
'sus2': ([0, 2, 7], 1.0),
|
| 30 |
+
|
| 31 |
+
# PRIORITY 3: 7th chords
|
| 32 |
+
'maj7': ([0, 4, 7, 11], 0.95),
|
| 33 |
+
'm7': ([0, 3, 7, 10], 0.95),
|
| 34 |
+
'7': ([0, 4, 7, 10], 0.95), # Dominant 7
|
| 35 |
+
|
| 36 |
+
# PRIORITY 4: Extended & Other (lower priority to avoid false matches)
|
| 37 |
+
'dim': ([0, 3, 6], 0.9),
|
| 38 |
+
'aug': ([0, 4, 8], 0.9),
|
| 39 |
+
'6': ([0, 4, 7, 9], 0.85),
|
| 40 |
+
'm6': ([0, 3, 7, 9], 0.85),
|
| 41 |
+
'add9': ([0, 4, 7, 2], 0.85),
|
| 42 |
+
'madd9': ([0, 3, 7, 2], 0.85),
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
for i, root in enumerate(roots):
|
| 46 |
+
for quality, (intervals, priority) in qualities.items():
|
| 47 |
+
# Build chroma vector with weighted notes
|
| 48 |
+
vec = np.zeros(12)
|
| 49 |
+
for j, interval in enumerate(intervals):
|
| 50 |
+
idx = (i + interval) % 12
|
| 51 |
+
# Root = 2.0, Fifth = 1.5, Third = 1.2, Others = 1.0
|
| 52 |
+
if j == 0: # Root
|
| 53 |
+
weight = 2.0
|
| 54 |
+
elif interval == 7: # Fifth
|
| 55 |
+
weight = 1.5
|
| 56 |
+
elif interval in [3, 4]: # Third (major or minor)
|
| 57 |
+
weight = 1.2
|
| 58 |
+
else:
|
| 59 |
+
weight = 1.0
|
| 60 |
+
vec[idx] = weight
|
| 61 |
+
|
| 62 |
+
# Apply priority boost
|
| 63 |
+
vec *= priority
|
| 64 |
+
|
| 65 |
+
chord_name = f"{root}{quality}"
|
| 66 |
+
# Normalize vector
|
| 67 |
+
norm = np.linalg.norm(vec)
|
| 68 |
+
if norm > 0:
|
| 69 |
+
vec /= norm
|
| 70 |
+
templates[chord_name] = vec
|
| 71 |
+
|
| 72 |
+
return templates
|
| 73 |
+
|
| 74 |
+
    def analyze(self, audio_path: str, sr=22050):
        """
        Analyze an audio file and return its chord progression with timestamps.

        Pipeline: load audio -> harmonic/percussive separation -> chroma-CQT
        features -> temporal median smoothing -> per-frame template matching
        against self.templates -> segment collection and short-segment merging.

        Args:
            audio_path: Path to the audio file to analyze.
            sr: Target sample rate for loading (default 22050 Hz).

        Returns:
            A list of dicts like {"chord": "Cmaj", "start": 0.0, "end": 2.5}
            (times in seconds, rounded to 2 decimals), ordered by start time.
            Frames whose best match scores below THRESHOLD are labeled "N.C."
            (no chord). Returns [] on any error.
        """
        print(f"Analyzing chords for: {audio_path}")
        try:
            y, sr = librosa.load(audio_path, sr=sr)

            # Harmonic-Percussive Source Separation: keep only the harmonic
            # component so percussive transients don't pollute the chroma.
            y_harmonic, _ = librosa.effects.hpss(y)

            # Compute Chroma CQT (24 bins/octave for finer pitch resolution,
            # folded down to 12 chroma bins).
            chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, bins_per_octave=24)

            # 1. TEMPORAL SMOOTHING (Median Filter)
            # Increased filter size for more stability (21 frames ~= 0.5s
            # at the default hop length of 512 samples / 22050 Hz).
            import scipy.ndimage
            chroma = scipy.ndimage.median_filter(chroma, size=(1, 21))
            chroma = librosa.util.normalize(chroma)

            num_frames = chroma.shape[1]

            # Template matching: dot product of each (normalized) chord
            # template against every chroma frame in one matrix multiply.
            # Assumes self.templates maps chord name -> 12-dim unit vector.
            template_names = list(self.templates.keys())
            template_matrix = np.array([self.templates[name] for name in template_names])
            scores = np.dot(template_matrix, chroma)

            # Best-matching template and its score, per frame.
            max_indices = np.argmax(scores, axis=0)
            max_scores = np.max(scores, axis=0)

            # 2. POST-PROCESSING (Merge Short Segments)
            current_chord = None
            start_time = 0.0

            THRESHOLD = 0.6  # Lower threshold for basic chord detection
            MIN_DURATION = 0.8  # Chord must last 0.8s to be valid

            raw_segments = []

            # First Pass: Collect segments (runs of consecutive frames with
            # the same detected chord).
            for i in range(num_frames):
                idx = max_indices[i]
                score = max_scores[i]
                # NOTE(review): frames_to_time uses librosa's default hop
                # length, which matches chroma_cqt's default — confirm if
                # either ever gets an explicit hop_length.
                timestamp = librosa.frames_to_time(i, sr=sr)
                chord_name = template_names[idx] if score > THRESHOLD else "N.C."

                if chord_name != current_chord:
                    if current_chord is not None:
                        raw_segments.append({
                            "chord": current_chord,
                            "start": start_time,
                            "end": timestamp,
                            "duration": timestamp - start_time
                        })
                    current_chord = chord_name
                    start_time = timestamp

            # Append last segment, extended to the true end of the audio.
            if current_chord is not None:
                end_time = librosa.get_duration(y=y, sr=sr)
                raw_segments.append({
                    "chord": current_chord,
                    "start": start_time,
                    "end": end_time,
                    "duration": end_time - start_time
                })

            # Second Pass: Merge short segments to neighbor
            final_results = []
            if not raw_segments: return []

            # Simple heuristic: If segment < MIN_DURATION, merge to previous if possible
            for seg in raw_segments:
                if not final_results:
                    final_results.append(seg)
                    continue

                prev = final_results[-1]

                # If the current segment is too short, let the previous segment
                # absorb it (or it gets ignored as a blip).
                # BUT if its chord is the SAME as the previous one, just merge.
                if seg["chord"] == prev["chord"]:
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                elif seg["duration"] < MIN_DURATION:
                    # Merge to previous (extend previous to cover this short blip)
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                else:
                    final_results.append(seg)

            # Format output: expose only chord/start/end (drop the internal
            # "duration" bookkeeping key) with times rounded to 2 decimals.
            formatted_results = []
            for r in final_results:
                formatted_results.append({
                    "chord": r["chord"],
                    "start": round(r["start"], 2),
                    "end": round(r["end"], 2)
                })

            return formatted_results

        except Exception as e:
            # Best-effort API: callers get an empty progression on failure.
            print(f"Chord Analysis Error: {e}")
            return []
|
services/midi_converter.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from basic_pitch.inference import predict_and_save
|
| 2 |
+
from basic_pitch import ICASSP_2022_MODEL_PATH
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
class MidiConverter:
    """Converts audio files to MIDI using Spotify's Basic Pitch model."""

    def __init__(self):
        print("Initializing Basic Pitch for MIDI conversion...")
        # NOTE(review): output_dir is currently unused — convert() derives
        # the directory from its output_path argument. Kept so any external
        # code reading this attribute keeps working.
        self.output_dir = "midi_output"

    def convert(self, audio_path: str, output_path: str):
        """
        Convert an audio file to MIDI using Basic Pitch.

        Args:
            audio_path: Path to the input audio file (basic-pitch loads it
                internally).
            output_path: Desired path of the resulting .mid file.

        Returns:
            output_path on success, or None if conversion failed or the
            expected MIDI file was not produced.
        """
        try:
            # basic-pitch writes its results into a directory (it names the
            # file itself), so we run it in output_path's directory and then
            # move the generated file into place.
            # Fall back to "." when output_path has no directory component;
            # an empty-string directory is not a usable target.
            output_dir = os.path.dirname(output_path) or "."

            print(f"Converting {audio_path} to MIDI...")

            # Positional flags after the output directory are:
            # save_midi=True, sonify_midi=False, save_model_outputs=False,
            # save_notes=False. Thresholds tuned to reduce spurious notes.
            predict_and_save(
                [audio_path],
                output_dir,
                True,
                False,
                False,
                False,
                ICASSP_2022_MODEL_PATH,
                onset_threshold=0.6,  # Higher threshold to reduce noise
                frame_threshold=0.4,
                minimum_note_length=100.0,  # ms
                minimum_frequency=None,
                maximum_frequency=None
            )

            # Basic Pitch saves as <original_name>_basic_pitch.mid; rename
            # it to the caller-requested output_path.
            original_basename = os.path.splitext(os.path.basename(audio_path))[0]
            generated_midi = os.path.join(output_dir, f"{original_basename}_basic_pitch.mid")

            if os.path.exists(generated_midi):
                # os.replace overwrites an existing destination atomically,
                # so no separate exists/remove dance is needed (which was
                # also racy and would fail on Windows via plain os.rename).
                os.replace(generated_midi, output_path)
                print(f"MIDI saved to {output_path}")
                return output_path
            else:
                print("Warning: Expected MIDI file not found.")
                return None

        except Exception as e:
            # Best-effort API: log the full traceback, signal failure by None.
            print(f"Error converting to MIDI: {e}")
            import traceback
            traceback.print_exc()
            return None
|