Initial HF Space deployment
Browse files- Dockerfile +30 -0
- README.md +18 -4
- debug_audio.py +42 -0
- main.py +200 -0
- requirements.txt +11 -0
- services/__pycache__/audio_separator.cpython-310.pyc +0 -0
- services/__pycache__/audio_separator.cpython-312.pyc +0 -0
- services/__pycache__/beat_tracker.cpython-310.pyc +0 -0
- services/__pycache__/chord_analyzer.cpython-310.pyc +0 -0
- services/__pycache__/chord_analyzer.cpython-312.pyc +0 -0
- services/__pycache__/midi_converter.cpython-310.pyc +0 -0
- services/audio_separator.py +233 -0
- services/beat_tracker.py +37 -0
- services/chord_analyzer.py +178 -0
- services/midi_converter.py +61 -0
Dockerfile
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Python 3.10 slim base image.
# NOTE(review): despite the original comment, python:3.10-slim has no CUDA
# runtime — GPU use would require an nvidia/cuda base image. CPU-only here.
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies: ffmpeg (audio decode), libsndfile1 (soundfile),
# git (pip installs from VCS); clear the apt cache to keep the layer small.
RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsndfile1 \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for caching
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create directories for uploads and processed files
RUN mkdir -p uploads processed

# Expose port 7860 (HF Spaces default)
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
CHANGED
|
@@ -1,11 +1,25 @@
|
|
| 1 |
---
|
| 2 |
title: Tunebase
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Tunebase
|
| 3 |
+
emoji: 🎵
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Tunebase - AI Audio Stem Separation
|
| 12 |
+
|
| 13 |
+
High-fidelity stem separation for musicians using Demucs.
|
| 14 |
+
|
| 15 |
+
## Features
|
| 16 |
+
|
| 17 |
+
- 2-stem (Vocals + Instruments)
|
| 18 |
+
- 4-stem (Vocals, Drums, Bass, Other)
|
| 19 |
+
- 6-stem (+ Guitar, Piano split)
|
| 20 |
+
|
| 21 |
+
## API Endpoints
|
| 22 |
+
|
| 23 |
+
- `POST /upload` - Upload audio file
|
| 24 |
+
- `POST /process/{file_id}?mode=4stem` - Process with specified mode
|
| 25 |
+
- `GET /status/{file_id}` - Check processing status
|
debug_audio.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Diagnostic script: verify torchaudio's soundfile backend can round-trip audio."""
import torch
import torchaudio
import soundfile
import os

print(f"Torch version: {torch.__version__}")
print(f"Torchaudio version: {torchaudio.__version__}")
print(f"Soundfile version: {soundfile.__version__}")

print("\nChecking available backends:")
# Narrow catches: a bare `except:` would also swallow KeyboardInterrupt/SystemExit.
try:
    print(f"List audio backends: {torchaudio.list_audio_backends()}")
except Exception:
    print("torchaudio.list_audio_backends() not available")

try:
    # Removed in recent torchaudio releases; absence is not an error here.
    print(f"Get audio backend: {torchaudio.get_audio_backend()}")
except Exception:
    pass

print("\nTest writing and reading:")
test_file = "test_audio.wav"
try:
    # Generate one second of random mono audio at 16 kHz.
    waveform = torch.rand(1, 16000)
    sample_rate = 16000

    print(f"Saving {test_file} with backend='soundfile'...")
    torchaudio.save(test_file, waveform, sample_rate, backend="soundfile")
    print("Save success.")

    print(f"Loading {test_file} with backend='soundfile'...")
    loaded_wav, loaded_sr = torchaudio.load(test_file, backend="soundfile")
    print(f"Load success. Shape: {loaded_wav.shape}")

except Exception as e:
    import traceback
    traceback.print_exc()
    print(f"Test failed: {e}")
finally:
    # Always remove the scratch file, even when the round-trip failed.
    if os.path.exists(test_file):
        os.remove(test_file)
main.py
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
import shutil
import os
import uuid
from services.audio_separator import AudioSeparator
from services.chord_analyzer import ChordAnalyzer
from services.midi_converter import MidiConverter
from services.beat_tracker import BeatTracker

app = FastAPI(title="Tunebase AI Engine", description="High-performance audio separation for Math Rock", version="1.0.0")

# Setup CORS
origins = ["*"]  # Allow all for dev
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Directories (relative to the working dir; created on import if missing)
UPLOAD_DIR = "uploads"
PROCESSED_DIR = "processed"
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(PROCESSED_DIR, exist_ok=True)

# Mount static files so the frontend can stream the separated audio results.
app.mount("/processed", StaticFiles(directory=PROCESSED_DIR), name="processed")
# Mount uploads for verification if needed, but risky. Processed is enough.

# Initialize services at import time.
# Warning: loading models takes time and RAM. On failure, every service is
# set to None and endpoints respond with 503 instead of crashing the app.
try:
    separator = AudioSeparator()
    analyzer = ChordAnalyzer()
    midi_converter = MidiConverter()
    beat_tracker = BeatTracker()
except Exception as e:
    print(f"Warning: Failed to load models on startup. {e}")
    import traceback
    traceback.print_exc()
    separator = None
    analyzer = None
    midi_converter = None
    beat_tracker = None
@app.get("/")
def read_root():
    """Health-check endpoint confirming the engine is up."""
    greeting = {"message": "Tunebase AI Engine Ready 🎸"}
    return greeting
@app.post("/upload")
async def upload_audio(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """
    Upload an audio file and store it under a freshly generated UUID.

    Returns the id to use with /process/{file_id}. Processing itself is
    triggered separately.
    """
    file_id = str(uuid.uuid4())
    # Derive a safe extension. os.path.basename discards any path components a
    # hostile client put in the filename, and splitext handles names with no
    # dot at all (the original `split(".")[-1]` returned the whole name then).
    _, ext = os.path.splitext(os.path.basename(file.filename or ""))
    file_ext = ext.lstrip(".") or "bin"
    file_path = os.path.join(UPLOAD_DIR, f"{file_id}.{file_ext}")

    try:
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    finally:
        # Release the spooled temp file backing the upload.
        file.file.close()

    return {"id": file_id, "filename": file.filename, "status": "uploaded"}
# Cleanup Task
import time

def cleanup_old_files():
    """
    Delete session folders in PROCESSED_DIR older than one hour to save
    disk space on Hugging Face Spaces.
    """
    print("Running cleanup task...")
    cutoff = time.time() - 3600  # keep results for 1 hour
    for folder in os.listdir(PROCESSED_DIR):
        folder_path = os.path.join(PROCESSED_DIR, folder)
        if not os.path.isdir(folder_path):
            continue
        try:
            # Compare against the folder's last-modification time.
            if os.path.getmtime(folder_path) < cutoff:
                print(f"Deleting old session: {folder}")
                # shutil is already imported at the top of the file; the
                # original redundantly re-imported it here.
                shutil.rmtree(folder_path)
        except Exception as e:
            # Best-effort: one bad folder must not abort the whole sweep.
            print(f"Error cleaning {folder}: {e}")
# Global Progress Store (Simple in-memory)
processing_status = {}

@app.get("/status/{file_id}")
async def get_status(file_id: str):
    """Look up the current progress record for *file_id* (placeholder if unknown)."""
    fallback = {"status": "unknown", "progress": 0, "step": "Waiting"}
    return processing_status.get(file_id, fallback)

def update_progress(file_id, step, progress, status="processing", data=None):
    """Store a progress snapshot for *file_id*, merging any extra payload dict."""
    snapshot = {"status": status, "progress": progress, "step": step}
    if data:
        snapshot.update(data)
    processing_status[file_id] = snapshot
@app.post("/process/{file_id}")
def process_audio(file_id: str, background_tasks: BackgroundTasks, mode: str = "4stem"):
    """
    Run stem separation and analysis for a previously uploaded file.
    Modes: 2stem (vocals+instruments), 4stem (default), 6stem (full)

    Runs synchronously and returns the final result; progress is also
    published via update_progress for the /status endpoint.
    """
    if not separator:
        raise HTTPException(status_code=503, detail="AI Engine not initialized")

    # Locate the uploaded file by its id prefix (the extension is not known here).
    input_path = None
    for f in os.listdir(UPLOAD_DIR):
        if f.startswith(file_id):
            input_path = os.path.join(UPLOAD_DIR, f)
            break

    if not input_path:
        raise HTTPException(status_code=404, detail="File not found")

    output_dir = os.path.join(PROCESSED_DIR, file_id)

    # Dynamic base URL for HF Space vs localhost
    space_host = os.environ.get("SPACE_HOST")
    if space_host:
        base_url = f"https://{space_host}/processed"
    else:
        base_url = "http://localhost:8000/processed"

    try:
        update_progress(file_id, "Separating Audio Stems...", 10)

        # 1. Separate audio with the requested mode.
        def progress_cb(step, prog):
            update_progress(file_id, step, prog)

        stems = separator.separate(input_path, output_dir, callback=progress_cb, mode=mode)

        # 2. Convert melodic/harmonic stems to MIDI.
        update_progress(file_id, "Converting to MIDI...", 70)
        midi_files = {}
        if midi_converter:
            # Basic Pitch works best on monophonic/polyphonic instruments, less on drums
            target_stems = ['piano', 'bass', 'guitar_rhythm', 'guitar_lead', 'vocals']
            for stem_name in target_stems:
                if stem_name in stems:
                    stem_path = stems[stem_name]
                    midi_out = os.path.join(output_dir, f"{stem_name}.mid")
                    if midi_converter.convert(stem_path, midi_out):
                        midi_files[stem_name] = f"{base_url}/{file_id}/{stem_name}.mid"

        # 3. Beat / tempo analysis.
        update_progress(file_id, "Analyzing Rhythm...", 85)
        bpm = 0
        beats = []
        if beat_tracker:
            # Use Drums for beat tracking if available, otherwise 'other' or input
            beat_source = stems.get('drums') or stems.get('other') or input_path
            rhythm_data = beat_tracker.track(beat_source)
            bpm = rhythm_data['bpm']
            beats = rhythm_data['beats']

        # Fixed: this status update was previously emitted twice in a row.
        update_progress(file_id, "Finalizing...", 95)

        # Construct full URLs for frontend
        stems_url = {k: f"{base_url}/{file_id}/{os.path.basename(v)}" for k, v in stems.items()}

        final_data = {
            "stems": stems_url,
            "midi": midi_files,
            "bpm": bpm,
            "beats": beats
        }
        print(f"Final Data for {file_id}: {final_data}")  # Debug
        update_progress(file_id, "Completed", 100, status="completed", data=final_data)

        return {
            "status": "completed",
            **final_data
        }

    except Exception as e:
        import traceback
        traceback.print_exc()
        processing_status[file_id] = {"status": "error", "error": str(e)}
        print(f"Error processing: {e}")
        raise HTTPException(status_code=500, detail=str(e))
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
torch
|
| 5 |
+
torchaudio
|
| 6 |
+
demucs
|
| 7 |
+
librosa
|
| 8 |
+
numpy
|
| 9 |
+
scipy
|
| 10 |
+
soundfile
|
| 11 |
+
basic-pitch
|
services/__pycache__/audio_separator.cpython-310.pyc
ADDED
|
Binary file (5.41 kB). View file
|
|
|
services/__pycache__/audio_separator.cpython-312.pyc
ADDED
|
Binary file (4.71 kB). View file
|
|
|
services/__pycache__/beat_tracker.cpython-310.pyc
ADDED
|
Binary file (1.37 kB). View file
|
|
|
services/__pycache__/chord_analyzer.cpython-310.pyc
ADDED
|
Binary file (3.85 kB). View file
|
|
|
services/__pycache__/chord_analyzer.cpython-312.pyc
ADDED
|
Binary file (4.32 kB). View file
|
|
|
services/__pycache__/midi_converter.cpython-310.pyc
ADDED
|
Binary file (1.68 kB). View file
|
|
|
services/audio_separator.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchaudio

# Force the backend to soundfile to avoid the TorchCodec requirement on Windows.
# Catch Exception (not bare except) so interpreter-exit signals pass through.
try:
    torchaudio.set_audio_backend("soundfile")
except Exception:
    pass  # In newer versions it might be automatic or different, but this is a safe attempt

from demucs.apply import apply_model
from demucs.pretrained import get_model
import os
import pathlib

# Model configuration: map each separation mode to a Demucs model name.
# Support multiple models for different separation modes.
MODELS = {
    "2stem": "htdemucs",  # Will merge to 2 stems after
    "4stem": "htdemucs",  # Default 4 stem (drums, bass, vocals, other)
    "6stem": "htdemucs_6s",  # Full 6 stem with guitar and piano
}
class AudioSeparator:
    """Wraps pretrained Demucs models for 2/4/6-stem audio source separation."""

    def __init__(self):
        # Load all models on startup (slow and RAM-heavy; done once).
        self.models = {}
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load unique models only (2stem and 4stem share "htdemucs").
        unique_models = set(MODELS.values())
        for model_name in unique_models:
            print(f"Loading Demucs Model: {model_name}...")
            model = get_model(model_name)
            model.to(self.device)
            self.models[model_name] = model
        print(f"All models loaded on {self.device}")

    def separate(self, audio_path: str, output_dir: str, callback=None, mode="4stem"):
        """
        Split an audio file into stems.
        Modes: 2stem, 4stem, 6stem

        Returns a dict mapping stem name -> saved file path.
        callback(step_text, progress_int) is invoked at coarse milestones.
        """
        # Select model based on mode (unknown modes fall back to 4-stem).
        model_name = MODELS.get(mode, "htdemucs")
        model = self.models[model_name]

        # Load audio using soundfile directly (bypass torchaudio backend issues)
        import soundfile as sf
        wav_np, sr = sf.read(audio_path)

        # Convert to tensor; sf.read yields (samples,) or (samples, channels).
        wav = torch.from_numpy(wav_np).float()
        if wav.ndim == 1:
            wav = wav.unsqueeze(0)
        else:
            wav = wav.t()  # -> (channels, samples)

        # Resample to Demucs' expected 44.1 kHz if needed.
        if sr != 44100:
            if callback: callback("Resampling audio...", 15)
            resampler = torchaudio.transforms.Resample(sr, 44100)
            wav = resampler(wav)
            sr = 44100

        wav = wav.unsqueeze(0).to(self.device)

        # Normalize: zero mean / unit std of the mono reference, as Demucs expects.
        ref = wav.mean(0)
        wav = (wav - ref.mean()) / ref.std()

        if callback: callback("Running Demucs Inference...", 20)
        print(f"Starting separation with {model_name} (mode: {mode})...")

        sources = apply_model(model, wav, shifts=1, split=True, overlap=0.25, progress=True)[0]
        source_names = model.sources

        results = {}
        os.makedirs(output_dir, exist_ok=True)

        total_sources = len(source_names)
        source_tensors = {name: source for name, source in zip(source_names, sources)}

        # Handle different modes
        if mode == "2stem":
            # Merge everything that is not vocals into one "instruments" stem.
            if callback: callback("Merging to 2 stems...", 45)

            vocals = source_tensors.get('vocals')
            instruments = None
            for name, src in source_tensors.items():
                if name != 'vocals':
                    if instruments is None:
                        instruments = src.clone()
                    else:
                        instruments += src

            if vocals is not None:
                self._save_audio(vocals, sr, os.path.join(output_dir, "vocals.mp3"))
                results['vocals'] = os.path.join(output_dir, "vocals.mp3")
            if instruments is not None:
                self._save_audio(instruments, sr, os.path.join(output_dir, "instruments.mp3"))
                results['instruments'] = os.path.join(output_dir, "instruments.mp3")

        elif mode == "6stem":
            # Full separation, with extra lead/rhythm post-processing for guitar.
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                if name == 'guitar':
                    results.update(self._process_guitar(source, sr, output_dir))
                else:
                    stem_path = os.path.join(output_dir, f"{name}.mp3")
                    self._save_audio(source, sr, stem_path)
                    results[name] = stem_path
        else:
            # 4stem - standard separation
            for i, (name, source) in enumerate(source_tensors.items()):
                progress = 30 + int((i / total_sources) * 20)
                if callback: callback(f"Saving stem: {name}", progress)

                stem_path = os.path.join(output_dir, f"{name}.mp3")
                self._save_audio(source, sr, stem_path)
                results[name] = stem_path

        return results

    def _process_guitar(self, source, sr, output_dir):
        """
        Split the guitar stem into Lead and Rhythm via Mid-Side processing.
        - Mid (center) = rhythm (power chords / strumming usually sit center)
        - Side (stereo difference) = lead (often panned or stereo-effected)
        """
        # source shape: (2, samples)

        # Mono input cannot be mid-side split; return the raw stem for both keys.
        if source.shape[0] < 2:
            print("Warning: Guitar stem is Mono. Cannot split Rhythm/Lead.")
            path = os.path.join(output_dir, "guitar.mp3")
            self._save_audio(source, sr, path)
            return {"guitar_rhythm": path, "guitar_lead": path}

        # MID-SIDE PROCESSING
        # Mid = (L + R) / 2 -> center content; Side = (L - R) / 2 -> stereo diff.
        left = source[0:1, :]   # (1, samples)
        right = source[1:2, :]  # (1, samples)

        mid = (left + right) / 2.0   # Center content -> Rhythm
        side = (left - right) / 2.0  # Stereo diff -> Lead

        # Subtle band-pass filtering for better separation:
        # rhythm emphasizes 80-4000 Hz, lead emphasizes 200-8000 Hz.
        try:
            import scipy.signal as signal

            nyquist = sr / 2
            b_rhythm, a_rhythm = signal.butter(
                4, [80 / nyquist, 4000 / nyquist], btype='band')
            b_lead, a_lead = signal.butter(
                4, [200 / nyquist, 8000 / nyquist], btype='band')

            mid_filtered = signal.filtfilt(b_rhythm, a_rhythm, mid.numpy())
            side_filtered = signal.filtfilt(b_lead, a_lead, side.numpy())

            mid = torch.from_numpy(mid_filtered).float()
            side = torch.from_numpy(side_filtered).float()
        except Exception as e:
            print(f"Warning: Frequency filtering failed ({e}), using raw Mid-Side")

        # SWAPPED: Side = Rhythm (strumming often panned wide), Mid = Lead (melody often center)
        rhythm_stereo = torch.cat([side, side], dim=0)
        lead_stereo = torch.cat([mid, mid], dim=0)

        # If the side signal is too quiet the mix is essentially mono.
        side_rms = torch.sqrt(torch.mean(side ** 2))
        mid_rms = torch.sqrt(torch.mean(mid ** 2))

        if side_rms < mid_rms * 0.1:  # side is <10% of mid -> almost mono mix
            print("Notice: Audio appears to be mostly mono. Rhythm separation may be limited.")
            # Create pseudo-separation using low frequencies for rhythm.
            try:
                # Import and nyquist are redone locally so this fallback also
                # works when the filtering block above failed before defining
                # `signal`/`nyquist` (previously a silent NameError).
                import scipy.signal as signal
                nyquist = sr / 2
                b_lp, a_lp = signal.butter(4, 2000 / nyquist, btype='low')
                rhythm_from_mid = signal.filtfilt(b_lp, a_lp, mid.numpy())
                rhythm_stereo = torch.from_numpy(rhythm_from_mid).float()
                rhythm_stereo = torch.cat([rhythm_stereo, rhythm_stereo], dim=0)
            except Exception:
                pass

        # Normalize to -3 dB peak to prevent clipping.
        def normalize(tensor):
            peak = tensor.abs().max()
            if peak > 0:
                target_peak = 0.707  # -3dB
                return tensor * (target_peak / peak)
            return tensor

        rhythm_stereo = normalize(rhythm_stereo)
        lead_stereo = normalize(lead_stereo)

        # Save files
        path_rhythm = os.path.join(output_dir, "guitar_rhythm.mp3")
        path_lead = os.path.join(output_dir, "guitar_lead.mp3")

        self._save_audio(rhythm_stereo, sr, path_rhythm)
        self._save_audio(lead_stereo, sr, path_lead)

        return {
            "guitar_rhythm": path_rhythm,
            "guitar_lead": path_lead
        }

    def _save_audio(self, source, sr, path):
        """Write a (channels, samples) tensor to *path* via soundfile."""
        import soundfile as sf
        # Single CPU transfer (the original called .cpu() twice in a row).
        source = source.cpu()
        # sf.write expects (samples, channels), hence the transpose.
        sf.write(path, source.t().numpy(), sr)
services/beat_tracker.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
class BeatTracker:
    """Estimates tempo (BPM) and beat positions using librosa."""

    def __init__(self):
        # Stateless; librosa does all the work per call.
        pass

    def track(self, audio_path: str):
        """
        Track beats and tempo from audio file.
        Returns: {
            "bpm": float,
            "beats": list[float] # timestamps
        }
        """
        try:
            print(f"Tracking beats for {audio_path}...")
            samples, rate = librosa.load(audio_path)

            # Joint tempo + beat-frame estimation.
            tempo, frames = librosa.beat.beat_track(y=samples, sr=rate)

            # Some librosa versions return tempo as a one-element array.
            if isinstance(tempo, np.ndarray):
                tempo = tempo[0]

            stamps = librosa.frames_to_time(frames, sr=rate)
            return {
                "bpm": round(float(tempo), 2),
                "beats": [round(float(s), 2) for s in stamps],
            }

        except Exception as e:
            # Any failure (unreadable file, missing deps) degrades gracefully.
            print(f"Error tracking beats: {e}")
            return {"bpm": 0, "beats": []}
services/chord_analyzer.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import librosa
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
class ChordAnalyzer:
|
    def __init__(self):
        # Precompute one chroma template per (root, chord-quality) pair;
        # basic major/minor triads are given extra matching priority.
        self.templates = self._generate_chord_templates()
| 8 |
+
|
| 9 |
+
def _generate_chord_templates(self):
|
| 10 |
+
"""
|
| 11 |
+
Membuat template chroma untuk berbagai jenis chord.
|
| 12 |
+
12 Nada: C, C#, D, D#, E, F, F#, G, G#, A, A#, B
|
| 13 |
+
PRIORITIZED: Basic Major/Minor triads have higher matching priority
|
| 14 |
+
"""
|
| 15 |
+
templates = {}
|
| 16 |
+
roots = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
|
| 17 |
+
|
| 18 |
+
# Define chord qualities with PRIORITY WEIGHTS
|
| 19 |
+
# Higher weight = more likely to be matched
|
| 20 |
+
# Format: (intervals, priority_boost)
|
| 21 |
+
qualities = {
|
| 22 |
+
# PRIORITY 1: Basic triads (most common, highest priority)
|
| 23 |
+
'': ([0, 4, 7], 1.3), # Major - HIGHEST priority
|
| 24 |
+
'm': ([0, 3, 7], 1.3), # Minor - HIGHEST priority
|
| 25 |
+
|
| 26 |
+
# PRIORITY 2: Power chord & Suspended
|
| 27 |
+
'5': ([0, 7], 1.1), # Power chord
|
| 28 |
+
'sus4': ([0, 5, 7], 1.0),
|
| 29 |
+
'sus2': ([0, 2, 7], 1.0),
|
| 30 |
+
|
| 31 |
+
# PRIORITY 3: 7th chords
|
| 32 |
+
'maj7': ([0, 4, 7, 11], 0.95),
|
| 33 |
+
'm7': ([0, 3, 7, 10], 0.95),
|
| 34 |
+
'7': ([0, 4, 7, 10], 0.95), # Dominant 7
|
| 35 |
+
|
| 36 |
+
# PRIORITY 4: Extended & Other (lower priority to avoid false matches)
|
| 37 |
+
'dim': ([0, 3, 6], 0.9),
|
| 38 |
+
'aug': ([0, 4, 8], 0.9),
|
| 39 |
+
'6': ([0, 4, 7, 9], 0.85),
|
| 40 |
+
'm6': ([0, 3, 7, 9], 0.85),
|
| 41 |
+
'add9': ([0, 4, 7, 2], 0.85),
|
| 42 |
+
'madd9': ([0, 3, 7, 2], 0.85),
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
for i, root in enumerate(roots):
|
| 46 |
+
for quality, (intervals, priority) in qualities.items():
|
| 47 |
+
# Build chroma vector with weighted notes
|
| 48 |
+
vec = np.zeros(12)
|
| 49 |
+
for j, interval in enumerate(intervals):
|
| 50 |
+
idx = (i + interval) % 12
|
| 51 |
+
# Root = 2.0, Fifth = 1.5, Third = 1.2, Others = 1.0
|
| 52 |
+
if j == 0: # Root
|
| 53 |
+
weight = 2.0
|
| 54 |
+
elif interval == 7: # Fifth
|
| 55 |
+
weight = 1.5
|
| 56 |
+
elif interval in [3, 4]: # Third (major or minor)
|
| 57 |
+
weight = 1.2
|
| 58 |
+
else:
|
| 59 |
+
weight = 1.0
|
| 60 |
+
vec[idx] = weight
|
| 61 |
+
|
| 62 |
+
# Apply priority boost
|
| 63 |
+
vec *= priority
|
| 64 |
+
|
| 65 |
+
chord_name = f"{root}{quality}"
|
| 66 |
+
# Normalize vector
|
| 67 |
+
norm = np.linalg.norm(vec)
|
| 68 |
+
if norm > 0:
|
| 69 |
+
vec /= norm
|
| 70 |
+
templates[chord_name] = vec
|
| 71 |
+
|
| 72 |
+
return templates
|
| 73 |
+
|
| 74 |
+
    def analyze(self, audio_path: str, sr=22050):
        """
        Analyze an audio file and return its chord progression with timestamps.

        Pipeline: load audio -> harmonic/percussive separation -> chroma-CQT
        features -> temporal median smoothing -> per-frame template matching
        against self.templates -> segment collection and short-segment merging.

        Args:
            audio_path: Path to the audio file to analyze.
            sr: Target sample rate for loading (default 22050 Hz).

        Returns:
            A list of dicts like {"chord": "Cmaj", "start": 0.0, "end": 2.5}
            (times in seconds, rounded to 2 decimals), ordered by start time.
            Frames whose best match scores below THRESHOLD are labeled "N.C."
            (no chord). Returns [] on any error.
        """
        print(f"Analyzing chords for: {audio_path}")
        try:
            y, sr = librosa.load(audio_path, sr=sr)

            # Harmonic-Percussive Source Separation: keep only the harmonic
            # component so percussive transients don't pollute the chroma.
            y_harmonic, _ = librosa.effects.hpss(y)

            # Compute Chroma CQT (24 bins/octave for finer pitch resolution,
            # folded down to 12 chroma bins).
            chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr, bins_per_octave=24)

            # 1. TEMPORAL SMOOTHING (Median Filter)
            # Increased filter size for more stability (21 frames ~= 0.5s
            # at the default hop length of 512 samples / 22050 Hz).
            import scipy.ndimage
            chroma = scipy.ndimage.median_filter(chroma, size=(1, 21))
            chroma = librosa.util.normalize(chroma)

            num_frames = chroma.shape[1]

            # Template matching: dot product of each (normalized) chord
            # template against every chroma frame in one matrix multiply.
            # Assumes self.templates maps chord name -> 12-dim unit vector.
            template_names = list(self.templates.keys())
            template_matrix = np.array([self.templates[name] for name in template_names])
            scores = np.dot(template_matrix, chroma)

            # Best-matching template and its score, per frame.
            max_indices = np.argmax(scores, axis=0)
            max_scores = np.max(scores, axis=0)

            # 2. POST-PROCESSING (Merge Short Segments)
            current_chord = None
            start_time = 0.0

            THRESHOLD = 0.6  # Lower threshold for basic chord detection
            MIN_DURATION = 0.8  # Chord must last 0.8s to be valid

            raw_segments = []

            # First Pass: Collect segments (runs of consecutive frames with
            # the same detected chord).
            for i in range(num_frames):
                idx = max_indices[i]
                score = max_scores[i]
                # NOTE(review): frames_to_time uses librosa's default hop
                # length, which matches chroma_cqt's default — confirm if
                # either ever gets an explicit hop_length.
                timestamp = librosa.frames_to_time(i, sr=sr)
                chord_name = template_names[idx] if score > THRESHOLD else "N.C."

                if chord_name != current_chord:
                    if current_chord is not None:
                        raw_segments.append({
                            "chord": current_chord,
                            "start": start_time,
                            "end": timestamp,
                            "duration": timestamp - start_time
                        })
                    current_chord = chord_name
                    start_time = timestamp

            # Append last segment, extended to the true end of the audio.
            if current_chord is not None:
                end_time = librosa.get_duration(y=y, sr=sr)
                raw_segments.append({
                    "chord": current_chord,
                    "start": start_time,
                    "end": end_time,
                    "duration": end_time - start_time
                })

            # Second Pass: Merge short segments to neighbor
            final_results = []
            if not raw_segments: return []

            # Simple heuristic: If segment < MIN_DURATION, merge to previous if possible
            for seg in raw_segments:
                if not final_results:
                    final_results.append(seg)
                    continue

                prev = final_results[-1]

                # If the current segment is too short, let the previous segment
                # absorb it (or it gets ignored as a blip).
                # BUT if its chord is the SAME as the previous one, just merge.
                if seg["chord"] == prev["chord"]:
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                elif seg["duration"] < MIN_DURATION:
                    # Merge to previous (extend previous to cover this short blip)
                    prev["end"] = seg["end"]
                    prev["duration"] += seg["duration"]
                else:
                    final_results.append(seg)

            # Format output: expose only chord/start/end (drop the internal
            # "duration" bookkeeping key) with times rounded to 2 decimals.
            formatted_results = []
            for r in final_results:
                formatted_results.append({
                    "chord": r["chord"],
                    "start": round(r["start"], 2),
                    "end": round(r["end"], 2)
                })

            return formatted_results

        except Exception as e:
            # Best-effort API: callers get an empty progression on failure.
            print(f"Chord Analysis Error: {e}")
            return []
|
services/midi_converter.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from basic_pitch.inference import predict_and_save
|
| 2 |
+
from basic_pitch import ICASSP_2022_MODEL_PATH
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
class MidiConverter:
    """Converts audio files to MIDI using Spotify's Basic Pitch model."""

    def __init__(self):
        print("Initializing Basic Pitch for MIDI conversion...")
        # NOTE(review): output_dir is currently unused — convert() derives
        # the directory from its output_path argument. Kept so any external
        # code reading this attribute keeps working.
        self.output_dir = "midi_output"

    def convert(self, audio_path: str, output_path: str):
        """
        Convert an audio file to MIDI using Basic Pitch.

        Args:
            audio_path: Path to the input audio file (basic-pitch loads it
                internally).
            output_path: Desired path of the resulting .mid file.

        Returns:
            output_path on success, or None if conversion failed or the
            expected MIDI file was not produced.
        """
        try:
            # basic-pitch writes its results into a directory (it names the
            # file itself), so we run it in output_path's directory and then
            # move the generated file into place.
            # Fall back to "." when output_path has no directory component;
            # an empty-string directory is not a usable target.
            output_dir = os.path.dirname(output_path) or "."

            print(f"Converting {audio_path} to MIDI...")

            # Positional flags after the output directory are:
            # save_midi=True, sonify_midi=False, save_model_outputs=False,
            # save_notes=False. Thresholds tuned to reduce spurious notes.
            predict_and_save(
                [audio_path],
                output_dir,
                True,
                False,
                False,
                False,
                ICASSP_2022_MODEL_PATH,
                onset_threshold=0.6,  # Higher threshold to reduce noise
                frame_threshold=0.4,
                minimum_note_length=100.0,  # ms
                minimum_frequency=None,
                maximum_frequency=None
            )

            # Basic Pitch saves as <original_name>_basic_pitch.mid; rename
            # it to the caller-requested output_path.
            original_basename = os.path.splitext(os.path.basename(audio_path))[0]
            generated_midi = os.path.join(output_dir, f"{original_basename}_basic_pitch.mid")

            if os.path.exists(generated_midi):
                # os.replace overwrites an existing destination atomically,
                # so no separate exists/remove dance is needed (which was
                # also racy and would fail on Windows via plain os.rename).
                os.replace(generated_midi, output_path)
                print(f"MIDI saved to {output_path}")
                return output_path
            else:
                print("Warning: Expected MIDI file not found.")
                return None

        except Exception as e:
            # Best-effort API: log the full traceback, signal failure by None.
            print(f"Error converting to MIDI: {e}")
            import traceback
            traceback.print_exc()
            return None
|