/**
 * ML Bridge - Node.js interface to Python ML processor
 *
 * Calls Python scripts via subprocess for:
 * - Stem separation (Demucs)
 * - Audio fingerprinting (Chromaprint)
 * - Embedding generation (CLAP)
 */

import { spawn } from "child_process";
import path from "path";
import { fileURLToPath } from "url";
import fs from "fs/promises";

// Get directory name in ESM
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Path to ML processor
const ML_DIR = path.resolve(__dirname, "../ml");
const PROCESSOR_PATH = path.join(ML_DIR, "processor.py");

// Types for ML operations

/** One separated stem as reported by the processor's "separate" operation. */
export interface StemResult {
  type: "vocals" | "drums" | "bass" | "other" | "guitar" | "piano";
  path: string;
  duration: number | null;
}

/** Result of {@link separateStems}; `error` is set when `success` is false. */
export interface StemSeparationResult {
  success: boolean;
  stems?: StemResult[];
  model?: string;
  output_dir?: string;
  error?: string;
}

/** Result of {@link generateFingerprint}. */
export interface FingerprintResult {
  success: boolean;
  fingerprint?: string;
  duration?: number;
  algorithm?: string;
  version?: string;
  error?: string;
}

/** Result of {@link generateEmbedding}. */
export interface EmbeddingResult {
  success: boolean;
  embedding?: number[];
  dimension?: number;
  model?: string;
  error?: string;
}

/** Result of {@link processFullPipeline}: per-stem fingerprint and embedding data. */
export interface ProcessAllResult {
  success: boolean;
  stems?: Array<{
    type: string;
    path: string;
    duration: number | null;
    fingerprint: string | null;
    fingerprint_error?: string;
    embedding: number[] | null;
    embedding_model?: string;
    embedding_error?: string;
  }>;
  error?: string;
}

/** Result of {@link checkMLHealth}: availability flags for each ML dependency. */
export interface HealthCheckResult {
  success: boolean;
  demucs: boolean;
  chromaprint: boolean;
  clap: boolean;
  faiss?: boolean;
  demucs_version?: string;
  chromaprint_version?: string;
  clap_source?: string;
  faiss_version?: string;
  errors: string[];
}

/**
 * Get the Python command to use.
 *
 * Prefers the PYTHON_PATH env var; otherwise falls back to the system
 * interpreter name ("python" on Windows, "python3" elsewhere).
 * The environment is read on every call, so changes to PYTHON_PATH at
 * runtime are picked up.
 */
function getPythonCommand(): string {
  // Allow explicit override via env var
  if (process.env.PYTHON_PATH) {
    return process.env.PYTHON_PATH;
  }

  // Default to system python
  return process.platform === "win32" ? "python" : "python3";
}

/**
 * Pull the `error` field out of a processor JSON payload that reports failure.
 *
 * @returns the error text when `stdout` is JSON with a falsy `success` and a
 *   truthy `error`; `undefined` when the output is not JSON or carries no error.
 */
function extractProcessorError(stdout: string): string | undefined {
  try {
    const parsed: unknown = JSON.parse(stdout);
    if (typeof parsed !== "object" || parsed === null) {
      return undefined;
    }
    const { success, error } = parsed as { success?: unknown; error?: unknown };
    if (success || !error) {
      return undefined;
    }
    // Non-string errors are serialised so they do not degrade to "[object Object]".
    return typeof error === "string" ? error : JSON.stringify(error);
  } catch {
    // Output was not JSON; the caller falls back to raw stderr/stdout.
    return undefined;
  }
}

/**
 * Execute Python processor with given operation and arguments.
 *
 * Spawns `<python> processor.py <operation> <argsJson>` with the ML directory
 * as working directory and parses the child's stdout as JSON.
 *
 * @param operation Operation name passed as the first CLI argument.
 * @param args Arguments, JSON-serialised into the second CLI argument.
 * @param timeoutMs Time after which the child is sent SIGTERM and the promise
 *   rejects. Defaults to 10 minutes.
 * @returns The parsed JSON output, cast to `T` (the shape is not validated).
 * @throws Rejects when the interpreter cannot be spawned, the operation times
 *   out, the child exits with a non-zero code, or stdout is not valid JSON.
 */
async function execPython<T>(
  operation: string,
  args: Record<string, unknown>,
  timeoutMs: number = 600000 // 10 minute default
): Promise<T> {
  return new Promise<T>((resolve, reject) => {
    const argsJson = JSON.stringify(args);
    const pythonCmd = getPythonCommand();

    const proc = spawn(pythonCmd, [PROCESSOR_PATH, operation, argsJson], {
      cwd: ML_DIR,
      env: {
        ...process.env,
        PYTHONUNBUFFERED: "1", // Ensure immediate output
      },
    });

    // Decode at the stream level so a multi-byte UTF-8 character that is
    // split across two chunks is not corrupted by per-chunk toString().
    proc.stdout.setEncoding("utf8");
    proc.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";
    let timedOut = false;

    // Set timeout
    const timeout = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGTERM");
      reject(new Error(`ML operation timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    proc.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });

    proc.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    proc.on("error", (err) => {
      clearTimeout(timeout);
      // Prefer the structured error code; keep the message check as a fallback.
      // Note: spawn also reports ENOENT when `cwd` does not exist.
      const code = (err as Error & { code?: unknown }).code;
      if (code === "ENOENT" || err.message.includes("ENOENT")) {
        reject(
          new Error(
            `Python not found. Ensure python3 is installed and in PATH. (command: ${pythonCmd})`
          )
        );
      } else {
        reject(err);
      }
    });

    proc.on("close", (code) => {
      clearTimeout(timeout);

      if (timedOut) return; // Already rejected

      if (code !== 0) {
        // Try to parse error from stdout (processor outputs JSON even on error)
        const processorError = extractProcessorError(stdout);
        if (processorError !== undefined) {
          reject(new Error(processorError));
          return;
        }
        reject(new Error(`ML operation failed (exit code ${code}): ${stderr || stdout}`));
        return;
      }

      try {
        const result: unknown = JSON.parse(stdout);
        // The processor's output shape is trusted here, not validated.
        resolve(result as T);
      } catch (e) {
        // Truncate output to avoid flooding logs with embeddings
        const truncated = stdout.length > 500 ? stdout.slice(0, 500) + "..." : stdout;
        reject(new Error(`Failed to parse ML result: ${e}. Output (truncated): ${truncated}`));
      }
    });
  });
}

/**
 * Check if ML dependencies are available.
 *
 * Never rejects: any failure to run the "health" operation is converted into
 * a result with every flag false and the message in `errors`.
 */
export async function checkMLHealth(): Promise<HealthCheckResult> {
  try {
    return await execPython<HealthCheckResult>("health", {}, 30000);
  } catch (error) {
    return {
      success: false,
      demucs: false,
      chromaprint: false,
      clap: false,
      errors: [error instanceof Error ? error.message : String(error)],
    };
  }
}

/**
 * Separate audio into stems using Demucs.
 *
 * @param inputPath Audio file to separate; a missing file yields a
 *   `{ success: false }` result instead of a rejection.
 * @param outputDir Directory for the stems; created recursively if absent.
 * @param model Model name forwarded to the processor.
 * @throws Rejects if the output directory cannot be created or the processor fails.
 */
export async function separateStems(
  inputPath: string,
  outputDir: string,
  model: string = "htdemucs"
): Promise<StemSeparationResult> {
  // Verify input file exists
  try {
    await fs.access(inputPath);
  } catch {
    return {
      success: false,
      error: `Input file not found: ${inputPath}`,
    };
  }

  // Create output directory
  await fs.mkdir(outputDir, { recursive: true });

  return execPython<StemSeparationResult>("separate", {
    input_path: inputPath,
    output_dir: outputDir,
    model,
  });
}

/**
 * Generate audio fingerprint using Chromaprint.
 *
 * @param audioPath Audio file forwarded to the "fingerprint" operation.
 */
export async function generateFingerprint(
  audioPath: string
): Promise<FingerprintResult> {
  return execPython<FingerprintResult>(
    "fingerprint",
    {
      audio_path: audioPath,
    },
    120000 // 2 minute timeout for fingerprinting
  );
}

/**
 * Generate audio embedding using CLAP.
 *
 * @param audioPath Audio file forwarded to the "embed" operation.
 * @param model Model identifier forwarded to the processor.
 */
export async function generateEmbedding(
  audioPath: string,
  model: string = "laion/larger_clap_music"
): Promise<EmbeddingResult> {
  return execPython<EmbeddingResult>(
    "embed",
    {
      audio_path: audioPath,
      model,
    },
    300000 // 5 minute timeout for embedding
  );
}

/** One chunk returned by {@link generateChunkEmbeddings}. */
export interface ChunkEmbedding {
  start_time: number;
  end_time: number;
  embedding: number[];
  dimension: number;
}

/** Result of {@link generateChunkEmbeddings}. */
export interface ChunkEmbeddingsResult {
  success: boolean;
  chunks?: ChunkEmbedding[];
  total_duration?: number;
  chunk_count?: number;
  error?: string;
}

/**
 * Generate chunk-based embeddings for an audio file
 * This splits the audio into overlapping windows and generates
 * an embedding for each chunk, enabling section-level matching.
 *
 * @param audioPath Audio file forwarded to the "embed_chunks" operation.
 * @param chunkDuration Forwarded as `chunk_duration`.
 * @param chunkOverlap Forwarded as `chunk_overlap`.
 * @param model Model identifier forwarded to the processor.
 */
export async function generateChunkEmbeddings(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0,
  model: string = "laion/larger_clap_music"
): Promise<ChunkEmbeddingsResult> {
  return execPython<ChunkEmbeddingsResult>(
    "embed_chunks",
    {
      audio_path: audioPath,
      chunk_duration: chunkDuration,
      chunk_overlap: chunkOverlap,
      model,
    },
    600000 // 10 minute timeout for chunk embedding (longer audio)
  );
}

/**
 * Process audio through full pipeline: separate -> fingerprint -> embed
 *
 * @param inputPath Audio file forwarded as `input_path`.
 * @param outputDir Output directory forwarded as `output_dir`.
 */
export async function processFullPipeline(
  inputPath: string,
  outputDir: string
): Promise<ProcessAllResult> {
  return execPython<ProcessAllResult>(
    "process_all",
    {
      input_path: inputPath,
      output_dir: outputDir,
    },
    900000 // 15 minute timeout for full pipeline
  );
}

/**
 * Check if Python ML environment is available.
 *
 * Runs `<python> --version` using the same interpreter that the ML operations
 * spawn (so a PYTHON_PATH override is honoured) and resolves true only when it
 * exits with code 0. Never rejects.
 */
export async function isPythonAvailable(): Promise<boolean> {
  return new Promise<boolean>((resolve) => {
    const proc = spawn(getPythonCommand(), ["--version"]);

    proc.on("error", () => resolve(false));
    proc.on("close", (code) => resolve(code === 0));
  });
}

/**
 * Check if processor.py exists (i.e. is accessible at the expected path).
 */
export async function isProcessorAvailable(): Promise<boolean> {
  try {
    await fs.access(PROCESSOR_PATH);
    return true;
  } catch {
    return false;
  }
}

/**
 * Generic call to Python processor for any operation
 * Used for FAISS operations and other extensible functionality
 *
 * @param operation Operation name understood by the processor.
 * @param args JSON-serialisable arguments for the operation.
 * @param timeoutMs Timeout in milliseconds (default 60 seconds).
 * @returns The processor's parsed JSON output, cast to `T` without validation.
 */
export async function callPythonProcessor<T = Record<string, unknown>>(
  operation: string,
  args: Record<string, unknown>,
  timeoutMs: number = 60000
): Promise<T> {
  return execPython<T>(operation, args, timeoutMs);
}

// ============== Fingerprint-based matching (Chromaprint) ==============

/** One chunk returned by the "fingerprint_chunks" operation. */
export interface ChunkFingerprint {
  start_time: number;
  end_time: number;
  fingerprint: string;
}

/** Result of the "fingerprint_chunks" operation. */
export interface ChunkFingerprintsResult {
  success: boolean;
  chunks?: ChunkFingerprint[];
  total_duration?: number;
  chunk_count?: number;
  error?: string;
}

/**
 * Generate fingerprints for audio chunks
 * Unlike CLAP embeddings, Chromaprint fingerprints give:
 * - 100% match for same audio
 * - ~2-3% match for different audio
 */
export async function generateChunkFingerprints(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<ChunkFingerprintsResult> {
  return execPython<ChunkFingerprintsResult>(
    "fingerprint_chunks",
    {
      audio_path: audioPath,
      chunk_duration: chunkDuration,
      chunk_overlap: chunkOverlap,
    },
    600000 // 10 minute timeout
  );
}

/** One entry in a fingerprint search result. */
export interface FingerprintMatch {
  score: number;
  trackId: string;
  stemType?: string;
  title: string;
  artist: string;
  startTime?: number;
  endTime?: number;
}

/** Result of {@link searchFingerprints}. */
export interface FingerprintSearchResult {
  matches: FingerprintMatch[];
  message?: string;
}

/**
 * Search fingerprint index for matches
 *
 * @param fingerprint Fingerprint string forwarded to the "fp_search" operation.
 * @param k Forwarded as `k` (default 5).
 * @param threshold Forwarded as `threshold` (default 0.3).
 */
export async function searchFingerprints(
  fingerprint: string,
  k: number = 5,
  threshold: number = 0.3
): Promise<FingerprintSearchResult> {
  return execPython<FingerprintSearchResult>(
    "fp_search",
    {
      fingerprint,
      k,
      threshold,
    },
    30000 // 30 second timeout
  );
}

/** Result of {@link getFingerprintStats}. */
export interface FingerprintIndexStats {
  exists: boolean;
  total: number;
  uniqueTracks: number;
}

/**
 * Get fingerprint index statistics
 */
export async function getFingerprintStats(): Promise<FingerprintIndexStats> {
  return execPython<FingerprintIndexStats>("fp_stats", {}, 10000);
}

// ============== Style-based similarity ==============

/** Result of {@link extractStyleFeatures}. */
export interface StyleFeatures {
  success: boolean;
  feature_vector?: number[];
  dimension?: number;
  tempo?: number;
  error?: string;
}

/** One entry in a style search result. */
export interface StyleMatch {
  score: number;
  trackId: string;
  title: string;
  artist: string;
}

/** Result of {@link searchStyleSimilar}. */
export interface StyleSearchResult {
  matches: StyleMatch[];
}

/**
 * Extract musical style features from audio
 *
 * @param audioPath Audio file forwarded to the "style_extract" operation.
 * @param duration Optional value forwarded as `duration`; when omitted the key
 *   is dropped from the JSON arguments.
 */
export async function extractStyleFeatures(
  audioPath: string,
  duration?: number
): Promise<StyleFeatures> {
  return execPython<StyleFeatures>(
    "style_extract",
    {
      audio_path: audioPath,
      duration,
    },
    120000 // 2 minute timeout
  );
}

/** One chunk returned by {@link extractStyleChunks}. */
export interface StyleChunk {
  start_time: number;
  end_time: number;
  feature_vector: number[];
}

/** Result of {@link extractStyleChunks}. */
export interface StyleChunksResult {
  success: boolean;
  total_duration?: number;
  chunk_count?: number;
  chunks?: StyleChunk[];
  error?: string;
}

/**
 * Extract chunk-level style features for granular matching
 *
 * @param audioPath Audio file forwarded to the "style_chunks" operation.
 * @param chunkDuration Forwarded as `chunk_duration`.
 * @param chunkOverlap Forwarded as `chunk_overlap`.
 */
export async function extractStyleChunks(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<StyleChunksResult> {
  return execPython<StyleChunksResult>(
    "style_chunks",
    {
      audio_path: audioPath,
      chunk_duration: chunkDuration,
      chunk_overlap: chunkOverlap,
    },
    300000 // 5 minute timeout
  );
}

/**
 * Search for tracks with similar musical style
 *
 * @param features Feature vector forwarded to the "style_search" operation.
 * @param k Forwarded as `k` (default 5).
 * @param threshold Forwarded as `threshold` (default 0.85).
 */
export async function searchStyleSimilar(
  features: number[],
  k: number = 5,
  threshold: number = 0.85
): Promise<StyleSearchResult> {
  return execPython<StyleSearchResult>(
    "style_search",
    {
      features,
      k,
      threshold,
    },
    30000 // 30 second timeout
  );
}

/** Result of {@link getStyleStats}. */
export interface StyleIndexStats {
  exists: boolean;
  total: number;
  uniqueTracks: number;
}

/**
 * Get style index statistics
 */
export async function getStyleStats(): Promise<StyleIndexStats> {
  return execPython<StyleIndexStats>("style_stats", {}, 10000);
}

// ============== MERT (Music-specific embeddings) ==============

/** One chunk returned by {@link extractMertChunks}. */
export interface MertChunk {
  start_time: number;
  end_time: number;
  embedding: number[];
}

/** Result of {@link extractMertChunks}. */
export interface MertChunksResult {
  success: boolean;
  total_duration?: number;
  chunk_count?: number;
  chunks?: MertChunk[];
  error?: string;
}

/**
 * Extract MERT chunk embeddings for music-specific similarity
 * MERT gives much better discrimination than generic audio features
 *
 * @param audioPath Audio file forwarded to the "mert_chunks" operation.
 * @param chunkDuration Forwarded as `chunk_duration`.
 * @param chunkOverlap Forwarded as `chunk_overlap`.
 */
export async function extractMertChunks(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<MertChunksResult> {
  return execPython<MertChunksResult>(
    "mert_chunks",
    {
      audio_path: audioPath,
      chunk_duration: chunkDuration,
      chunk_overlap: chunkOverlap,
    },
    600000 // 10 min timeout
  );
}

/** One entry in a MERT search result. */
export interface MertMatch {
  score: number;
  trackId: string;
  title: string;
  artist: string;
  startTime?: number;
  endTime?: number;
}

/** Result of {@link searchMertSimilar}. */
export interface MertSearchResult {
  matches: MertMatch[];
}

/**
 * Search MERT index for similar music
 *
 * @param embedding Query embedding forwarded to the "mert_search" operation.
 * @param k Forwarded as `k` (default 5).
 * @param threshold Forwarded as `threshold`; 0.75 is sent when omitted.
 * @param percentile If set (0-100), use dynamic threshold at this percentile
 */
export async function searchMertSimilar(
  embedding: number[],
  k: number = 5,
  threshold?: number,
  percentile?: number
): Promise<MertSearchResult> {
  return execPython<MertSearchResult>(
    "mert_search",
    {
      embedding,
      k,
      // `??` rather than `||` so an explicit threshold of 0 is respected.
      threshold: threshold ?? 0.75,
      percentile,
    },
    30000 // 30 second timeout
  );
}

/** Result of {@link getMertStats}. */
export interface MertIndexStats {
  exists: boolean;
  total: number;
  uniqueTracks: number;
}

/**
 * Get MERT index statistics
 */
export async function getMertStats(): Promise<MertIndexStats> {
  return execPython<MertIndexStats>("mert_stats", {}, 10000);
}