// NOTE(review): removed scraped web-page banner text ("Spaces: Running on CPU Upgrade")
// that was accidentally captured with this file — it is not part of the source.
/**
 * ML Bridge - Node.js interface to the Python ML processor.
 *
 * Invokes Python scripts via subprocess for:
 * - Stem separation (Demucs)
 * - Audio fingerprinting (Chromaprint)
 * - Embedding generation (CLAP)
 */
import { spawn } from "child_process";
import path from "path";
import { fileURLToPath } from "url";
import fs from "fs/promises";

// Get directory name in ESM (__dirname/__filename globals do not exist in ES modules)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Paths to the Python ML processor, expected at ../ml/processor.py relative to this file
const ML_DIR = path.resolve(__dirname, "../ml");
const PROCESSOR_PATH = path.join(ML_DIR, "processor.py");
// Types for ML operations.
// These mirror the JSON payloads emitted by processor.py; optional fields are
// present on success or failure respectively.

/** One separated stem as reported by the Python processor. */
export interface StemResult {
  // Stem category — presumably matches Demucs output stem names; confirm against processor.py
  type: "vocals" | "drums" | "bass" | "other" | "guitar" | "piano";
  // Filesystem path to the rendered stem audio file
  path: string;
  // Duration (presumably seconds), or null when the processor could not determine it
  duration: number | null;
}

/** Result of a Demucs stem-separation run. */
export interface StemSeparationResult {
  success: boolean;
  stems?: StemResult[]; // present on success
  model?: string; // model name used (e.g. "htdemucs")
  output_dir?: string; // directory the stems were written to
  error?: string; // present on failure
}

/** Result of a Chromaprint fingerprinting run. */
export interface FingerprintResult {
  success: boolean;
  fingerprint?: string; // encoded fingerprint string
  duration?: number; // presumably seconds — TODO confirm in processor.py
  algorithm?: string;
  version?: string;
  error?: string; // present on failure
}

/** Result of a CLAP embedding run. */
export interface EmbeddingResult {
  success: boolean;
  embedding?: number[]; // embedding vector
  dimension?: number; // length of the embedding vector
  model?: string; // model identifier used
  error?: string; // present on failure
}

/** Result of the full separate -> fingerprint -> embed pipeline. */
export interface ProcessAllResult {
  success: boolean;
  stems?: Array<{
    type: string;
    path: string;
    duration: number | null;
    fingerprint: string | null; // null when fingerprinting failed for this stem
    fingerprint_error?: string; // per-stem fingerprint failure detail
    embedding: number[] | null; // null when embedding failed for this stem
    embedding_model?: string;
    embedding_error?: string; // per-stem embedding failure detail
  }>;
  error?: string; // present on overall failure
}

/** Availability report for the Python-side ML dependencies. */
export interface HealthCheckResult {
  success: boolean;
  demucs: boolean;
  chromaprint: boolean;
  clap: boolean;
  faiss?: boolean;
  demucs_version?: string;
  chromaprint_version?: string;
  clap_source?: string;
  faiss_version?: string;
  // Human-readable diagnostics for anything that failed to load
  errors: string[];
}
| /** | |
| * Get Python command to use | |
| * Prefers PYTHON_PATH env var, then conda env, then system python | |
| */ | |
| function getPythonCommand(): string { | |
| // Allow explicit override via env var | |
| if (process.env.PYTHON_PATH) { | |
| return process.env.PYTHON_PATH; | |
| } | |
| // Default to system python | |
| return process.platform === "win32" ? "python" : "python3"; | |
| } | |
| /** | |
| * Execute Python processor with given operation and arguments | |
| */ | |
| async function execPython<T>( | |
| operation: string, | |
| args: Record<string, unknown>, | |
| timeoutMs: number = 600000 // 10 minute default | |
| ): Promise<T> { | |
| return new Promise((resolve, reject) => { | |
| const argsJson = JSON.stringify(args); | |
| const pythonCmd = getPythonCommand(); | |
| const proc = spawn(pythonCmd, [PROCESSOR_PATH, operation, argsJson], { | |
| cwd: ML_DIR, | |
| env: { | |
| ...process.env, | |
| PYTHONUNBUFFERED: "1", // Ensure immediate output | |
| }, | |
| }); | |
| let stdout = ""; | |
| let stderr = ""; | |
| let timedOut = false; | |
| // Set timeout | |
| const timeout = setTimeout(() => { | |
| timedOut = true; | |
| proc.kill("SIGTERM"); | |
| reject(new Error(`ML operation timed out after ${timeoutMs}ms`)); | |
| }, timeoutMs); | |
| proc.stdout.on("data", (data) => { | |
| stdout += data.toString(); | |
| }); | |
| proc.stderr.on("data", (data) => { | |
| stderr += data.toString(); | |
| }); | |
| proc.on("error", (err) => { | |
| clearTimeout(timeout); | |
| if (err.message.includes("ENOENT")) { | |
| reject(new Error(`Python not found. Ensure python3 is installed and in PATH.`)); | |
| } else { | |
| reject(err); | |
| } | |
| }); | |
| proc.on("close", (code) => { | |
| clearTimeout(timeout); | |
| if (timedOut) return; // Already rejected | |
| if (code !== 0) { | |
| // Try to parse error from stdout (processor outputs JSON even on error) | |
| try { | |
| const result = JSON.parse(stdout); | |
| if (!result.success && result.error) { | |
| reject(new Error(result.error)); | |
| return; | |
| } | |
| } catch { | |
| // Ignore parse error | |
| } | |
| reject(new Error(`ML operation failed (exit code ${code}): ${stderr || stdout}`)); | |
| return; | |
| } | |
| try { | |
| const result = JSON.parse(stdout); | |
| resolve(result as T); | |
| } catch (e) { | |
| // Truncate output to avoid flooding logs with embeddings | |
| const truncated = stdout.length > 500 ? stdout.slice(0, 500) + "..." : stdout; | |
| reject(new Error(`Failed to parse ML result: ${e}. Output (truncated): ${truncated}`)); | |
| } | |
| }); | |
| }); | |
| } | |
| /** | |
| * Check if ML dependencies are available | |
| */ | |
| export async function checkMLHealth(): Promise<HealthCheckResult> { | |
| try { | |
| return await execPython<HealthCheckResult>("health", {}, 30000); | |
| } catch (error) { | |
| return { | |
| success: false, | |
| demucs: false, | |
| chromaprint: false, | |
| clap: false, | |
| errors: [error instanceof Error ? error.message : String(error)], | |
| }; | |
| } | |
| } | |
| /** | |
| * Separate audio into stems using Demucs | |
| */ | |
| export async function separateStems( | |
| inputPath: string, | |
| outputDir: string, | |
| model: string = "htdemucs" | |
| ): Promise<StemSeparationResult> { | |
| // Verify input file exists | |
| try { | |
| await fs.access(inputPath); | |
| } catch { | |
| return { | |
| success: false, | |
| error: `Input file not found: ${inputPath}`, | |
| }; | |
| } | |
| // Create output directory | |
| await fs.mkdir(outputDir, { recursive: true }); | |
| return execPython<StemSeparationResult>("separate", { | |
| input_path: inputPath, | |
| output_dir: outputDir, | |
| model, | |
| }); | |
| } | |
| /** | |
| * Generate audio fingerprint using Chromaprint | |
| */ | |
| export async function generateFingerprint( | |
| audioPath: string | |
| ): Promise<FingerprintResult> { | |
| return execPython<FingerprintResult>("fingerprint", { | |
| audio_path: audioPath, | |
| }, 120000); // 2 minute timeout for fingerprinting | |
| } | |
| /** | |
| * Generate audio embedding using CLAP | |
| */ | |
| export async function generateEmbedding( | |
| audioPath: string, | |
| model: string = "laion/larger_clap_music" | |
| ): Promise<EmbeddingResult> { | |
| return execPython<EmbeddingResult>("embed", { | |
| audio_path: audioPath, | |
| model, | |
| }, 300000); // 5 minute timeout for embedding | |
| } | |
/** One overlapping window of audio with its CLAP embedding. */
export interface ChunkEmbedding {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  embedding: number[]; // embedding vector for this window
  dimension: number; // length of the embedding vector
}

/** Result of chunked CLAP embedding extraction. */
export interface ChunkEmbeddingsResult {
  success: boolean;
  chunks?: ChunkEmbedding[]; // present on success
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  error?: string; // present on failure
}
| /** | |
| * Generate chunk-based embeddings for an audio file | |
| * This splits the audio into overlapping windows and generates | |
| * an embedding for each chunk, enabling section-level matching. | |
| */ | |
| export async function generateChunkEmbeddings( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0, | |
| model: string = "laion/larger_clap_music" | |
| ): Promise<ChunkEmbeddingsResult> { | |
| return execPython<ChunkEmbeddingsResult>("embed_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| model, | |
| }, 600000); // 10 minute timeout for chunk embedding (longer audio) | |
| } | |
| /** | |
| * Process audio through full pipeline: separate -> fingerprint -> embed | |
| */ | |
| export async function processFullPipeline( | |
| inputPath: string, | |
| outputDir: string | |
| ): Promise<ProcessAllResult> { | |
| return execPython<ProcessAllResult>("process_all", { | |
| input_path: inputPath, | |
| output_dir: outputDir, | |
| }, 900000); // 15 minute timeout for full pipeline | |
| } | |
| /** | |
| * Check if Python ML environment is available | |
| */ | |
| export async function isPythonAvailable(): Promise<boolean> { | |
| return new Promise((resolve) => { | |
| const pythonCmd = process.platform === "win32" ? "python" : "python3"; | |
| const proc = spawn(pythonCmd, ["--version"]); | |
| proc.on("error", () => resolve(false)); | |
| proc.on("close", (code) => resolve(code === 0)); | |
| }); | |
| } | |
| /** | |
| * Check if processor.py exists | |
| */ | |
| export async function isProcessorAvailable(): Promise<boolean> { | |
| try { | |
| await fs.access(PROCESSOR_PATH); | |
| return true; | |
| } catch { | |
| return false; | |
| } | |
| } | |
| /** | |
| * Generic call to Python processor for any operation | |
| * Used for FAISS operations and other extensible functionality | |
| */ | |
| export async function callPythonProcessor<T = Record<string, unknown>>( | |
| operation: string, | |
| args: Record<string, unknown>, | |
| timeoutMs: number = 60000 | |
| ): Promise<T> { | |
| return execPython<T>(operation, args, timeoutMs); | |
| } | |
// ============== Fingerprint-based matching (Chromaprint) ==============

/** One overlapping window of audio with its Chromaprint fingerprint. */
export interface ChunkFingerprint {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  fingerprint: string; // encoded fingerprint for this window
}

/** Result of chunked fingerprint extraction. */
export interface ChunkFingerprintsResult {
  success: boolean;
  chunks?: ChunkFingerprint[]; // present on success
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  error?: string; // present on failure
}
| /** | |
| * Generate fingerprints for audio chunks | |
| * Unlike CLAP embeddings, Chromaprint fingerprints give: | |
| * - 100% match for same audio | |
| * - ~2-3% match for different audio | |
| */ | |
| export async function generateChunkFingerprints( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<ChunkFingerprintsResult> { | |
| return execPython<ChunkFingerprintsResult>("fingerprint_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 600000); | |
| } | |
/** A single hit from the fingerprint index. */
export interface FingerprintMatch {
  score: number; // similarity score for this match
  trackId: string;
  stemType?: string; // set when the match is a specific stem of a track
  title: string;
  artist: string;
  startTime?: number; // matched chunk start, when chunk-level data exists
  endTime?: number; // matched chunk end, when chunk-level data exists
}

/** Result of a fingerprint index search. */
export interface FingerprintSearchResult {
  matches: FingerprintMatch[];
  message?: string; // optional diagnostic from the processor
}
| /** | |
| * Search fingerprint index for matches | |
| */ | |
| export async function searchFingerprints( | |
| fingerprint: string, | |
| k: number = 5, | |
| threshold: number = 0.3 | |
| ): Promise<FingerprintSearchResult> { | |
| return execPython<FingerprintSearchResult>("fp_search", { | |
| fingerprint, | |
| k, | |
| threshold, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk fingerprint index. */
export interface FingerprintIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get fingerprint index statistics | |
| */ | |
| export async function getFingerprintStats(): Promise<FingerprintIndexStats> { | |
| return execPython<FingerprintIndexStats>("fp_stats", {}, 10000); | |
| } | |
// ============== Style-based similarity ==============

/** Musical style feature extraction result. */
export interface StyleFeatures {
  success: boolean;
  feature_vector?: number[]; // extracted style features, present on success
  dimension?: number; // length of the feature vector
  tempo?: number; // detected tempo — presumably BPM; confirm in processor.py
  error?: string; // present on failure
}

/** A single hit from the style index. */
export interface StyleMatch {
  score: number; // similarity score for this match
  trackId: string;
  title: string;
  artist: string;
}

/** Result of a style-similarity search. */
export interface StyleSearchResult {
  matches: StyleMatch[];
}
| /** | |
| * Extract musical style features from audio | |
| */ | |
| export async function extractStyleFeatures( | |
| audioPath: string, | |
| duration?: number | |
| ): Promise<StyleFeatures> { | |
| return execPython<StyleFeatures>("style_extract", { | |
| audio_path: audioPath, | |
| duration, | |
| }, 120000); | |
| } | |
/** One overlapping window of audio with its style feature vector. */
export interface StyleChunk {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  feature_vector: number[]; // style features for this window
}

/** Result of chunked style feature extraction. */
export interface StyleChunksResult {
  success: boolean;
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  chunks?: StyleChunk[]; // present on success
  error?: string; // present on failure
}
| /** | |
| * Extract chunk-level style features for granular matching | |
| */ | |
| export async function extractStyleChunks( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<StyleChunksResult> { | |
| return execPython<StyleChunksResult>("style_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 300000); | |
| } | |
| /** | |
| * Search for tracks with similar musical style | |
| */ | |
| export async function searchStyleSimilar( | |
| features: number[], | |
| k: number = 5, | |
| threshold: number = 0.85 | |
| ): Promise<StyleSearchResult> { | |
| return execPython<StyleSearchResult>("style_search", { | |
| features, | |
| k, | |
| threshold, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk style index. */
export interface StyleIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get style index statistics | |
| */ | |
| export async function getStyleStats(): Promise<StyleIndexStats> { | |
| return execPython<StyleIndexStats>("style_stats", {}, 10000); | |
| } | |
// ============== MERT (Music-specific embeddings) ==============

/** One overlapping window of audio with its MERT embedding. */
export interface MertChunk {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  embedding: number[]; // MERT embedding for this window
}

/** Result of chunked MERT embedding extraction. */
export interface MertChunksResult {
  success: boolean;
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  chunks?: MertChunk[]; // present on success
  error?: string; // present on failure
}
| /** | |
| * Extract MERT chunk embeddings for music-specific similarity | |
| * MERT gives much better discrimination than generic audio features | |
| */ | |
| export async function extractMertChunks( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<MertChunksResult> { | |
| return execPython<MertChunksResult>("mert_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 600000); // 10 min timeout | |
| } | |
/** A single hit from the MERT index. */
export interface MertMatch {
  score: number; // similarity score for this match
  trackId: string;
  title: string;
  artist: string;
  startTime?: number; // matched chunk start, when chunk-level data exists
  endTime?: number; // matched chunk end, when chunk-level data exists
}

/** Result of a MERT similarity search. */
export interface MertSearchResult {
  matches: MertMatch[];
}
| /** | |
| * Search MERT index for similar music | |
| * @param percentile If set (0-100), use dynamic threshold at this percentile | |
| */ | |
| export async function searchMertSimilar( | |
| embedding: number[], | |
| k: number = 5, | |
| threshold?: number, | |
| percentile?: number | |
| ): Promise<MertSearchResult & { threshold_used?: number }> { | |
| return execPython<MertSearchResult & { threshold_used?: number }>("mert_search", { | |
| embedding, | |
| k, | |
| threshold: threshold ?? 0.75, | |
| percentile, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk MERT index. */
export interface MertIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get MERT index statistics | |
| */ | |
| export async function getMertStats(): Promise<MertIndexStats> { | |
| return execPython<MertIndexStats>("mert_stats", {}, 10000); | |
| } | |