/**
* ML Bridge - Node.js interface to Python ML processor
*
* Calls Python scripts via subprocess for:
* - Stem separation (Demucs)
* - Audio fingerprinting (Chromaprint)
* - Embedding generation (CLAP)
*/
import { spawn } from "child_process";
import path from "path";
import { fileURLToPath } from "url";
import fs from "fs/promises";
// Get directory name in ESM (there is no __dirname global under ES modules)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Path to the Python ML package; processor.py is the CLI entry point that
// execPython invokes as `python processor.py <operation> <json-args>`.
const ML_DIR = path.resolve(__dirname, "../ml");
const PROCESSOR_PATH = path.join(ML_DIR, "processor.py");
// Types for ML operations

/** One separated stem as reported by the "separate" operation. */
export interface StemResult {
  /** Stem category emitted by the processor. */
  type: "vocals" | "drums" | "bass" | "other" | "guitar" | "piano";
  /** Path to the rendered stem file. */
  path: string;
  /** Stem duration as reported by the processor; null when unknown. */
  duration: number | null;
}

/** Outcome of Demucs stem separation (see separateStems). */
export interface StemSeparationResult {
  success: boolean;
  /** Present on success. */
  stems?: StemResult[];
  /** Demucs model that was used. */
  model?: string;
  output_dir?: string;
  /** Present on failure. */
  error?: string;
}

/** Outcome of Chromaprint fingerprinting (see generateFingerprint). */
export interface FingerprintResult {
  success: boolean;
  fingerprint?: string;
  duration?: number;
  algorithm?: string;
  version?: string;
  error?: string;
}

/** Outcome of CLAP embedding generation (see generateEmbedding). */
export interface EmbeddingResult {
  success: boolean;
  embedding?: number[];
  /** Embedding dimensionality as reported by the processor. */
  dimension?: number;
  model?: string;
  error?: string;
}

/** Outcome of the combined separate -> fingerprint -> embed pipeline. */
export interface ProcessAllResult {
  success: boolean;
  /** Per-stem results; fingerprint/embedding are null when that step failed. */
  stems?: Array<{
    type: string;
    path: string;
    duration: number | null;
    fingerprint: string | null;
    fingerprint_error?: string;
    embedding: number[] | null;
    embedding_model?: string;
    embedding_error?: string;
  }>;
  error?: string;
}

/** Dependency availability report from the "health" operation. */
export interface HealthCheckResult {
  success: boolean;
  demucs: boolean;
  chromaprint: boolean;
  clap: boolean;
  faiss?: boolean;
  demucs_version?: string;
  chromaprint_version?: string;
  clap_source?: string;
  faiss_version?: string;
  /** Human-readable reasons for any unavailable dependency. */
  errors: string[];
}
/**
 * Resolve which Python executable to invoke.
 *
 * An explicit PYTHON_PATH environment variable wins, letting deployments
 * point at a specific interpreter (e.g. a conda env). Otherwise fall back
 * to the platform's conventional command name.
 */
function getPythonCommand(): string {
  const override = process.env.PYTHON_PATH;
  if (override) {
    return override;
  }
  const onWindows = process.platform === "win32";
  return onWindows ? "python" : "python3";
}
/**
 * Execute the Python processor with the given operation and arguments.
 *
 * Spawns `python processor.py <operation> <json-args>` and resolves with
 * the JSON document the processor prints on stdout. On failure the promise
 * rejects with the processor's own error message when one can be parsed,
 * otherwise with the captured stderr/stdout.
 *
 * @param operation Operation name understood by processor.py.
 * @param args      Arguments serialized to JSON and passed as one argv entry.
 * @param timeoutMs Wall-clock limit; the child is terminated when exceeded.
 */
async function execPython<T>(
  operation: string,
  args: Record<string, unknown>,
  timeoutMs: number = 600000 // 10 minute default
): Promise<T> {
  return new Promise((resolve, reject) => {
    const argsJson = JSON.stringify(args);
    const pythonCmd = getPythonCommand();
    const proc = spawn(pythonCmd, [PROCESSOR_PATH, operation, argsJson], {
      cwd: ML_DIR,
      env: {
        ...process.env,
        PYTHONUNBUFFERED: "1", // Ensure immediate output
      },
    });
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    // Escalation timer, armed only after SIGTERM; cleared on close/error.
    let killTimer: ReturnType<typeof setTimeout> | null = null;
    // Set timeout: ask the child to exit, then force-kill if it ignores
    // SIGTERM (e.g. wedged in a native extension), so a hung Python
    // process is never leaked.
    const timeout = setTimeout(() => {
      timedOut = true;
      proc.kill("SIGTERM");
      killTimer = setTimeout(() => {
        proc.kill("SIGKILL"); // no-op if the child already exited
      }, 5000);
      reject(new Error(`ML operation timed out after ${timeoutMs}ms`));
    }, timeoutMs);
    proc.stdout.on("data", (data) => {
      stdout += data.toString();
    });
    proc.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    proc.on("error", (err) => {
      clearTimeout(timeout);
      if (killTimer) clearTimeout(killTimer);
      if (err.message.includes("ENOENT")) {
        reject(new Error(`Python not found. Ensure python3 is installed and in PATH.`));
      } else {
        reject(err);
      }
    });
    proc.on("close", (code) => {
      clearTimeout(timeout);
      if (killTimer) clearTimeout(killTimer);
      if (timedOut) return; // Already rejected
      if (code !== 0) {
        // Try to parse error from stdout (processor outputs JSON even on error)
        try {
          const result = JSON.parse(stdout);
          if (!result.success && result.error) {
            reject(new Error(result.error));
            return;
          }
        } catch {
          // Ignore parse error
        }
        reject(new Error(`ML operation failed (exit code ${code}): ${stderr || stdout}`));
        return;
      }
      try {
        const result = JSON.parse(stdout);
        resolve(result as T);
      } catch (e) {
        // Truncate output to avoid flooding logs with embeddings
        const truncated = stdout.length > 500 ? stdout.slice(0, 500) + "..." : stdout;
        reject(new Error(`Failed to parse ML result: ${e}. Output (truncated): ${truncated}`));
      }
    });
  });
}
/**
 * Probe the Python environment for required ML dependencies.
 * Never throws: any failure (including a missing interpreter) is folded
 * into an unhealthy HealthCheckResult.
 */
export async function checkMLHealth(): Promise<HealthCheckResult> {
  try {
    const health = await execPython<HealthCheckResult>("health", {}, 30000);
    return health;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      demucs: false,
      chromaprint: false,
      clap: false,
      errors: [message],
    };
  }
}
/**
 * Run Demucs stem separation on a single audio file.
 *
 * @param inputPath Source audio file; must already exist on disk.
 * @param outputDir Directory for the stem files (created if missing).
 * @param model     Demucs model name; defaults to "htdemucs".
 */
export async function separateStems(
  inputPath: string,
  outputDir: string,
  model: string = "htdemucs"
): Promise<StemSeparationResult> {
  // Fail fast with a structured error rather than letting Python report it.
  const inputExists = await fs.access(inputPath).then(
    () => true,
    () => false
  );
  if (!inputExists) {
    return {
      success: false,
      error: `Input file not found: ${inputPath}`,
    };
  }
  // Ensure the destination exists before handing off to the processor.
  await fs.mkdir(outputDir, { recursive: true });
  return execPython<StemSeparationResult>("separate", {
    input_path: inputPath,
    output_dir: outputDir,
    model,
  });
}
/**
 * Compute a Chromaprint fingerprint for the given audio file.
 */
export async function generateFingerprint(
  audioPath: string
): Promise<FingerprintResult> {
  const timeoutMs = 120000; // 2 minute timeout for fingerprinting
  return execPython<FingerprintResult>(
    "fingerprint",
    { audio_path: audioPath },
    timeoutMs
  );
}
/**
 * Generate a whole-file audio embedding via CLAP.
 *
 * @param audioPath Audio file to embed.
 * @param model     CLAP checkpoint; defaults to "laion/larger_clap_music".
 */
export async function generateEmbedding(
  audioPath: string,
  model: string = "laion/larger_clap_music"
): Promise<EmbeddingResult> {
  const timeoutMs = 300000; // 5 minute timeout for embedding
  const payload = { audio_path: audioPath, model };
  return execPython<EmbeddingResult>("embed", payload, timeoutMs);
}
/** Embedding for one time window of a track (see generateChunkEmbeddings). */
export interface ChunkEmbedding {
  /** Window start offset within the track. */
  start_time: number;
  /** Window end offset within the track. */
  end_time: number;
  embedding: number[];
  /** Embedding dimensionality as reported by the processor. */
  dimension: number;
}

/** Outcome of chunked embedding generation. */
export interface ChunkEmbeddingsResult {
  success: boolean;
  /** Present on success; one entry per window. */
  chunks?: ChunkEmbedding[];
  total_duration?: number;
  chunk_count?: number;
  /** Present on failure. */
  error?: string;
}
/**
 * Generate chunk-based embeddings for an audio file.
 *
 * The audio is split into overlapping windows and one embedding is
 * produced per window, which enables section-level matching.
 *
 * @param audioPath     Audio file to process.
 * @param chunkDuration Window length passed to the processor (default 10.0).
 * @param chunkOverlap  Overlap between consecutive windows (default 5.0).
 * @param model         CLAP checkpoint to use.
 */
export async function generateChunkEmbeddings(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0,
  model: string = "laion/larger_clap_music"
): Promise<ChunkEmbeddingsResult> {
  const payload = {
    audio_path: audioPath,
    chunk_duration: chunkDuration,
    chunk_overlap: chunkOverlap,
    model,
  };
  // 10 minute timeout: chunked embedding covers the whole track.
  return execPython<ChunkEmbeddingsResult>("embed_chunks", payload, 600000);
}
/**
 * Run the full pipeline on one file: separate -> fingerprint -> embed.
 *
 * @param inputPath Source audio file.
 * @param outputDir Destination directory handed to the processor.
 */
export async function processFullPipeline(
  inputPath: string,
  outputDir: string
): Promise<ProcessAllResult> {
  const timeoutMs = 900000; // 15 minutes: covers all three stages
  return execPython<ProcessAllResult>(
    "process_all",
    { input_path: inputPath, output_dir: outputDir },
    timeoutMs
  );
}
/**
* Check if Python ML environment is available
*/
export async function isPythonAvailable(): Promise<boolean> {
return new Promise((resolve) => {
const pythonCmd = process.platform === "win32" ? "python" : "python3";
const proc = spawn(pythonCmd, ["--version"]);
proc.on("error", () => resolve(false));
proc.on("close", (code) => resolve(code === 0));
});
}
/**
 * Check whether processor.py exists on disk at the expected location.
 */
export async function isProcessorAvailable(): Promise<boolean> {
  return fs.access(PROCESSOR_PATH).then(
    () => true,
    () => false
  );
}
/**
 * Generic passthrough to the Python processor for any operation.
 *
 * Exists so feature modules (FAISS maintenance and other extensions) can
 * invoke new operations without this bridge growing a wrapper per call.
 *
 * @param operation Operation name understood by processor.py.
 * @param args      JSON-serializable argument object.
 * @param timeoutMs Wall-clock limit (default 60s).
 */
export async function callPythonProcessor<T = Record<string, unknown>>(
  operation: string,
  args: Record<string, unknown>,
  timeoutMs: number = 60000
): Promise<T> {
  return await execPython<T>(operation, args, timeoutMs);
}
// ============== Fingerprint-based matching (Chromaprint) ==============

/** Fingerprint for one time window of a track. */
export interface ChunkFingerprint {
  start_time: number;
  end_time: number;
  fingerprint: string;
}

/** Outcome of chunked fingerprinting (see generateChunkFingerprints). */
export interface ChunkFingerprintsResult {
  success: boolean;
  /** Present on success; one entry per window. */
  chunks?: ChunkFingerprint[];
  total_duration?: number;
  chunk_count?: number;
  /** Present on failure. */
  error?: string;
}
/**
 * Generate Chromaprint fingerprints for overlapping audio chunks.
 *
 * Unlike CLAP embeddings, Chromaprint fingerprints give:
 * - 100% match for same audio
 * - ~2-3% match for different audio
 *
 * @param audioPath     Audio file to process.
 * @param chunkDuration Window length passed to the processor (default 10.0).
 * @param chunkOverlap  Overlap between consecutive windows (default 5.0).
 */
export async function generateChunkFingerprints(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<ChunkFingerprintsResult> {
  const payload = {
    audio_path: audioPath,
    chunk_duration: chunkDuration,
    chunk_overlap: chunkOverlap,
  };
  return execPython<ChunkFingerprintsResult>("fingerprint_chunks", payload, 600000);
}
/** One hit returned by the fingerprint index search. */
export interface FingerprintMatch {
  /** Similarity score for this match. */
  score: number;
  trackId: string;
  stemType?: string;
  title: string;
  artist: string;
  /** Matched segment boundaries, when the index stores chunk positions. */
  startTime?: number;
  endTime?: number;
}

/** Result envelope for searchFingerprints. */
export interface FingerprintSearchResult {
  matches: FingerprintMatch[];
  message?: string;
}
/**
 * Search the fingerprint index for matches to a query fingerprint.
 *
 * @param fingerprint Chromaprint fingerprint string to look up.
 * @param k           Maximum number of matches to return (default 5).
 * @param threshold   Minimum score passed to the processor (default 0.3).
 */
export async function searchFingerprints(
  fingerprint: string,
  k: number = 5,
  threshold: number = 0.3
): Promise<FingerprintSearchResult> {
  const payload = { fingerprint, k, threshold };
  return execPython<FingerprintSearchResult>("fp_search", payload, 30000);
}
/** Statistics reported by the "fp_stats" operation. */
export interface FingerprintIndexStats {
  /** Whether the index file exists at all. */
  exists: boolean;
  /** Total number of indexed entries. */
  total: number;
  uniqueTracks: number;
}
/**
 * Fetch fingerprint index statistics from the processor.
 */
export async function getFingerprintStats(): Promise<FingerprintIndexStats> {
  const timeoutMs = 10000; // stats lookup is cheap; keep a tight cap
  return execPython<FingerprintIndexStats>("fp_stats", {}, timeoutMs);
}
// ============== Style-based similarity ==============

/** Outcome of whole-file style feature extraction. */
export interface StyleFeatures {
  success: boolean;
  /** Present on success. */
  feature_vector?: number[];
  /** Length of `feature_vector` as reported by the processor. */
  dimension?: number;
  tempo?: number;
  /** Present on failure. */
  error?: string;
}

/** One hit returned by the style index search. */
export interface StyleMatch {
  score: number;
  trackId: string;
  title: string;
  artist: string;
}

/** Result envelope for searchStyleSimilar. */
export interface StyleSearchResult {
  matches: StyleMatch[];
}
/**
 * Extract musical style features from an audio file.
 *
 * @param audioPath Audio file to analyze.
 * @param duration  Optional analysis duration forwarded to the processor;
 *                  omitted from the payload when undefined.
 */
export async function extractStyleFeatures(
  audioPath: string,
  duration?: number
): Promise<StyleFeatures> {
  const payload = { audio_path: audioPath, duration };
  return execPython<StyleFeatures>("style_extract", payload, 120000);
}
/** Style features for one time window of a track. */
export interface StyleChunk {
  start_time: number;
  end_time: number;
  feature_vector: number[];
}

/** Outcome of chunked style extraction (see extractStyleChunks). */
export interface StyleChunksResult {
  success: boolean;
  total_duration?: number;
  chunk_count?: number;
  /** Present on success; one entry per window. */
  chunks?: StyleChunk[];
  /** Present on failure. */
  error?: string;
}
/**
 * Extract chunk-level style features for granular matching.
 *
 * @param audioPath     Audio file to analyze.
 * @param chunkDuration Window length passed to the processor (default 10.0).
 * @param chunkOverlap  Overlap between consecutive windows (default 5.0).
 */
export async function extractStyleChunks(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<StyleChunksResult> {
  const payload = {
    audio_path: audioPath,
    chunk_duration: chunkDuration,
    chunk_overlap: chunkOverlap,
  };
  return execPython<StyleChunksResult>("style_chunks", payload, 300000);
}
/**
 * Search the style index for tracks with similar musical style.
 *
 * @param features  Style feature vector to query with.
 * @param k         Maximum number of matches to return (default 5).
 * @param threshold Minimum score passed to the processor (default 0.85).
 */
export async function searchStyleSimilar(
  features: number[],
  k: number = 5,
  threshold: number = 0.85
): Promise<StyleSearchResult> {
  const payload = { features, k, threshold };
  return execPython<StyleSearchResult>("style_search", payload, 30000);
}
/** Statistics reported by the "style_stats" operation. */
export interface StyleIndexStats {
  /** Whether the index file exists at all. */
  exists: boolean;
  /** Total number of indexed entries. */
  total: number;
  uniqueTracks: number;
}
/**
 * Fetch style index statistics from the processor.
 */
export async function getStyleStats(): Promise<StyleIndexStats> {
  const timeoutMs = 10000; // stats lookup is cheap; keep a tight cap
  return execPython<StyleIndexStats>("style_stats", {}, timeoutMs);
}
// ============== MERT (Music-specific embeddings) ==============

/** MERT embedding for one time window of a track. */
export interface MertChunk {
  start_time: number;
  end_time: number;
  embedding: number[];
}

/** Outcome of chunked MERT extraction (see extractMertChunks). */
export interface MertChunksResult {
  success: boolean;
  total_duration?: number;
  chunk_count?: number;
  /** Present on success; one entry per window. */
  chunks?: MertChunk[];
  /** Present on failure. */
  error?: string;
}
/**
 * Extract MERT chunk embeddings for music-specific similarity.
 * MERT gives much better discrimination than generic audio features.
 *
 * @param audioPath     Audio file to process.
 * @param chunkDuration Window length passed to the processor (default 10.0).
 * @param chunkOverlap  Overlap between consecutive windows (default 5.0).
 */
export async function extractMertChunks(
  audioPath: string,
  chunkDuration: number = 10.0,
  chunkOverlap: number = 5.0
): Promise<MertChunksResult> {
  const payload = {
    audio_path: audioPath,
    chunk_duration: chunkDuration,
    chunk_overlap: chunkOverlap,
  };
  return execPython<MertChunksResult>("mert_chunks", payload, 600000); // 10 min timeout
}
/** One hit returned by the MERT index search. */
export interface MertMatch {
  score: number;
  trackId: string;
  title: string;
  artist: string;
  /** Matched segment boundaries, when the index stores chunk positions. */
  startTime?: number;
  endTime?: number;
}

/** Result envelope for searchMertSimilar. */
export interface MertSearchResult {
  matches: MertMatch[];
}
/**
 * Search the MERT index for music similar to the given embedding.
 *
 * @param embedding  Query MERT embedding vector.
 * @param k          Maximum number of matches to return (default 5).
 * @param threshold  Fixed similarity floor; 0.75 is sent when omitted.
 * @param percentile If set (0-100), use dynamic threshold at this percentile.
 */
export async function searchMertSimilar(
  embedding: number[],
  k: number = 5,
  threshold?: number,
  percentile?: number
): Promise<MertSearchResult & { threshold_used?: number }> {
  const payload = {
    embedding,
    k,
    threshold: threshold ?? 0.75,
    percentile,
  };
  return execPython<MertSearchResult & { threshold_used?: number }>(
    "mert_search",
    payload,
    30000
  );
}
/** Statistics reported by the "mert_stats" operation. */
export interface MertIndexStats {
  /** Whether the index file exists at all. */
  exists: boolean;
  /** Total number of indexed entries. */
  total: number;
  uniqueTracks: number;
}
/**
 * Fetch MERT index statistics from the processor.
 */
export async function getMertStats(): Promise<MertIndexStats> {
  const timeoutMs = 10000; // stats lookup is cheap; keep a tight cap
  return execPython<MertIndexStats>("mert_stats", {}, timeoutMs);
}