// NOTE(review): removed scraped web-page banner text ("Spaces: Running on CPU Upgrade")
// that was accidentally captured with this file — it is not part of the source.
/**
 * ML Bridge - Node.js interface to the Python ML processor.
 *
 * Invokes Python scripts via subprocess for:
 * - Stem separation (Demucs)
 * - Audio fingerprinting (Chromaprint)
 * - Embedding generation (CLAP)
 */
import { spawn } from "child_process";
import path from "path";
import { fileURLToPath } from "url";
import fs from "fs/promises";

// Get directory name in ESM (__dirname/__filename globals do not exist in ES modules)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Paths to the Python ML processor, expected at ../ml/processor.py relative to this file
const ML_DIR = path.resolve(__dirname, "../ml");
const PROCESSOR_PATH = path.join(ML_DIR, "processor.py");
// Types for ML operations.
// These mirror the JSON payloads emitted by processor.py; optional fields are
// present on success or failure respectively.

/** One separated stem as reported by the Python processor. */
export interface StemResult {
  // Stem category — presumably matches Demucs output stem names; confirm against processor.py
  type: "vocals" | "drums" | "bass" | "other" | "guitar" | "piano";
  // Filesystem path to the rendered stem audio file
  path: string;
  // Duration (presumably seconds), or null when the processor could not determine it
  duration: number | null;
}

/** Result of a Demucs stem-separation run. */
export interface StemSeparationResult {
  success: boolean;
  stems?: StemResult[]; // present on success
  model?: string; // model name used (e.g. "htdemucs")
  output_dir?: string; // directory the stems were written to
  error?: string; // present on failure
}

/** Result of a Chromaprint fingerprinting run. */
export interface FingerprintResult {
  success: boolean;
  fingerprint?: string; // encoded fingerprint string
  duration?: number; // presumably seconds — TODO confirm in processor.py
  algorithm?: string;
  version?: string;
  error?: string; // present on failure
}

/** Result of a CLAP embedding run. */
export interface EmbeddingResult {
  success: boolean;
  embedding?: number[]; // embedding vector
  dimension?: number; // length of the embedding vector
  model?: string; // model identifier used
  error?: string; // present on failure
}

/** Result of the full separate -> fingerprint -> embed pipeline. */
export interface ProcessAllResult {
  success: boolean;
  stems?: Array<{
    type: string;
    path: string;
    duration: number | null;
    fingerprint: string | null; // null when fingerprinting failed for this stem
    fingerprint_error?: string; // per-stem fingerprint failure detail
    embedding: number[] | null; // null when embedding failed for this stem
    embedding_model?: string;
    embedding_error?: string; // per-stem embedding failure detail
  }>;
  error?: string; // present on overall failure
}

/** Availability report for the Python-side ML dependencies. */
export interface HealthCheckResult {
  success: boolean;
  demucs: boolean;
  chromaprint: boolean;
  clap: boolean;
  faiss?: boolean;
  demucs_version?: string;
  chromaprint_version?: string;
  clap_source?: string;
  faiss_version?: string;
  // Human-readable diagnostics for anything that failed to load
  errors: string[];
}
| /** | |
| * Get Python command to use | |
| * Prefers PYTHON_PATH env var, then conda env, then system python | |
| */ | |
| function getPythonCommand(): string { | |
| // Allow explicit override via env var | |
| if (process.env.PYTHON_PATH) { | |
| return process.env.PYTHON_PATH; | |
| } | |
| // Default to system python | |
| return process.platform === "win32" ? "python" : "python3"; | |
| } | |
| /** | |
| * Execute Python processor with given operation and arguments | |
| */ | |
| async function execPython<T>( | |
| operation: string, | |
| args: Record<string, unknown>, | |
| timeoutMs: number = 600000 // 10 minute default | |
| ): Promise<T> { | |
| return new Promise((resolve, reject) => { | |
| const argsJson = JSON.stringify(args); | |
| const pythonCmd = getPythonCommand(); | |
| const proc = spawn(pythonCmd, [PROCESSOR_PATH, operation, argsJson], { | |
| cwd: ML_DIR, | |
| env: { | |
| ...process.env, | |
| PYTHONUNBUFFERED: "1", // Ensure immediate output | |
| }, | |
| }); | |
| let stdout = ""; | |
| let stderr = ""; | |
| let timedOut = false; | |
| // Set timeout | |
| const timeout = setTimeout(() => { | |
| timedOut = true; | |
| proc.kill("SIGTERM"); | |
| reject(new Error(`ML operation timed out after ${timeoutMs}ms`)); | |
| }, timeoutMs); | |
| proc.stdout.on("data", (data) => { | |
| stdout += data.toString(); | |
| }); | |
| proc.stderr.on("data", (data) => { | |
| stderr += data.toString(); | |
| }); | |
| proc.on("error", (err) => { | |
| clearTimeout(timeout); | |
| if (err.message.includes("ENOENT")) { | |
| reject(new Error(`Python not found. Ensure python3 is installed and in PATH.`)); | |
| } else { | |
| reject(err); | |
| } | |
| }); | |
| proc.on("close", (code) => { | |
| clearTimeout(timeout); | |
| if (timedOut) return; // Already rejected | |
| if (code !== 0) { | |
| // Try to parse error from stdout (processor outputs JSON even on error) | |
| try { | |
| const result = JSON.parse(stdout); | |
| if (!result.success && result.error) { | |
| reject(new Error(result.error)); | |
| return; | |
| } | |
| } catch { | |
| // Ignore parse error | |
| } | |
| reject(new Error(`ML operation failed (exit code ${code}): ${stderr || stdout}`)); | |
| return; | |
| } | |
| try { | |
| const result = JSON.parse(stdout); | |
| resolve(result as T); | |
| } catch (e) { | |
| // Truncate output to avoid flooding logs with embeddings | |
| const truncated = stdout.length > 500 ? stdout.slice(0, 500) + "..." : stdout; | |
| reject(new Error(`Failed to parse ML result: ${e}. Output (truncated): ${truncated}`)); | |
| } | |
| }); | |
| }); | |
| } | |
| /** | |
| * Check if ML dependencies are available | |
| */ | |
| export async function checkMLHealth(): Promise<HealthCheckResult> { | |
| try { | |
| return await execPython<HealthCheckResult>("health", {}, 30000); | |
| } catch (error) { | |
| return { | |
| success: false, | |
| demucs: false, | |
| chromaprint: false, | |
| clap: false, | |
| errors: [error instanceof Error ? error.message : String(error)], | |
| }; | |
| } | |
| } | |
| /** | |
| * Separate audio into stems using Demucs | |
| */ | |
| export async function separateStems( | |
| inputPath: string, | |
| outputDir: string, | |
| model: string = "htdemucs" | |
| ): Promise<StemSeparationResult> { | |
| // Verify input file exists | |
| try { | |
| await fs.access(inputPath); | |
| } catch { | |
| return { | |
| success: false, | |
| error: `Input file not found: ${inputPath}`, | |
| }; | |
| } | |
| // Create output directory | |
| await fs.mkdir(outputDir, { recursive: true }); | |
| return execPython<StemSeparationResult>("separate", { | |
| input_path: inputPath, | |
| output_dir: outputDir, | |
| model, | |
| }); | |
| } | |
| /** | |
| * Generate audio fingerprint using Chromaprint | |
| */ | |
| export async function generateFingerprint( | |
| audioPath: string | |
| ): Promise<FingerprintResult> { | |
| return execPython<FingerprintResult>("fingerprint", { | |
| audio_path: audioPath, | |
| }, 120000); // 2 minute timeout for fingerprinting | |
| } | |
| /** | |
| * Generate audio embedding using CLAP | |
| */ | |
| export async function generateEmbedding( | |
| audioPath: string, | |
| model: string = "laion/larger_clap_music" | |
| ): Promise<EmbeddingResult> { | |
| return execPython<EmbeddingResult>("embed", { | |
| audio_path: audioPath, | |
| model, | |
| }, 300000); // 5 minute timeout for embedding | |
| } | |
/** One overlapping window of audio with its CLAP embedding. */
export interface ChunkEmbedding {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  embedding: number[]; // embedding vector for this window
  dimension: number; // length of the embedding vector
}

/** Result of chunked CLAP embedding extraction. */
export interface ChunkEmbeddingsResult {
  success: boolean;
  chunks?: ChunkEmbedding[]; // present on success
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  error?: string; // present on failure
}
| /** | |
| * Generate chunk-based embeddings for an audio file | |
| * This splits the audio into overlapping windows and generates | |
| * an embedding for each chunk, enabling section-level matching. | |
| */ | |
| export async function generateChunkEmbeddings( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0, | |
| model: string = "laion/larger_clap_music" | |
| ): Promise<ChunkEmbeddingsResult> { | |
| return execPython<ChunkEmbeddingsResult>("embed_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| model, | |
| }, 600000); // 10 minute timeout for chunk embedding (longer audio) | |
| } | |
| /** | |
| * Process audio through full pipeline: separate -> fingerprint -> embed | |
| */ | |
| export async function processFullPipeline( | |
| inputPath: string, | |
| outputDir: string | |
| ): Promise<ProcessAllResult> { | |
| return execPython<ProcessAllResult>("process_all", { | |
| input_path: inputPath, | |
| output_dir: outputDir, | |
| }, 900000); // 15 minute timeout for full pipeline | |
| } | |
| /** | |
| * Check if Python ML environment is available | |
| */ | |
| export async function isPythonAvailable(): Promise<boolean> { | |
| return new Promise((resolve) => { | |
| const pythonCmd = process.platform === "win32" ? "python" : "python3"; | |
| const proc = spawn(pythonCmd, ["--version"]); | |
| proc.on("error", () => resolve(false)); | |
| proc.on("close", (code) => resolve(code === 0)); | |
| }); | |
| } | |
| /** | |
| * Check if processor.py exists | |
| */ | |
| export async function isProcessorAvailable(): Promise<boolean> { | |
| try { | |
| await fs.access(PROCESSOR_PATH); | |
| return true; | |
| } catch { | |
| return false; | |
| } | |
| } | |
| /** | |
| * Generic call to Python processor for any operation | |
| * Used for FAISS operations and other extensible functionality | |
| */ | |
| export async function callPythonProcessor<T = Record<string, unknown>>( | |
| operation: string, | |
| args: Record<string, unknown>, | |
| timeoutMs: number = 60000 | |
| ): Promise<T> { | |
| return execPython<T>(operation, args, timeoutMs); | |
| } | |
// ============== Fingerprint-based matching (Chromaprint) ==============

/** One overlapping window of audio with its Chromaprint fingerprint. */
export interface ChunkFingerprint {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  fingerprint: string; // encoded fingerprint for this window
}

/** Result of chunked fingerprint extraction. */
export interface ChunkFingerprintsResult {
  success: boolean;
  chunks?: ChunkFingerprint[]; // present on success
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  error?: string; // present on failure
}
| /** | |
| * Generate fingerprints for audio chunks | |
| * Unlike CLAP embeddings, Chromaprint fingerprints give: | |
| * - 100% match for same audio | |
| * - ~2-3% match for different audio | |
| */ | |
| export async function generateChunkFingerprints( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<ChunkFingerprintsResult> { | |
| return execPython<ChunkFingerprintsResult>("fingerprint_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 600000); | |
| } | |
/** A single hit from the fingerprint index. */
export interface FingerprintMatch {
  score: number; // similarity score for this match
  trackId: string;
  stemType?: string; // set when the match is a specific stem of a track
  title: string;
  artist: string;
  startTime?: number; // matched chunk start, when chunk-level data exists
  endTime?: number; // matched chunk end, when chunk-level data exists
}

/** Result of a fingerprint index search. */
export interface FingerprintSearchResult {
  matches: FingerprintMatch[];
  message?: string; // optional diagnostic from the processor
}
| /** | |
| * Search fingerprint index for matches | |
| */ | |
| export async function searchFingerprints( | |
| fingerprint: string, | |
| k: number = 5, | |
| threshold: number = 0.3 | |
| ): Promise<FingerprintSearchResult> { | |
| return execPython<FingerprintSearchResult>("fp_search", { | |
| fingerprint, | |
| k, | |
| threshold, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk fingerprint index. */
export interface FingerprintIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get fingerprint index statistics | |
| */ | |
| export async function getFingerprintStats(): Promise<FingerprintIndexStats> { | |
| return execPython<FingerprintIndexStats>("fp_stats", {}, 10000); | |
| } | |
// ============== Style-based similarity ==============

/** Musical style feature extraction result. */
export interface StyleFeatures {
  success: boolean;
  feature_vector?: number[]; // extracted style features, present on success
  dimension?: number; // length of the feature vector
  tempo?: number; // detected tempo — presumably BPM; confirm in processor.py
  error?: string; // present on failure
}

/** A single hit from the style index. */
export interface StyleMatch {
  score: number; // similarity score for this match
  trackId: string;
  title: string;
  artist: string;
}

/** Result of a style-similarity search. */
export interface StyleSearchResult {
  matches: StyleMatch[];
}
| /** | |
| * Extract musical style features from audio | |
| */ | |
| export async function extractStyleFeatures( | |
| audioPath: string, | |
| duration?: number | |
| ): Promise<StyleFeatures> { | |
| return execPython<StyleFeatures>("style_extract", { | |
| audio_path: audioPath, | |
| duration, | |
| }, 120000); | |
| } | |
/** One overlapping window of audio with its style feature vector. */
export interface StyleChunk {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  feature_vector: number[]; // style features for this window
}

/** Result of chunked style feature extraction. */
export interface StyleChunksResult {
  success: boolean;
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  chunks?: StyleChunk[]; // present on success
  error?: string; // present on failure
}
| /** | |
| * Extract chunk-level style features for granular matching | |
| */ | |
| export async function extractStyleChunks( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<StyleChunksResult> { | |
| return execPython<StyleChunksResult>("style_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 300000); | |
| } | |
| /** | |
| * Search for tracks with similar musical style | |
| */ | |
| export async function searchStyleSimilar( | |
| features: number[], | |
| k: number = 5, | |
| threshold: number = 0.85 | |
| ): Promise<StyleSearchResult> { | |
| return execPython<StyleSearchResult>("style_search", { | |
| features, | |
| k, | |
| threshold, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk style index. */
export interface StyleIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get style index statistics | |
| */ | |
| export async function getStyleStats(): Promise<StyleIndexStats> { | |
| return execPython<StyleIndexStats>("style_stats", {}, 10000); | |
| } | |
// ============== MERT (Music-specific embeddings) ==============

/** One overlapping window of audio with its MERT embedding. */
export interface MertChunk {
  start_time: number; // window start (presumably seconds)
  end_time: number; // window end (presumably seconds)
  embedding: number[]; // MERT embedding for this window
}

/** Result of chunked MERT embedding extraction. */
export interface MertChunksResult {
  success: boolean;
  total_duration?: number; // full audio duration
  chunk_count?: number; // number of windows produced
  chunks?: MertChunk[]; // present on success
  error?: string; // present on failure
}
| /** | |
| * Extract MERT chunk embeddings for music-specific similarity | |
| * MERT gives much better discrimination than generic audio features | |
| */ | |
| export async function extractMertChunks( | |
| audioPath: string, | |
| chunkDuration: number = 10.0, | |
| chunkOverlap: number = 5.0 | |
| ): Promise<MertChunksResult> { | |
| return execPython<MertChunksResult>("mert_chunks", { | |
| audio_path: audioPath, | |
| chunk_duration: chunkDuration, | |
| chunk_overlap: chunkOverlap, | |
| }, 600000); // 10 min timeout | |
| } | |
/** A single hit from the MERT index. */
export interface MertMatch {
  score: number; // similarity score for this match
  trackId: string;
  title: string;
  artist: string;
  startTime?: number; // matched chunk start, when chunk-level data exists
  endTime?: number; // matched chunk end, when chunk-level data exists
}

/** Result of a MERT similarity search. */
export interface MertSearchResult {
  matches: MertMatch[];
}
| /** | |
| * Search MERT index for similar music | |
| * @param percentile If set (0-100), use dynamic threshold at this percentile | |
| */ | |
| export async function searchMertSimilar( | |
| embedding: number[], | |
| k: number = 5, | |
| threshold?: number, | |
| percentile?: number | |
| ): Promise<MertSearchResult & { threshold_used?: number }> { | |
| return execPython<MertSearchResult & { threshold_used?: number }>("mert_search", { | |
| embedding, | |
| k, | |
| threshold: threshold ?? 0.75, | |
| percentile, | |
| }, 30000); | |
| } | |
/** Summary statistics for the on-disk MERT index. */
export interface MertIndexStats {
  exists: boolean; // whether an index file is present
  total: number; // total indexed entries
  uniqueTracks: number; // distinct tracks represented in the index
}
| /** | |
| * Get MERT index statistics | |
| */ | |
| export async function getMertStats(): Promise<MertIndexStats> { | |
| return execPython<MertIndexStats>("mert_stats", {}, 10000); | |
| } | |