// aimusic-attribution / server / audioProcessing.ts
/**
* Audio Processing Service
*
* Handles stem separation, fingerprinting, and attribution.
* Uses real ML models (Demucs, Chromaprint, CLAP) when available,
* falls back to simulation when ML dependencies aren't installed.
*/
import { nanoid } from "nanoid";
import path from "path";
import fs from "fs/promises";
import * as mlBridge from "./mlBridge";
// Cache ML availability check
// (memoized across calls; null = not probed yet; both success and failure are cached)
let _mlAvailable: boolean | null = null;
let _mlHealth: mlBridge.HealthCheckResult | null = null;
/** Result of splitting one track into per-instrument stems. */
export interface StemSeparationResult {
stems: Array<{
/** Stem category produced by the separator. */
type: "vocals" | "drums" | "bass" | "other";
/** Storage key, e.g. tracks/<trackId>/stems/<type>-<id>.wav */
fileKey: string;
/** Placeholder URL in simulation; local filesystem path when Demucs ran. */
fileUrl: string;
/** Stem duration in seconds (0 when the ML bridge reports none). */
duration: number;
}>;
/** True when Demucs produced the stems; false/undefined for simulation. */
usingRealML?: boolean;
}
/** Acoustic fingerprint of a single stem. */
export interface FingerprintResult {
/** Fingerprinting algorithm name (e.g. "chromaprint"). */
algorithm: string;
/** Encoded fingerprint payload (hex string in simulation mode). */
fingerprintData: string;
/** Algorithm/library version string. */
version: string;
/** True when Chromaprint produced the fingerprint. */
usingRealML?: boolean;
}
/** Dense audio embedding of a single stem. */
export interface EmbeddingResult {
/** Embedding model name (e.g. "laion-clap"). */
model: string;
/** Embedding vector; length equals `dimension`. */
vector: number[];
/** Vector dimensionality (512 in simulation mode). */
dimension: number;
/** True when CLAP produced the embedding. */
usingRealML?: boolean;
}
/** One candidate link between an AI stem and a training track. */
export interface AttributionMatch {
/** ID of the matched training track. */
trainingTrackId: number;
/** ID of the matched training stem, when stem-level info exists. */
trainingStemId: number | null;
/** Matching method, e.g. "embedding" or "fingerprint". */
method: string;
/** Raw similarity score (higher = more similar). */
score: number;
/** Score scaled down slightly to express uncertainty. */
confidence: number;
/** Extra diagnostics (stem type, rank, matched title/artist, ...). */
metadata?: Record<string, unknown>;
}
/**
 * Check whether real ML processing is available.
 *
 * The answer is computed once and memoized in `_mlAvailable` (failures are
 * cached too, so a broken environment is probed only once per process).
 * As a side effect `_mlHealth` is populated for `getMLHealth`.
 */
export async function isMLAvailable(): Promise<boolean> {
  if (_mlAvailable !== null) return _mlAvailable;
  try {
    const hasPython = await mlBridge.isPythonAvailable();
    const hasProcessor = await mlBridge.isProcessorAvailable();
    if (!(hasPython && hasProcessor)) {
      _mlAvailable = false;
      return false;
    }
    // Probe the actual models, not just the interpreter/script presence.
    _mlHealth = await mlBridge.checkMLHealth();
    _mlAvailable = _mlHealth.success;
    if (!_mlAvailable) {
      console.log("[AudioProcessing] ML models not fully available:", _mlHealth.errors);
    } else {
      console.log("[AudioProcessing] ML models available:", {
        demucs: _mlHealth.demucs,
        chromaprint: _mlHealth.chromaprint,
        clap: _mlHealth.clap,
      });
    }
    return _mlAvailable;
  } catch (error) {
    console.log("[AudioProcessing] ML check failed, using simulation:", error);
    _mlAvailable = false;
    return false;
  }
}
/**
 * Expose the cached ML health report, running the availability probe
 * first if it has not happened yet.
 *
 * @returns The last `checkMLHealth` result, or null when ML is unreachable.
 */
export async function getMLHealth(): Promise<mlBridge.HealthCheckResult | null> {
  await isMLAvailable(); // populates _mlHealth when the bridge is reachable
  return _mlHealth;
}
/**
* Separate audio into stems
* Uses Demucs if available, otherwise simulates
*/
export async function separateStems(
trackId: number,
inputFilePath: string,
outputDir: string
): Promise<StemSeparationResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.demucs) {
try {
console.log(`[AudioProcessing] Separating stems with Demucs for track ${trackId}`);
const result = await mlBridge.separateStems(inputFilePath, outputDir);
if (result.success && result.stems) {
// Convert ML result to our format
const stems = result.stems.map(stem => ({
type: stem.type as "vocals" | "drums" | "bass" | "other",
fileKey: `tracks/${trackId}/stems/${stem.type}-${nanoid(8)}.wav`,
fileUrl: stem.path, // Local path for now, would upload to S3 in production
duration: stem.duration || 0,
}));
return { stems, usingRealML: true };
}
console.warn(`[AudioProcessing] Demucs failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] Demucs error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateStemSeparation(trackId, inputFilePath, 180);
}
/**
* Generate audio fingerprint
* Uses Chromaprint if available, otherwise simulates
*/
export async function generateFingerprint(
stemId: number,
audioPath: string,
stemType: string
): Promise<FingerprintResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.chromaprint) {
try {
console.log(`[AudioProcessing] Generating fingerprint with Chromaprint for stem ${stemId}`);
const result = await mlBridge.generateFingerprint(audioPath);
if (result.success && result.fingerprint) {
return {
algorithm: result.algorithm || "chromaprint",
fingerprintData: result.fingerprint,
version: result.version || "1.5",
usingRealML: true,
};
}
console.warn(`[AudioProcessing] Chromaprint failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] Chromaprint error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateFingerprinting(stemId, stemType);
}
/**
* Generate audio embedding
* Uses CLAP if available, otherwise simulates
*/
export async function generateEmbedding(
stemId: number,
audioPath: string,
stemType: string
): Promise<EmbeddingResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.clap) {
try {
console.log(`[AudioProcessing] Generating embedding with CLAP for stem ${stemId}`);
const result = await mlBridge.generateEmbedding(audioPath);
if (result.success && result.embedding) {
return {
model: result.model || "laion-clap",
vector: result.embedding,
dimension: result.dimension || result.embedding.length,
usingRealML: true,
};
}
console.warn(`[AudioProcessing] CLAP failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] CLAP error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateEmbeddingGeneration(stemId, stemType);
}
// ============================================================================
// Simulation functions (used when ML not available or as fallback)
// ============================================================================
/**
 * Simulate stem separation (Demucs stand-in).
 *
 * Produces one placeholder stem per type, each sharing the track's duration
 * and pointing at a fabricated storage URL derived from its file key.
 */
export async function simulateStemSeparation(
  trackId: number,
  originalFileUrl: string,
  duration: number
): Promise<StemSeparationResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 1000));
  const stems = (["vocals", "drums", "bass", "other"] as const).map(type => {
    const fileKey = `tracks/${trackId}/stems/${type}-${nanoid(8)}.wav`;
    return {
      type,
      fileKey,
      // In simulation mode, these are placeholder URLs
      fileUrl: `https://storage.example.com/${fileKey}`,
      duration,
    };
  });
  return { stems, usingRealML: false };
}
/**
 * Simulate audio fingerprinting (Chromaprint stand-in).
 *
 * Produces a random 40-character lowercase-hex string as the fingerprint.
 */
export async function simulateFingerprinting(
  stemId: number,
  stemType: string
): Promise<FingerprintResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 200));
  // Build a 40-hex-digit mock hash one nibble at a time.
  let mockHash = "";
  for (let nibble = 0; nibble < 40; nibble++) {
    mockHash += Math.floor(Math.random() * 16).toString(16);
  }
  return {
    algorithm: "chromaprint",
    fingerprintData: mockHash,
    version: "1.5.1-simulated",
    usingRealML: false,
  };
}
/**
 * Simulate embedding generation (CLAP stand-in).
 *
 * Produces a 512-dimensional vector of uniform random values in [-1, 1).
 */
export async function simulateEmbeddingGeneration(
  stemId: number,
  stemType: string
): Promise<EmbeddingResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 300));
  const dimension = 512;
  // Map [0, 1) uniform samples into [-1, 1).
  const vector: number[] = [];
  while (vector.length < dimension) {
    vector.push(Math.random() * 2 - 1);
  }
  return {
    model: "clap-htsat-fused-simulated",
    vector,
    dimension,
    usingRealML: false,
  };
}
/**
 * Query the FAISS index for embeddings similar to `embedding`.
 *
 * @param embedding Query vector.
 * @param k         Maximum number of neighbours to return.
 * @param threshold Minimum score for a neighbour to count.
 * @returns Matching entries, or an empty list on any bridge failure
 *          (errors are logged, never thrown).
 */
export async function searchFAISS(
  embedding: number[],
  k: number = 10,
  threshold: number = 0.5
): Promise<Array<{
  score: number;
  trackId: string;
  stemId?: string;
  stemType?: string;
  title: string;
  artist: string;
}>> {
  try {
    const query = { embedding, k, threshold };
    const response = await mlBridge.callPythonProcessor("faiss_search", query);
    return response.matches || [];
  } catch (error) {
    console.warn("[AudioProcessing] FAISS search failed:", error);
    return [];
  }
}
/**
 * Append a batch of embeddings to the FAISS index via the Python bridge.
 *
 * @returns Counts reported by the processor; on failure the error is logged
 *          and `{ success: false, added: 0, total: 0 }` is returned.
 */
export async function addToFAISS(
  embeddings: Array<{
    trackId: string | number;
    stemId?: number;
    stemType?: string;
    embedding: number[];
    title: string;
    artist: string;
  }>
): Promise<{ success: boolean; added: number; total: number }> {
  try {
    const response = await mlBridge.callPythonProcessor("faiss_add", { embeddings });
    return {
      success: response.success || false,
      added: response.added || 0,
      total: response.total || 0,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS add failed:", error);
    return { success: false, added: 0, total: 0 };
  }
}
/**
 * Fetch FAISS index statistics from the Python bridge.
 *
 * @returns Index existence/size/dimension; on failure the error is logged
 *          and a "no index" result (dimension defaulting to 512) is returned.
 */
export async function getFAISSStats(): Promise<{
  exists: boolean;
  total: number;
  dimension: number;
  uniqueTracks?: number;
}> {
  try {
    const response = await mlBridge.callPythonProcessor("faiss_stats", {});
    return {
      exists: response.exists || false,
      total: response.total || 0,
      dimension: response.dimension || 512,
      uniqueTracks: response.uniqueTracks,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS stats failed:", error);
    return { exists: false, total: 0, dimension: 512 };
  }
}
/**
* Perform attribution matching
* Uses FAISS for real embedding-based matching when available,
* falls back to database matching or simulation
*/
export async function performAttribution(
aiTrackId: number,
aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): Promise<AttributionMatch[]> {
const hasRealEmbeddings = aiStems.some(s => s.embedding && s.embedding.length > 0);
if (hasRealEmbeddings) {
// Check if FAISS index exists and has data
const faissStats = await getFAISSStats();
if (faissStats.exists && faissStats.total > 0) {
console.log(`[AudioProcessing] Using FAISS index with ${faissStats.total} embeddings`);
return performFAISSAttribution(aiTrackId, aiStems);
}
// Fall back to in-memory cosine similarity if we have training embeddings
if (trainingTracks.some(t => t.embedding && t.embedding.length > 0)) {
console.log("[AudioProcessing] Using in-memory cosine similarity");
return performCosineAttribution(aiTrackId, aiStems, trainingTracks);
}
}
// No real embeddings available, use simulation
console.log("[AudioProcessing] Using simulated attribution (no real embeddings)");
return simulateAttribution(aiTrackId, aiStems, trainingTracks);
}
/**
 * Perform attribution using FAISS vector search.
 *
 * For every stem that carries a real embedding, queries the index for its
 * top 5 neighbours and converts them into AttributionMatch records. Results
 * are deduplicated per training track and sorted by score.
 *
 * Fixes vs. the original: `parseInt` is now called with an explicit radix
 * (IDs arrive from the Python bridge as strings), and an unparsable stem ID
 * maps to `null` instead of leaking `NaN` into `trainingStemId`.
 */
async function performFAISSAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  // Parse a bridge-supplied ID that may be a string or already a number.
  const toId = (value: string | number): number =>
    typeof value === "string" ? Number.parseInt(value, 10) : value;
  const matches: AttributionMatch[] = [];
  for (const stem of aiStems) {
    if (!stem.embedding || stem.embedding.length === 0) continue;
    // Search FAISS for similar embeddings (z-score threshold of 1 = 1 std above baseline)
    const faissMatches = await searchFAISS(stem.embedding, 5, 1.0);
    for (let i = 0; i < faissMatches.length; i++) {
      const match = faissMatches[i];
      if (!match) continue;
      const trackId = toId(match.trackId);
      const stemId = match.stemId != null ? toId(match.stemId) : null;
      matches.push({
        trainingTrackId: Number.isNaN(trackId) ? 0 : trackId,
        // Drop unparsable stem IDs rather than storing NaN.
        trainingStemId: stemId !== null && Number.isNaN(stemId) ? null : stemId,
        method: "embedding",
        score: match.score,
        confidence: match.score * 0.95, // Slightly lower confidence than raw score
        metadata: {
          stemType: stem.stemType,
          matchedStemType: match.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          method: "faiss",
          matchedTitle: match.title,
          matchedArtist: match.artist,
        },
      });
    }
  }
  // Sort by score descending and deduplicate
  return deduplicateMatches(matches.sort((a, b) => b.score - a.score));
}
/**
 * Perform attribution using in-memory cosine similarity.
 *
 * For every stem with a real embedding, scores it against every training
 * track that also has one, keeps the top 5 above 0.5 similarity, and emits
 * deduplicated, score-sorted AttributionMatch records.
 */
function performCosineAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): AttributionMatch[] {
  const allMatches: AttributionMatch[] = [];
  for (const stem of aiStems) {
    const queryVec = stem.embedding;
    if (!queryVec || queryVec.length === 0) continue;
    // Score against every training track that carries an embedding,
    // then keep the 5 best candidates above the 0.5 similarity floor.
    const ranked = trainingTracks
      .filter(t => t.embedding && t.embedding.length > 0)
      .map(t => ({
        trackId: t.id,
        title: t.title,
        artist: t.artist,
        score: cosineSimilarity(queryVec, t.embedding!),
      }))
      .sort((a, b) => b.score - a.score)
      .slice(0, 5)
      .filter(c => c.score > 0.5);
    ranked.forEach((candidate, rank) => {
      allMatches.push({
        trainingTrackId: candidate.trackId,
        trainingStemId: null,
        method: "embedding",
        score: candidate.score,
        confidence: candidate.score * 0.9,
        metadata: {
          stemType: stem.stemType,
          matchRank: rank + 1,
          algorithmVersion: "1.0.0",
          method: "cosine",
          matchedTitle: candidate.title,
          matchedArtist: candidate.artist,
        },
      });
    });
  }
  return deduplicateMatches(allMatches.sort((a, b) => b.score - a.score));
}
/**
 * Calculate cosine similarity between two vectors.
 *
 * @returns dot(a, b) / (|a| * |b|); 0 when lengths differ or either
 *          vector has zero magnitude.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  a.forEach((value, i) => {
    const other = b[i];
    dot += value * other;
    sumSqA += value * value;
    sumSqB += other * other;
  });
  const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  // Zero-magnitude vectors have no defined direction.
  return magnitude === 0 ? 0 : dot / magnitude;
}
/**
 * Deduplicate matches, keeping the highest-scoring entry per training track.
 *
 * @returns Surviving matches sorted by score, descending.
 */
function deduplicateMatches(matches: AttributionMatch[]): AttributionMatch[] {
  const bestPerTrack = new Map<number, AttributionMatch>();
  for (const candidate of matches) {
    const current = bestPerTrack.get(candidate.trainingTrackId);
    if (current === undefined || candidate.score > current.score) {
      bestPerTrack.set(candidate.trainingTrackId, candidate);
    }
  }
  return [...bestPerTrack.values()].sort((a, b) => b.score - a.score);
}
/**
 * Simulate attribution matching.
 *
 * Picks 2-3 random training tracks per AI stem and assigns them plausible
 * descending scores in [0.5, 1.0]. Output is random on every call.
 *
 * Fix: the original shuffled with `trainingTracks.sort(() => Math.random() - 0.5)`,
 * which mutates the CALLER'S array in place (and is a biased shuffle). We now
 * use a Fisher-Yates shuffle over a shallow copy, leaving the input untouched.
 */
export async function simulateAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null }>
): Promise<AttributionMatch[]> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 1500));
  const matches: AttributionMatch[] = [];
  // For each AI stem, pick 2-3 random training tracks as matches.
  for (const aiStem of aiStems) {
    const numMatches = Math.floor(Math.random() * 2) + 2; // 2 or 3 matches
    const selectedTracks = _shuffledCopy(trainingTracks).slice(0, numMatches);
    for (let i = 0; i < selectedTracks.length; i++) {
      const track = selectedTracks[i];
      if (!track) continue;
      // Generate realistic scores (higher for first match, lower for subsequent)
      const baseScore = 0.95 - (i * 0.15) - (Math.random() * 0.1);
      const score = Math.max(0.5, Math.min(1.0, baseScore));
      const confidence = score * (0.85 + Math.random() * 0.15);
      matches.push({
        trainingTrackId: track.id,
        trainingStemId: null,
        method: i === 0 ? "fingerprint" : "embedding",
        score,
        confidence,
        metadata: {
          stemType: aiStem.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          simulated: true,
        },
      });
    }
  }
  // Sort by score descending
  return matches.sort((a, b) => b.score - a.score);
}
/** Fisher-Yates shuffle of a shallow copy; never mutates the input array. */
function _shuffledCopy<T>(items: readonly T[]): T[] {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}
/**
 * Generate mock training data for demonstration.
 *
 * Produces one "completed" training-reference record per artist, with a
 * random size (2-7 MB) and duration (2-5 minutes).
 *
 * Fix: the original generated `fileKey` and `fileUrl` from two INDEPENDENT
 * nanoid calls, so the URL never corresponded to the key. We now derive the
 * URL from the key, matching the convention used by simulateStemSeparation.
 */
export function generateMockTrainingTracks() {
  const artists = [
    "The Beatles", "Led Zeppelin", "Pink Floyd", "Queen", "The Rolling Stones",
    "Nirvana", "Radiohead", "Daft Punk", "Kendrick Lamar", "Taylor Swift",
    "Billie Eilish", "The Weeknd", "Drake", "Adele", "Ed Sheeran"
  ];
  const songTitles = [
    "Electric Dreams", "Midnight Echoes", "Neon Lights", "Digital Soul",
    "Synthetic Love", "Binary Beats", "Virtual Reality", "Cyber Groove",
    "Quantum Melody", "Neural Network", "Algorithm Blues", "Data Stream",
    "Cloud Nine", "Pixel Perfect", "Code Symphony"
  ];
  return artists.map((artist, i) => {
    // One key per track; the URL points at that same key.
    const fileKey = `training/${nanoid(16)}.mp3`;
    return {
      title: songTitles[i] || `Track ${i + 1}`,
      artist,
      trackType: "training_reference" as const,
      fileKey,
      fileUrl: `https://storage.example.com/${fileKey}`,
      fileSize: Math.floor(Math.random() * 5000000) + 2000000, // 2-7 MB
      mimeType: "audio/mpeg",
      duration: Math.floor(Math.random() * 180) + 120, // 2-5 minutes
      status: "completed" as const,
    };
  });
}