// aimusic-attribution / server / audioProcessing.ts
/**
* Audio Processing Service
*
* Handles stem separation, fingerprinting, and attribution.
* Uses real ML models (Demucs, Chromaprint, CLAP) when available,
* falls back to simulation when ML dependencies aren't installed.
*/
import { nanoid } from "nanoid";
import path from "path";
import fs from "fs/promises";
import * as mlBridge from "./mlBridge";
// Cache ML availability check
// (memoized across calls; null = not probed yet; both success and failure are cached)
let _mlAvailable: boolean | null = null;
let _mlHealth: mlBridge.HealthCheckResult | null = null;
/** Result of splitting one track into per-instrument stems. */
export interface StemSeparationResult {
stems: Array<{
/** Stem category produced by the separator. */
type: "vocals" | "drums" | "bass" | "other";
/** Storage key, e.g. tracks/<trackId>/stems/<type>-<id>.wav */
fileKey: string;
/** Placeholder URL in simulation; local filesystem path when Demucs ran. */
fileUrl: string;
/** Stem duration in seconds (0 when the ML bridge reports none). */
duration: number;
}>;
/** True when Demucs produced the stems; false/undefined for simulation. */
usingRealML?: boolean;
}
/** Acoustic fingerprint of a single stem. */
export interface FingerprintResult {
/** Fingerprinting algorithm name (e.g. "chromaprint"). */
algorithm: string;
/** Encoded fingerprint payload (hex string in simulation mode). */
fingerprintData: string;
/** Algorithm/library version string. */
version: string;
/** True when Chromaprint produced the fingerprint. */
usingRealML?: boolean;
}
/** Dense audio embedding of a single stem. */
export interface EmbeddingResult {
/** Embedding model name (e.g. "laion-clap"). */
model: string;
/** Embedding vector; length equals `dimension`. */
vector: number[];
/** Vector dimensionality (512 in simulation mode). */
dimension: number;
/** True when CLAP produced the embedding. */
usingRealML?: boolean;
}
/** One candidate link between an AI stem and a training track. */
export interface AttributionMatch {
/** ID of the matched training track. */
trainingTrackId: number;
/** ID of the matched training stem, when stem-level info exists. */
trainingStemId: number | null;
/** Matching method, e.g. "embedding" or "fingerprint". */
method: string;
/** Raw similarity score (higher = more similar). */
score: number;
/** Score scaled down slightly to express uncertainty. */
confidence: number;
/** Extra diagnostics (stem type, rank, matched title/artist, ...). */
metadata?: Record<string, unknown>;
}
/**
 * Check whether real ML processing is available.
 *
 * The answer is computed once and memoized in `_mlAvailable` (failures are
 * cached too, so a broken environment is probed only once per process).
 * As a side effect `_mlHealth` is populated for `getMLHealth`.
 */
export async function isMLAvailable(): Promise<boolean> {
  if (_mlAvailable !== null) return _mlAvailable;
  try {
    const hasPython = await mlBridge.isPythonAvailable();
    const hasProcessor = await mlBridge.isProcessorAvailable();
    if (!(hasPython && hasProcessor)) {
      _mlAvailable = false;
      return false;
    }
    // Probe the actual models, not just the interpreter/script presence.
    _mlHealth = await mlBridge.checkMLHealth();
    _mlAvailable = _mlHealth.success;
    if (!_mlAvailable) {
      console.log("[AudioProcessing] ML models not fully available:", _mlHealth.errors);
    } else {
      console.log("[AudioProcessing] ML models available:", {
        demucs: _mlHealth.demucs,
        chromaprint: _mlHealth.chromaprint,
        clap: _mlHealth.clap,
      });
    }
    return _mlAvailable;
  } catch (error) {
    console.log("[AudioProcessing] ML check failed, using simulation:", error);
    _mlAvailable = false;
    return false;
  }
}
/**
 * Expose the cached ML health report, running the availability probe
 * first if it has not happened yet.
 *
 * @returns The last `checkMLHealth` result, or null when ML is unreachable.
 */
export async function getMLHealth(): Promise<mlBridge.HealthCheckResult | null> {
  await isMLAvailable(); // populates _mlHealth when the bridge is reachable
  return _mlHealth;
}
/**
* Separate audio into stems
* Uses Demucs if available, otherwise simulates
*/
export async function separateStems(
trackId: number,
inputFilePath: string,
outputDir: string
): Promise<StemSeparationResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.demucs) {
try {
console.log(`[AudioProcessing] Separating stems with Demucs for track ${trackId}`);
const result = await mlBridge.separateStems(inputFilePath, outputDir);
if (result.success && result.stems) {
// Convert ML result to our format
const stems = result.stems.map(stem => ({
type: stem.type as "vocals" | "drums" | "bass" | "other",
fileKey: `tracks/${trackId}/stems/${stem.type}-${nanoid(8)}.wav`,
fileUrl: stem.path, // Local path for now, would upload to S3 in production
duration: stem.duration || 0,
}));
return { stems, usingRealML: true };
}
console.warn(`[AudioProcessing] Demucs failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] Demucs error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateStemSeparation(trackId, inputFilePath, 180);
}
/**
* Generate audio fingerprint
* Uses Chromaprint if available, otherwise simulates
*/
export async function generateFingerprint(
stemId: number,
audioPath: string,
stemType: string
): Promise<FingerprintResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.chromaprint) {
try {
console.log(`[AudioProcessing] Generating fingerprint with Chromaprint for stem ${stemId}`);
const result = await mlBridge.generateFingerprint(audioPath);
if (result.success && result.fingerprint) {
return {
algorithm: result.algorithm || "chromaprint",
fingerprintData: result.fingerprint,
version: result.version || "1.5",
usingRealML: true,
};
}
console.warn(`[AudioProcessing] Chromaprint failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] Chromaprint error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateFingerprinting(stemId, stemType);
}
/**
* Generate audio embedding
* Uses CLAP if available, otherwise simulates
*/
export async function generateEmbedding(
stemId: number,
audioPath: string,
stemType: string
): Promise<EmbeddingResult> {
const mlAvailable = await isMLAvailable();
if (mlAvailable && _mlHealth?.clap) {
try {
console.log(`[AudioProcessing] Generating embedding with CLAP for stem ${stemId}`);
const result = await mlBridge.generateEmbedding(audioPath);
if (result.success && result.embedding) {
return {
model: result.model || "laion-clap",
vector: result.embedding,
dimension: result.dimension || result.embedding.length,
usingRealML: true,
};
}
console.warn(`[AudioProcessing] CLAP failed, falling back to simulation:`, result.error);
} catch (error) {
console.warn(`[AudioProcessing] CLAP error, falling back to simulation:`, error);
}
}
// Fallback to simulation
return simulateEmbeddingGeneration(stemId, stemType);
}
// ============================================================================
// Simulation functions (used when ML not available or as fallback)
// ============================================================================
/**
 * Simulate stem separation (Demucs stand-in).
 *
 * Produces one placeholder stem per type, each sharing the track's duration
 * and pointing at a fabricated storage URL derived from its file key.
 */
export async function simulateStemSeparation(
  trackId: number,
  originalFileUrl: string,
  duration: number
): Promise<StemSeparationResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 1000));
  const stems = (["vocals", "drums", "bass", "other"] as const).map(type => {
    const fileKey = `tracks/${trackId}/stems/${type}-${nanoid(8)}.wav`;
    return {
      type,
      fileKey,
      // In simulation mode, these are placeholder URLs
      fileUrl: `https://storage.example.com/${fileKey}`,
      duration,
    };
  });
  return { stems, usingRealML: false };
}
/**
 * Simulate audio fingerprinting (Chromaprint stand-in).
 *
 * Produces a random 40-character lowercase-hex string as the fingerprint.
 */
export async function simulateFingerprinting(
  stemId: number,
  stemType: string
): Promise<FingerprintResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 200));
  // Build a 40-hex-digit mock hash one nibble at a time.
  let mockHash = "";
  for (let nibble = 0; nibble < 40; nibble++) {
    mockHash += Math.floor(Math.random() * 16).toString(16);
  }
  return {
    algorithm: "chromaprint",
    fingerprintData: mockHash,
    version: "1.5.1-simulated",
    usingRealML: false,
  };
}
/**
 * Simulate embedding generation (CLAP stand-in).
 *
 * Produces a 512-dimensional vector of uniform random values in [-1, 1).
 */
export async function simulateEmbeddingGeneration(
  stemId: number,
  stemType: string
): Promise<EmbeddingResult> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 300));
  const dimension = 512;
  // Map [0, 1) uniform samples into [-1, 1).
  const vector: number[] = [];
  while (vector.length < dimension) {
    vector.push(Math.random() * 2 - 1);
  }
  return {
    model: "clap-htsat-fused-simulated",
    vector,
    dimension,
    usingRealML: false,
  };
}
/**
 * Query the FAISS index for embeddings similar to `embedding`.
 *
 * @param embedding Query vector.
 * @param k         Maximum number of neighbours to return.
 * @param threshold Minimum score for a neighbour to count.
 * @returns Matching entries, or an empty list on any bridge failure
 *          (errors are logged, never thrown).
 */
export async function searchFAISS(
  embedding: number[],
  k: number = 10,
  threshold: number = 0.5
): Promise<Array<{
  score: number;
  trackId: string;
  stemId?: string;
  stemType?: string;
  title: string;
  artist: string;
}>> {
  try {
    const query = { embedding, k, threshold };
    const response = await mlBridge.callPythonProcessor("faiss_search", query);
    return response.matches || [];
  } catch (error) {
    console.warn("[AudioProcessing] FAISS search failed:", error);
    return [];
  }
}
/**
 * Append a batch of embeddings to the FAISS index via the Python bridge.
 *
 * @returns Counts reported by the processor; on failure the error is logged
 *          and `{ success: false, added: 0, total: 0 }` is returned.
 */
export async function addToFAISS(
  embeddings: Array<{
    trackId: string | number;
    stemId?: number;
    stemType?: string;
    embedding: number[];
    title: string;
    artist: string;
  }>
): Promise<{ success: boolean; added: number; total: number }> {
  try {
    const response = await mlBridge.callPythonProcessor("faiss_add", { embeddings });
    return {
      success: response.success || false,
      added: response.added || 0,
      total: response.total || 0,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS add failed:", error);
    return { success: false, added: 0, total: 0 };
  }
}
/**
 * Fetch FAISS index statistics from the Python bridge.
 *
 * @returns Index existence/size/dimension; on failure the error is logged
 *          and a "no index" result (dimension defaulting to 512) is returned.
 */
export async function getFAISSStats(): Promise<{
  exists: boolean;
  total: number;
  dimension: number;
  uniqueTracks?: number;
}> {
  try {
    const response = await mlBridge.callPythonProcessor("faiss_stats", {});
    return {
      exists: response.exists || false,
      total: response.total || 0,
      dimension: response.dimension || 512,
      uniqueTracks: response.uniqueTracks,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS stats failed:", error);
    return { exists: false, total: 0, dimension: 512 };
  }
}
/**
* Perform attribution matching
* Uses FAISS for real embedding-based matching when available,
* falls back to database matching or simulation
*/
export async function performAttribution(
aiTrackId: number,
aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): Promise<AttributionMatch[]> {
const hasRealEmbeddings = aiStems.some(s => s.embedding && s.embedding.length > 0);
if (hasRealEmbeddings) {
// Check if FAISS index exists and has data
const faissStats = await getFAISSStats();
if (faissStats.exists && faissStats.total > 0) {
console.log(`[AudioProcessing] Using FAISS index with ${faissStats.total} embeddings`);
return performFAISSAttribution(aiTrackId, aiStems);
}
// Fall back to in-memory cosine similarity if we have training embeddings
if (trainingTracks.some(t => t.embedding && t.embedding.length > 0)) {
console.log("[AudioProcessing] Using in-memory cosine similarity");
return performCosineAttribution(aiTrackId, aiStems, trainingTracks);
}
}
// No real embeddings available, use simulation
console.log("[AudioProcessing] Using simulated attribution (no real embeddings)");
return simulateAttribution(aiTrackId, aiStems, trainingTracks);
}
/**
 * Perform attribution using FAISS vector search.
 *
 * For every stem that carries a real embedding, queries the index for its
 * top 5 neighbours and converts them into AttributionMatch records. Results
 * are deduplicated per training track and sorted by score.
 *
 * Fixes vs. the original: `parseInt` is now called with an explicit radix
 * (IDs arrive from the Python bridge as strings), and an unparsable stem ID
 * maps to `null` instead of leaking `NaN` into `trainingStemId`.
 */
async function performFAISSAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  // Parse a bridge-supplied ID that may be a string or already a number.
  const toId = (value: string | number): number =>
    typeof value === "string" ? Number.parseInt(value, 10) : value;
  const matches: AttributionMatch[] = [];
  for (const stem of aiStems) {
    if (!stem.embedding || stem.embedding.length === 0) continue;
    // Search FAISS for similar embeddings (z-score threshold of 1 = 1 std above baseline)
    const faissMatches = await searchFAISS(stem.embedding, 5, 1.0);
    for (let i = 0; i < faissMatches.length; i++) {
      const match = faissMatches[i];
      if (!match) continue;
      const trackId = toId(match.trackId);
      const stemId = match.stemId != null ? toId(match.stemId) : null;
      matches.push({
        trainingTrackId: Number.isNaN(trackId) ? 0 : trackId,
        // Drop unparsable stem IDs rather than storing NaN.
        trainingStemId: stemId !== null && Number.isNaN(stemId) ? null : stemId,
        method: "embedding",
        score: match.score,
        confidence: match.score * 0.95, // Slightly lower confidence than raw score
        metadata: {
          stemType: stem.stemType,
          matchedStemType: match.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          method: "faiss",
          matchedTitle: match.title,
          matchedArtist: match.artist,
        },
      });
    }
  }
  // Sort by score descending and deduplicate
  return deduplicateMatches(matches.sort((a, b) => b.score - a.score));
}
/**
 * Perform attribution using in-memory cosine similarity.
 *
 * For every stem with a real embedding, scores it against every training
 * track that also has one, keeps the top 5 above 0.5 similarity, and emits
 * deduplicated, score-sorted AttributionMatch records.
 */
function performCosineAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): AttributionMatch[] {
  const allMatches: AttributionMatch[] = [];
  for (const stem of aiStems) {
    const queryVec = stem.embedding;
    if (!queryVec || queryVec.length === 0) continue;
    // Score against every training track that carries an embedding,
    // then keep the 5 best candidates above the 0.5 similarity floor.
    const ranked = trainingTracks
      .filter(t => t.embedding && t.embedding.length > 0)
      .map(t => ({
        trackId: t.id,
        title: t.title,
        artist: t.artist,
        score: cosineSimilarity(queryVec, t.embedding!),
      }))
      .sort((a, b) => b.score - a.score)
      .slice(0, 5)
      .filter(c => c.score > 0.5);
    ranked.forEach((candidate, rank) => {
      allMatches.push({
        trainingTrackId: candidate.trackId,
        trainingStemId: null,
        method: "embedding",
        score: candidate.score,
        confidence: candidate.score * 0.9,
        metadata: {
          stemType: stem.stemType,
          matchRank: rank + 1,
          algorithmVersion: "1.0.0",
          method: "cosine",
          matchedTitle: candidate.title,
          matchedArtist: candidate.artist,
        },
      });
    });
  }
  return deduplicateMatches(allMatches.sort((a, b) => b.score - a.score));
}
/**
 * Calculate cosine similarity between two vectors.
 *
 * @returns dot(a, b) / (|a| * |b|); 0 when lengths differ or either
 *          vector has zero magnitude.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;
  let dot = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  a.forEach((value, i) => {
    const other = b[i];
    dot += value * other;
    sumSqA += value * value;
    sumSqB += other * other;
  });
  const magnitude = Math.sqrt(sumSqA) * Math.sqrt(sumSqB);
  // Zero-magnitude vectors have no defined direction.
  return magnitude === 0 ? 0 : dot / magnitude;
}
/**
 * Deduplicate matches, keeping the highest-scoring entry per training track.
 *
 * @returns Surviving matches sorted by score, descending.
 */
function deduplicateMatches(matches: AttributionMatch[]): AttributionMatch[] {
  const bestPerTrack = new Map<number, AttributionMatch>();
  for (const candidate of matches) {
    const current = bestPerTrack.get(candidate.trainingTrackId);
    if (current === undefined || candidate.score > current.score) {
      bestPerTrack.set(candidate.trainingTrackId, candidate);
    }
  }
  return [...bestPerTrack.values()].sort((a, b) => b.score - a.score);
}
/**
 * Simulate attribution matching.
 *
 * Picks 2-3 random training tracks per AI stem and assigns them plausible
 * descending scores in [0.5, 1.0]. Output is random on every call.
 *
 * Fix: the original shuffled with `trainingTracks.sort(() => Math.random() - 0.5)`,
 * which mutates the CALLER'S array in place (and is a biased shuffle). We now
 * use a Fisher-Yates shuffle over a shallow copy, leaving the input untouched.
 */
export async function simulateAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null }>
): Promise<AttributionMatch[]> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 1500));
  const matches: AttributionMatch[] = [];
  // For each AI stem, pick 2-3 random training tracks as matches.
  for (const aiStem of aiStems) {
    const numMatches = Math.floor(Math.random() * 2) + 2; // 2 or 3 matches
    const selectedTracks = _shuffledCopy(trainingTracks).slice(0, numMatches);
    for (let i = 0; i < selectedTracks.length; i++) {
      const track = selectedTracks[i];
      if (!track) continue;
      // Generate realistic scores (higher for first match, lower for subsequent)
      const baseScore = 0.95 - (i * 0.15) - (Math.random() * 0.1);
      const score = Math.max(0.5, Math.min(1.0, baseScore));
      const confidence = score * (0.85 + Math.random() * 0.15);
      matches.push({
        trainingTrackId: track.id,
        trainingStemId: null,
        method: i === 0 ? "fingerprint" : "embedding",
        score,
        confidence,
        metadata: {
          stemType: aiStem.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          simulated: true,
        },
      });
    }
  }
  // Sort by score descending
  return matches.sort((a, b) => b.score - a.score);
}
/** Fisher-Yates shuffle of a shallow copy; never mutates the input array. */
function _shuffledCopy<T>(items: readonly T[]): T[] {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}
/**
 * Generate mock training data for demonstration.
 *
 * Produces one "completed" training-reference record per artist, with a
 * random size (2-7 MB) and duration (2-5 minutes).
 *
 * Fix: the original generated `fileKey` and `fileUrl` from two INDEPENDENT
 * nanoid calls, so the URL never corresponded to the key. We now derive the
 * URL from the key, matching the convention used by simulateStemSeparation.
 */
export function generateMockTrainingTracks() {
  const artists = [
    "The Beatles", "Led Zeppelin", "Pink Floyd", "Queen", "The Rolling Stones",
    "Nirvana", "Radiohead", "Daft Punk", "Kendrick Lamar", "Taylor Swift",
    "Billie Eilish", "The Weeknd", "Drake", "Adele", "Ed Sheeran"
  ];
  const songTitles = [
    "Electric Dreams", "Midnight Echoes", "Neon Lights", "Digital Soul",
    "Synthetic Love", "Binary Beats", "Virtual Reality", "Cyber Groove",
    "Quantum Melody", "Neural Network", "Algorithm Blues", "Data Stream",
    "Cloud Nine", "Pixel Perfect", "Code Symphony"
  ];
  return artists.map((artist, i) => {
    // One key per track; the URL points at that same key.
    const fileKey = `training/${nanoid(16)}.mp3`;
    return {
      title: songTitles[i] || `Track ${i + 1}`,
      artist,
      trackType: "training_reference" as const,
      fileKey,
      fileUrl: `https://storage.example.com/${fileKey}`,
      fileSize: Math.floor(Math.random() * 5000000) + 2000000, // 2-7 MB
      mimeType: "audio/mpeg",
      duration: Math.floor(Math.random() * 180) + 120, // 2-5 minutes
      status: "completed" as const,
    };
  });
}