/**
 * Audio Processing Service
 *
 * Handles stem separation, fingerprinting, and attribution.
 * Uses real ML models (Demucs, Chromaprint, CLAP) when available,
 * falls back to simulation when ML dependencies aren't installed.
 */

import { nanoid } from "nanoid";
import path from "path";
import fs from "fs/promises";
import * as mlBridge from "./mlBridge";

// Cached result of the ML availability probe; `null` means "not probed yet".
let _mlAvailable: boolean | null = null;
// Health report from the last successful probe. Stays `null` when Python or the
// processor is missing, or when the probe itself threw.
let _mlHealth: mlBridge.HealthCheckResult | null = null;

/** The four stem kinds this service produces. */
export type StemType = "vocals" | "drums" | "bass" | "other";

const STEM_TYPES: readonly StemType[] = ["vocals", "drums", "bass", "other"];

// Duration reported for every simulated stem when real separation is unavailable.
// NOTE(review): presumably seconds (3 minutes) — confirm against consumers.
const FALLBACK_STEM_DURATION = 180;

// Length of the vectors produced by the simulated embedder; also the dimension
// reported by getFAISSStats when the index does not report one.
const DEFAULT_EMBEDDING_DIMENSION = 512;

// Placeholder host used for every simulated / mock file URL.
const PLACEHOLDER_STORAGE_BASE_URL = "https://storage.example.com";

export interface StemSeparationResult {
  stems: Array<{
    type: StemType;
    fileKey: string;
    fileUrl: string;
    duration: number;
  }>;
  /** `true` when the stems came from Demucs, `false` when simulated. */
  usingRealML?: boolean;
}

export interface FingerprintResult {
  algorithm: string;
  fingerprintData: string;
  version: string;
  /** `true` when produced by Chromaprint, `false` when simulated. */
  usingRealML?: boolean;
}

export interface EmbeddingResult {
  model: string;
  vector: number[];
  dimension: number;
  /** `true` when produced by CLAP, `false` when simulated. */
  usingRealML?: boolean;
}

export interface AttributionMatch {
  trainingTrackId: number;
  trainingStemId: number | null;
  method: string;
  score: number;
  confidence: number;
  // Free-form bag; kept as `any` so existing readers of individual keys keep compiling.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  metadata?: Record<string, any>;
}

/**
 * One hit returned by the `faiss_search` processor command.
 *
 * NOTE(review): this shape is reconstructed from the fields that
 * `performFAISSAttribution` reads and that `addToFAISS` writes — confirm it
 * against the Python processor's actual response.
 */
export interface FAISSMatch {
  trackId: string | number;
  stemId?: string | number | null;
  stemType?: string;
  score: number;
  title?: string;
  artist?: string;
}

/** Resolve after `ms` milliseconds; used to mimic processing latency in simulations. */
function delay(ms: number): Promise<void> {
  return new Promise(resolve => setTimeout(resolve, ms));
}

/**
 * Check if ML processing is available.
 *
 * The first call probes Python, the processor script and the ML health
 * endpoint through `mlBridge`; the outcome is cached for the lifetime of the
 * module, so later calls return immediately.
 *
 * @returns `true` only when Python and the processor are present and the
 *   health check reports success. Any thrown error is logged and treated as
 *   "not available".
 */
export async function isMLAvailable(): Promise<boolean> {
  if (_mlAvailable !== null) {
    return _mlAvailable;
  }

  try {
    const pythonAvailable = await mlBridge.isPythonAvailable();
    const processorAvailable = await mlBridge.isProcessorAvailable();

    if (!pythonAvailable || !processorAvailable) {
      _mlAvailable = false;
      return false;
    }

    // Check actual ML health
    const health = await mlBridge.checkMLHealth();
    _mlHealth = health;
    _mlAvailable = health.success;

    if (_mlAvailable) {
      console.log("[AudioProcessing] ML models available:", {
        demucs: health.demucs,
        chromaprint: health.chromaprint,
        clap: health.clap,
      });
    } else {
      console.log("[AudioProcessing] ML models not fully available:", health.errors);
    }

    return _mlAvailable;
  } catch (error) {
    console.log("[AudioProcessing] ML check failed, using simulation:", error);
    _mlAvailable = false;
    return false;
  }
}

/**
 * Get ML health status.
 *
 * Triggers the availability probe if it has not run yet.
 *
 * @returns The cached health report, or `null` when the health check never
 *   completed (Python/processor missing, or the probe threw).
 */
export async function getMLHealth(): Promise<mlBridge.HealthCheckResult | null> {
  await isMLAvailable();
  return _mlHealth;
}

/**
 * Separate audio into stems.
 * Uses Demucs if available, otherwise simulates.
 *
 * @param trackId - Track identifier, embedded in the generated file keys.
 * @param inputFilePath - Audio file handed to Demucs.
 * @param outputDir - Directory handed to Demucs for its output.
 * @returns The separated stems. On any Demucs failure (error result or thrown
 *   exception) a warning is logged and simulated stems are returned instead.
 */
export async function separateStems(
  trackId: number,
  inputFilePath: string,
  outputDir: string
): Promise<StemSeparationResult> {
  const mlAvailable = await isMLAvailable();

  if (mlAvailable && _mlHealth?.demucs) {
    try {
      console.log(`[AudioProcessing] Separating stems with Demucs for track ${trackId}`);

      const result = await mlBridge.separateStems(inputFilePath, outputDir);

      if (result.success && result.stems) {
        // Convert ML result to our format
        const stems = result.stems.map(stem => ({
          // NOTE(review): unchecked cast — a stem type outside the four known
          // kinds would pass through unvalidated.
          type: stem.type as StemType,
          fileKey: `tracks/${trackId}/stems/${stem.type}-${nanoid(8)}.wav`,
          fileUrl: stem.path, // Local path for now, would upload to S3 in production
          duration: stem.duration || 0,
        }));

        return { stems, usingRealML: true };
      }

      console.warn(`[AudioProcessing] Demucs failed, falling back to simulation:`, result.error);
    } catch (error) {
      console.warn(`[AudioProcessing] Demucs error, falling back to simulation:`, error);
    }
  }

  // Fallback to simulation
  return simulateStemSeparation(trackId, inputFilePath, FALLBACK_STEM_DURATION);
}

/**
 * Generate audio fingerprint.
 * Uses Chromaprint if available, otherwise simulates.
 *
 * @param stemId - Stem identifier (used for logging and passed to the simulator).
 * @param audioPath - Audio file handed to Chromaprint.
 * @param stemType - Stem kind, passed to the simulator on fallback.
 * @returns The fingerprint. On any Chromaprint failure a warning is logged
 *   and a simulated fingerprint is returned instead.
 */
export async function generateFingerprint(
  stemId: number,
  audioPath: string,
  stemType: string
): Promise<FingerprintResult> {
  const mlAvailable = await isMLAvailable();

  if (mlAvailable && _mlHealth?.chromaprint) {
    try {
      console.log(`[AudioProcessing] Generating fingerprint with Chromaprint for stem ${stemId}`);

      const result = await mlBridge.generateFingerprint(audioPath);

      if (result.success && result.fingerprint) {
        return {
          algorithm: result.algorithm || "chromaprint",
          fingerprintData: result.fingerprint,
          version: result.version || "1.5",
          usingRealML: true,
        };
      }

      console.warn(`[AudioProcessing] Chromaprint failed, falling back to simulation:`, result.error);
    } catch (error) {
      console.warn(`[AudioProcessing] Chromaprint error, falling back to simulation:`, error);
    }
  }

  // Fallback to simulation
  return simulateFingerprinting(stemId, stemType);
}

/**
 * Generate audio embedding.
 * Uses CLAP if available, otherwise simulates.
 *
 * @param stemId - Stem identifier (used for logging and passed to the simulator).
 * @param audioPath - Audio file handed to CLAP.
 * @param stemType - Stem kind, passed to the simulator on fallback.
 * @returns The embedding. On any CLAP failure a warning is logged and a
 *   simulated (random) embedding is returned instead.
 */
export async function generateEmbedding(
  stemId: number,
  audioPath: string,
  stemType: string
): Promise<EmbeddingResult> {
  const mlAvailable = await isMLAvailable();

  if (mlAvailable && _mlHealth?.clap) {
    try {
      console.log(`[AudioProcessing] Generating embedding with CLAP for stem ${stemId}`);

      const result = await mlBridge.generateEmbedding(audioPath);

      if (result.success && result.embedding) {
        return {
          model: result.model || "laion-clap",
          vector: result.embedding,
          // A missing or zero dimension falls back to the actual vector length.
          dimension: result.dimension || result.embedding.length,
          usingRealML: true,
        };
      }

      console.warn(`[AudioProcessing] CLAP failed, falling back to simulation:`, result.error);
    } catch (error) {
      console.warn(`[AudioProcessing] CLAP error, falling back to simulation:`, error);
    }
  }

  // Fallback to simulation
  return simulateEmbeddingGeneration(stemId, stemType);
}

// ============================================================================
// Simulation functions (used when ML not available or as fallback)
// ============================================================================

/**
 * Simulate stem separation using Demucs.
 *
 * Waits one second, then returns one placeholder stem per known stem type.
 * No audio is read or written; `originalFileUrl` is accepted but not used.
 *
 * @param trackId - Track identifier, embedded in the generated file keys.
 * @param originalFileUrl - Unused by the simulation.
 * @param duration - Duration reported on every simulated stem.
 */
export async function simulateStemSeparation(
  trackId: number,
  originalFileUrl: string,
  duration: number
): Promise<StemSeparationResult> {
  // Simulate processing delay
  await delay(1000);

  const stems = STEM_TYPES.map(type => {
    const fileKey = `tracks/${trackId}/stems/${type}-${nanoid(8)}.wav`;
    // In simulation mode, these are placeholder URLs
    const fileUrl = `${PLACEHOLDER_STORAGE_BASE_URL}/${fileKey}`;

    return { type, fileKey, fileUrl, duration };
  });

  return { stems, usingRealML: false };
}

/**
 * Simulate audio fingerprinting using Chromaprint.
 *
 * Waits 200 ms, then returns a random 40-character hexadecimal string.
 * Both parameters are accepted but not used, so two calls for the same stem
 * produce different fingerprints.
 */
export async function simulateFingerprinting(
  stemId: number,
  stemType: string
): Promise<FingerprintResult> {
  // Simulate processing delay
  await delay(200);

  // Generate a mock fingerprint hash
  const mockHash = Array.from({ length: 40 }, () =>
    Math.floor(Math.random() * 16).toString(16)
  ).join("");

  return {
    algorithm: "chromaprint",
    fingerprintData: mockHash,
    version: "1.5.1-simulated",
    usingRealML: false,
  };
}

/**
 * Simulate embedding generation using CLAP.
 *
 * Waits 300 ms, then returns a 512-element vector of independent random
 * values in [-1, 1). Both parameters are accepted but not used.
 */
export async function simulateEmbeddingGeneration(
  stemId: number,
  stemType: string
): Promise<EmbeddingResult> {
  // Simulate processing delay
  await delay(300);

  // Generate a mock 512-dimensional embedding vector
  const dimension = DEFAULT_EMBEDDING_DIMENSION;
  const vector = Array.from(
    { length: dimension },
    () => Math.random() * 2 - 1 // Random values between -1 and 1
  );

  return {
    model: "clap-htsat-fused-simulated",
    vector,
    dimension,
    usingRealML: false,
  };
}

/**
 * Search FAISS index for similar embeddings.
 *
 * @param embedding - Query vector.
 * @param k - Forwarded to the processor as `k` (defaults to 10).
 * @param threshold - Forwarded to the processor unchanged (defaults to 0.5);
 *   its interpretation is decided by the processor.
 * @returns The processor's `matches` array, or `[]` when the response has no
 *   such array or the call fails (failure is logged, not thrown).
 */
export async function searchFAISS(
  embedding: number[],
  k: number = 10,
  threshold: number = 0.5
): Promise<FAISSMatch[]> {
  try {
    const result = await mlBridge.callPythonProcessor("faiss_search", {
      embedding,
      k,
      threshold,
    });

    // Guard against a truthy but non-array payload reaching callers that iterate it.
    if (Array.isArray(result.matches)) {
      return result.matches;
    }
    return [];
  } catch (error) {
    console.warn("[AudioProcessing] FAISS search failed:", error);
    return [];
  }
}

/**
 * Add embeddings to FAISS index.
 *
 * @param embeddings - Entries forwarded to the `faiss_add` processor command.
 * @returns The processor's `success` / `added` / `total` values, each
 *   defaulting to `false` / `0` when absent. A failed call is logged and
 *   reported as `{ success: false, added: 0, total: 0 }`.
 */
export async function addToFAISS(
  embeddings: Array<{
    trackId: string | number;
    stemId?: number;
    stemType?: string;
    embedding: number[];
    title: string;
    artist: string;
  }>
): Promise<{ success: boolean; added: number; total: number }> {
  try {
    const result = await mlBridge.callPythonProcessor("faiss_add", {
      embeddings,
    });

    return {
      success: result.success || false,
      added: result.added || 0,
      total: result.total || 0,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS add failed:", error);
    return { success: false, added: 0, total: 0 };
  }
}

/**
 * Get FAISS index statistics.
 *
 * @returns The processor's statistics with defaults for missing fields
 *   (`exists: false`, `total: 0`, `dimension: 512`). A failed call is logged
 *   and reported as a non-existent, empty index.
 */
export async function getFAISSStats(): Promise<{
  exists: boolean;
  total: number;
  dimension: number;
  uniqueTracks?: number;
}> {
  try {
    const result = await mlBridge.callPythonProcessor("faiss_stats", {});
    return {
      exists: result.exists || false,
      total: result.total || 0,
      dimension: result.dimension || DEFAULT_EMBEDDING_DIMENSION,
      uniqueTracks: result.uniqueTracks,
    };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS stats failed:", error);
    return { exists: false, total: 0, dimension: DEFAULT_EMBEDDING_DIMENSION };
  }
}

/**
 * Perform attribution matching.
 *
 * Strategy, in order:
 * 1. If any AI stem carries an embedding and the FAISS index exists and is
 *    non-empty, search the index.
 * 2. Otherwise, if any AI stem and any training track carry embeddings,
 *    compare them in memory with cosine similarity.
 * 3. Otherwise, return simulated (random) matches.
 *
 * @param aiTrackId - Identifier of the track being attributed.
 * @param aiStems - Stems of that track, optionally with embeddings.
 * @param trainingTracks - Candidate reference tracks, optionally with embeddings.
 * @returns Matches sorted by descending score.
 */
export async function performAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  const hasRealEmbeddings = aiStems.some(s => s.embedding && s.embedding.length > 0);

  if (hasRealEmbeddings) {
    // Check if FAISS index exists and has data
    const faissStats = await getFAISSStats();

    if (faissStats.exists && faissStats.total > 0) {
      console.log(`[AudioProcessing] Using FAISS index with ${faissStats.total} embeddings`);
      return performFAISSAttribution(aiTrackId, aiStems);
    }

    // Fall back to in-memory cosine similarity if we have training embeddings
    if (trainingTracks.some(t => t.embedding && t.embedding.length > 0)) {
      console.log("[AudioProcessing] Using in-memory cosine similarity");
      return performCosineAttribution(aiTrackId, aiStems, trainingTracks);
    }
  }

  // No real embeddings available, use simulation
  console.log("[AudioProcessing] Using simulated attribution (no real embeddings)");
  return simulateAttribution(aiTrackId, aiStems, trainingTracks);
}

/**
 * Convert an identifier that may arrive as a number or a numeric string into
 * a number. Returns `null` for missing values and for anything that does not
 * parse, so `NaN` never leaks into a match record.
 */
function parseNumericId(value: string | number | null | undefined): number | null {
  if (value == null) return null;
  const id = typeof value === "number" ? value : Number.parseInt(value, 10);
  return Number.isNaN(id) ? null : id;
}

/**
 * Perform attribution using FAISS vector search.
 *
 * Stems without an embedding are skipped. For each remaining stem the top
 * five index hits are converted to `AttributionMatch` records, then the
 * combined list is reduced to the best-scoring match per training track.
 */
async function performFAISSAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  const matches: AttributionMatch[] = [];

  for (const stem of aiStems) {
    if (!stem.embedding || stem.embedding.length === 0) continue;

    // Search FAISS for similar embeddings (z-score threshold of 1 = 1 std above baseline)
    const faissMatches = await searchFAISS(stem.embedding, 5, 1.0);

    faissMatches.forEach((match, index) => {
      if (!match) return;

      matches.push({
        // An unparseable track id falls back to 0.
        trainingTrackId: parseNumericId(match.trackId) ?? 0,
        // A missing, empty or zero stem id means "no specific stem".
        trainingStemId: match.stemId ? parseNumericId(match.stemId) : null,
        method: "embedding",
        score: match.score,
        confidence: match.score * 0.95, // Slightly lower confidence than raw score
        metadata: {
          stemType: stem.stemType,
          matchedStemType: match.stemType,
          matchRank: index + 1,
          algorithmVersion: "1.0.0",
          method: "faiss",
          matchedTitle: match.title,
          matchedArtist: match.artist,
        },
      });
    });
  }

  // Sort by score descending and deduplicate
  return deduplicateMatches(matches.sort((a, b) => b.score - a.score));
}

/**
 * Perform attribution using in-memory cosine similarity.
 *
 * For every AI stem with an embedding, scores it against every training
 * track with an embedding, keeps the five most similar, drops those scoring
 * 0.5 or below, and finally keeps the best match per training track.
 */
function performCosineAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): AttributionMatch[] {
  const matches: AttributionMatch[] = [];

  for (const stem of aiStems) {
    const stemEmbedding = stem.embedding;
    if (!stemEmbedding || stemEmbedding.length === 0) continue;

    // Calculate cosine similarity with all training embeddings
    const similarities: Array<{ trackId: number; score: number; title: string; artist: string | null }> = [];

    for (const training of trainingTracks) {
      if (!training.embedding || training.embedding.length === 0) continue;

      similarities.push({
        trackId: training.id,
        score: cosineSimilarity(stemEmbedding, training.embedding),
        title: training.title,
        artist: training.artist,
      });
    }

    // Sort by similarity and take top 5
    similarities.sort((a, b) => b.score - a.score);
    const topMatches = similarities.slice(0, 5).filter(s => s.score > 0.5);

    topMatches.forEach((match, index) => {
      matches.push({
        trainingTrackId: match.trackId,
        trainingStemId: null,
        method: "embedding",
        score: match.score,
        confidence: match.score * 0.9,
        metadata: {
          stemType: stem.stemType,
          matchRank: index + 1,
          algorithmVersion: "1.0.0",
          method: "cosine",
          matchedTitle: match.title,
          matchedArtist: match.artist,
        },
      });
    });
  }

  return deduplicateMatches(matches.sort((a, b) => b.score - a.score));
}

/**
 * Calculate cosine similarity between two vectors.
 *
 * @returns The cosine of the angle between `a` and `b`, or `0` when the
 *   lengths differ or either vector has zero magnitude.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let squaredNormA = 0;
  let squaredNormB = 0;

  for (let i = 0; i < a.length; i++) {
    // Indices are in range for both arrays; `?? 0` only satisfies strict index checks.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dotProduct += x * y;
    squaredNormA += x * x;
    squaredNormB += y * y;
  }

  const normA = Math.sqrt(squaredNormA);
  const normB = Math.sqrt(squaredNormB);

  if (normA === 0 || normB === 0) return 0;

  return dotProduct / (normA * normB);
}

/**
 * Deduplicate matches keeping highest score per training track.
 *
 * @returns One match per `trainingTrackId`, sorted by descending score.
 */
function deduplicateMatches(matches: AttributionMatch[]): AttributionMatch[] {
  const bestByTrack = new Map<number, AttributionMatch>();

  for (const match of matches) {
    const existing = bestByTrack.get(match.trainingTrackId);
    if (!existing || match.score > existing.score) {
      bestByTrack.set(match.trainingTrackId, match);
    }
  }

  return Array.from(bestByTrack.values()).sort((a, b) => b.score - a.score);
}

/**
 * Return a uniformly shuffled copy of `items` (Fisher-Yates); the input is
 * left untouched.
 */
function shuffledCopy<T>(items: readonly T[]): T[] {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    // Both indices are within bounds, so the assertions are safe.
    const tmp = copy[i] as T;
    copy[i] = copy[j] as T;
    copy[j] = tmp;
  }
  return copy;
}

/**
 * Simulate attribution matching.
 *
 * Waits 1.5 s, then for every AI stem picks two or three training tracks at
 * random and assigns them descending random scores in [0.5, 1.0]. The first
 * pick per stem is labelled `"fingerprint"`, the rest `"embedding"`. Every
 * match is flagged `metadata.simulated = true`. Matches are not deduplicated
 * across stems.
 *
 * The caller's `trainingTracks` array is not reordered.
 *
 * @returns All generated matches sorted by descending score.
 */
export async function simulateAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null }>
): Promise<AttributionMatch[]> {
  // Simulate processing delay
  await delay(1500);

  const matches: AttributionMatch[] = [];

  // For each AI stem, find top 3 matching training tracks
  for (const aiStem of aiStems) {
    // Randomly select 2-3 training tracks as matches
    const numMatches = Math.floor(Math.random() * 2) + 2; // 2 or 3 matches
    // Shuffle a copy: sorting the parameter in place would reorder the caller's array.
    const selectedTracks = shuffledCopy(trainingTracks).slice(0, numMatches);

    selectedTracks.forEach((track, index) => {
      // Generate realistic scores (higher for first match, lower for subsequent)
      const baseScore = 0.95 - index * 0.15 - Math.random() * 0.1;
      const score = Math.max(0.5, Math.min(1.0, baseScore));
      const confidence = score * (0.85 + Math.random() * 0.15);

      matches.push({
        trainingTrackId: track.id,
        trainingStemId: null,
        method: index === 0 ? "fingerprint" : "embedding",
        score,
        confidence,
        metadata: {
          stemType: aiStem.stemType,
          matchRank: index + 1,
          algorithmVersion: "1.0.0",
          simulated: true,
        },
      });
    });
  }

  // Sort by score descending
  return matches.sort((a, b) => b.score - a.score);
}

/**
 * Generate mock training data for demonstration.
 *
 * @returns One record per hard-coded artist (15 in total), paired by position
 *   with a hard-coded title, with a random file size (2-7 MB), a random
 *   duration (120-299, labelled as 2-5 minutes) and a placeholder URL that
 *   points at the record's own `fileKey`.
 */
export function generateMockTrainingTracks() {
  const artists = [
    "The Beatles", "Led Zeppelin", "Pink Floyd", "Queen", "The Rolling Stones",
    "Nirvana", "Radiohead", "Daft Punk", "Kendrick Lamar", "Taylor Swift",
    "Billie Eilish", "The Weeknd", "Drake", "Adele", "Ed Sheeran"
  ];

  const songTitles = [
    "Electric Dreams", "Midnight Echoes", "Neon Lights", "Digital Soul",
    "Synthetic Love", "Binary Beats", "Virtual Reality", "Cyber Groove",
    "Quantum Melody", "Neural Network", "Algorithm Blues", "Data Stream",
    "Cloud Nine", "Pixel Perfect", "Code Symphony"
  ];

  return artists.map((artist, i) => {
    // Generate the key once so the URL refers to the same object.
    const fileKey = `training/${nanoid(16)}.mp3`;

    return {
      title: songTitles[i] || `Track ${i + 1}`,
      artist,
      trackType: "training_reference" as const,
      fileKey,
      fileUrl: `${PLACEHOLDER_STORAGE_BASE_URL}/${fileKey}`,
      fileSize: Math.floor(Math.random() * 5000000) + 2000000, // 2-7 MB
      mimeType: "audio/mpeg",
      duration: Math.floor(Math.random() * 180) + 120, // 2-5 minutes
      status: "completed" as const,
    };
  });
}