Spaces:

emresar
/

aimusic-attribution

Running on CPU Upgrade

File size: 18,124 Bytes

6678fa1

/**
 * Audio Processing Service
 * 
 * Handles stem separation, fingerprinting, and attribution.
 * Uses real ML models (Demucs, Chromaprint, CLAP) when available,
 * falls back to simulation when ML dependencies aren't installed.
 */

import { nanoid } from "nanoid";
import path from "path";
import fs from "fs/promises";
import * as mlBridge from "./mlBridge";

// Cache ML availability check.
// `null` means no probe has completed yet; once isMLAvailable() has stored a
// boolean here, later calls return it without probing again.
let _mlAvailable: boolean | null = null;
// Most recent report from mlBridge.checkMLHealth(). Stays `null` when the
// Python/processor pre-checks fail or the probe throws before the report is stored.
let _mlHealth: mlBridge.HealthCheckResult | null = null;

/**
 * Outcome of splitting a track into stems.
 */
export interface StemSeparationResult {
  /** One entry per separated stem. */
  stems: Array<{
    /** Which source the stem contains. */
    type: "vocals" | "drums" | "bass" | "other";
    /** Storage key; this file builds it as `tracks/<trackId>/stems/<type>-<randomId>.wav`. */
    fileKey: string;
    /** Where the stem audio lives: a local path on the Demucs path, a placeholder URL in simulation. */
    fileUrl: string;
    /** Stem duration — presumably seconds; TODO confirm against the ML bridge. */
    duration: number;
  }>;
  /** `true` when the stems came from the real model, `false` when simulated. */
  usingRealML?: boolean;
}

/**
 * Audio fingerprint produced for a stem.
 */
export interface FingerprintResult {
  /** Name of the fingerprinting algorithm (this file uses "chromaprint"). */
  algorithm: string;
  /** Fingerprint payload as a string; the simulation emits 40 hexadecimal characters. */
  fingerprintData: string;
  /** Version label of the algorithm; simulated results carry a "-simulated" suffix. */
  version: string;
  /** `true` when produced by the real fingerprinter, `false` when simulated. */
  usingRealML?: boolean;
}

/**
 * Audio embedding produced for a stem.
 */
export interface EmbeddingResult {
  /** Identifier of the model that produced the vector. */
  model: string;
  /** The embedding values. */
  vector: number[];
  /** Number of components; producers in this file use the reported dimension or the vector length. */
  dimension: number;
  /** `true` when produced by the real model, `false` when simulated. */
  usingRealML?: boolean;
}

/**
 * A single candidate link between an AI-generated stem and a training track.
 */
export interface AttributionMatch {
  /** ID of the matched training track. */
  trainingTrackId: number;
  /** ID of the matched training stem, or `null` when the match is at track level only. */
  trainingStemId: number | null;
  /** How the match was found; this file emits "embedding" and "fingerprint". */
  method: string;
  /** Similarity score of the match; matches are ranked by this value, highest first. */
  score: number;
  /** Confidence derived from the score; the producers in this file scale the score by a factor of at most 1. */
  confidence: number;
  /** Free-form details about the match (stem type, rank, algorithm version, ...). */
  metadata?: Record<string, unknown>;
}

/**
 * Report whether the real ML pipeline can be used.
 *
 * The first call probes the Python runtime, the processor and the model
 * health check through `mlBridge`. The verdict is memoized in `_mlAvailable`,
 * so later calls return it without probing again. Any error raised while
 * probing is logged and treated as "not available".
 *
 * @returns `true` when the health check reports success, otherwise `false`.
 */
export async function isMLAvailable(): Promise<boolean> {
  // Serve the memoized verdict once a probe has completed.
  if (_mlAvailable !== null) {
    return _mlAvailable;
  }

  try {
    // Both pre-checks always run, one after the other.
    const hasPython = await mlBridge.isPythonAvailable();
    const hasProcessor = await mlBridge.isProcessorAvailable();

    if (hasPython && hasProcessor) {
      // Pre-checks passed: ask the bridge which models actually load.
      const health = await mlBridge.checkMLHealth();
      _mlHealth = health;

      const available = health.success;
      _mlAvailable = available;

      if (available) {
        console.log("[AudioProcessing] ML models available:", {
          demucs: health.demucs,
          chromaprint: health.chromaprint,
          clap: health.clap,
        });
      } else {
        console.log("[AudioProcessing] ML models not fully available:", health.errors);
      }

      return available;
    }

    // Python or the processor is missing; the health report stays unset.
    _mlAvailable = false;
    return false;
  } catch (error) {
    console.log("[AudioProcessing] ML check failed, using simulation:", error);
    _mlAvailable = false;
    return false;
  }
}

/**
 * Return the cached ML health report.
 *
 * Runs (or reuses) the availability probe first so the cache has had a chance
 * to be filled.
 *
 * @returns The last health report, or `null` when none was obtained.
 */
export async function getMLHealth(): Promise<mlBridge.HealthCheckResult | null> {
  // Read the cache only after the probe has settled.
  return isMLAvailable().then(() => _mlHealth);
}

/**
 * Split a track into stems.
 *
 * Uses Demucs through `mlBridge` when the health report says it is present.
 * If Demucs is unavailable, reports failure, or throws, the call falls back to
 * `simulateStemSeparation` with a fixed duration of 180.
 *
 * @param trackId - Track the stems belong to; used in log lines and file keys.
 * @param inputFilePath - Audio file handed to the separator.
 * @param outputDir - Directory handed to the separator for its output.
 * @returns The stems plus a flag telling whether the real model produced them.
 */
export async function separateStems(
  trackId: number,
  inputFilePath: string,
  outputDir: string
): Promise<StemSeparationResult> {
  const demucsReady = (await isMLAvailable()) && Boolean(_mlHealth?.demucs);

  if (demucsReady) {
    try {
      console.log(`[AudioProcessing] Separating stems with Demucs for track ${trackId}`);

      const separation = await mlBridge.separateStems(inputFilePath, outputDir);

      if (separation.success && separation.stems) {
        // Translate each bridge stem into the service's own shape.
        const stems: StemSeparationResult["stems"] = [];
        for (const stem of separation.stems) {
          stems.push({
            type: stem.type as "vocals" | "drums" | "bass" | "other",
            fileKey: `tracks/${trackId}/stems/${stem.type}-${nanoid(8)}.wav`,
            fileUrl: stem.path, // Local path for now, would upload to S3 in production
            duration: stem.duration || 0,
          });
        }

        return { stems, usingRealML: true };
      }

      console.warn(`[AudioProcessing] Demucs failed, falling back to simulation:`, separation.error);
    } catch (error) {
      console.warn(`[AudioProcessing] Demucs error, falling back to simulation:`, error);
    }
  }

  // Real separation was skipped or did not succeed: simulate instead.
  return simulateStemSeparation(trackId, inputFilePath, 180);
}

/**
 * Produce an audio fingerprint for a stem.
 *
 * Uses Chromaprint through `mlBridge` when the health report says it is
 * present. Otherwise, or when the real call fails or throws, the result comes
 * from `simulateFingerprinting`.
 *
 * @param stemId - Stem being fingerprinted; used for logging and the fallback.
 * @param audioPath - Audio file handed to the fingerprinter.
 * @param stemType - Stem type, forwarded to the simulated fallback.
 * @returns The fingerprint plus a flag telling whether it is real or simulated.
 */
export async function generateFingerprint(
  stemId: number,
  audioPath: string,
  stemType: string
): Promise<FingerprintResult> {
  const chromaprintReady = (await isMLAvailable()) && Boolean(_mlHealth?.chromaprint);

  if (chromaprintReady) {
    try {
      console.log(`[AudioProcessing] Generating fingerprint with Chromaprint for stem ${stemId}`);

      const outcome = await mlBridge.generateFingerprint(audioPath);

      if (outcome.success && outcome.fingerprint) {
        const real: FingerprintResult = {
          algorithm: outcome.algorithm || "chromaprint",
          fingerprintData: outcome.fingerprint,
          version: outcome.version || "1.5",
          usingRealML: true,
        };
        return real;
      }

      console.warn(`[AudioProcessing] Chromaprint failed, falling back to simulation:`, outcome.error);
    } catch (error) {
      console.warn(`[AudioProcessing] Chromaprint error, falling back to simulation:`, error);
    }
  }

  // Real fingerprinting was skipped or did not succeed: simulate instead.
  return simulateFingerprinting(stemId, stemType);
}

/**
 * Produce an audio embedding for a stem.
 *
 * Uses CLAP through `mlBridge` when the health report says it is present.
 * Otherwise, or when the real call fails or throws, the result comes from
 * `simulateEmbeddingGeneration`.
 *
 * @param stemId - Stem being embedded; used for logging and the fallback.
 * @param audioPath - Audio file handed to the embedding model.
 * @param stemType - Stem type, forwarded to the simulated fallback.
 * @returns The embedding plus a flag telling whether it is real or simulated.
 */
export async function generateEmbedding(
  stemId: number,
  audioPath: string,
  stemType: string
): Promise<EmbeddingResult> {
  const clapReady = (await isMLAvailable()) && Boolean(_mlHealth?.clap);

  if (clapReady) {
    try {
      console.log(`[AudioProcessing] Generating embedding with CLAP for stem ${stemId}`);

      const outcome = await mlBridge.generateEmbedding(audioPath);

      if (outcome.success && outcome.embedding) {
        const real: EmbeddingResult = {
          model: outcome.model || "laion-clap",
          vector: outcome.embedding,
          // Fall back to the vector's own length when no dimension is reported.
          dimension: outcome.dimension || outcome.embedding.length,
          usingRealML: true,
        };
        return real;
      }

      console.warn(`[AudioProcessing] CLAP failed, falling back to simulation:`, outcome.error);
    } catch (error) {
      console.warn(`[AudioProcessing] CLAP error, falling back to simulation:`, error);
    }
  }

  // Real embedding was skipped or did not succeed: simulate instead.
  return simulateEmbeddingGeneration(stemId, stemType);
}

// ============================================================================
// Simulation functions (used when ML not available or as fallback)
// ============================================================================

/**
 * Stand-in for Demucs stem separation.
 *
 * Waits one second, then returns one placeholder stem for each of the four
 * stem types. No audio is read or written.
 *
 * @param trackId - Track the stems belong to; embedded in every file key.
 * @param originalFileUrl - Source audio location; not used by the simulation.
 * @param duration - Duration copied onto every simulated stem.
 * @returns Four placeholder stems flagged as simulated.
 */
export async function simulateStemSeparation(
  trackId: number,
  originalFileUrl: string,
  duration: number
): Promise<StemSeparationResult> {
  // Pretend the separation takes a moment.
  await new Promise(done => setTimeout(done, 1000));

  const stems: StemSeparationResult["stems"] = [];

  for (const type of ["vocals", "drums", "bass", "other"] as const) {
    const fileKey = `tracks/${trackId}/stems/${type}-${nanoid(8)}.wav`;

    stems.push({
      type,
      fileKey,
      // Placeholder URL: nothing is uploaded in simulation mode.
      fileUrl: `https://storage.example.com/${fileKey}`,
      duration,
    });
  }

  return { stems, usingRealML: false };
}

/**
 * Stand-in for Chromaprint fingerprinting.
 *
 * Waits 200 ms, then returns 40 random hexadecimal characters as the
 * fingerprint. The result is random on every call and does not depend on the
 * arguments.
 *
 * @param stemId - Stem being fingerprinted; not used by the simulation.
 * @param stemType - Stem type; not used by the simulation.
 * @returns A mock fingerprint flagged as simulated.
 */
export async function simulateFingerprinting(
  stemId: number,
  stemType: string
): Promise<FingerprintResult> {
  // Pretend the fingerprinter takes a moment.
  await new Promise(done => setTimeout(done, 200));

  // Build the mock hash one random hex digit at a time.
  let fingerprintData = "";
  for (let position = 0; position < 40; position++) {
    const nibble = Math.floor(Math.random() * 16);
    fingerprintData += nibble.toString(16);
  }

  return {
    algorithm: "chromaprint",
    fingerprintData,
    version: "1.5.1-simulated",
    usingRealML: false,
  };
}

/**
 * Stand-in for CLAP embedding generation.
 *
 * Waits 300 ms, then returns a 512-component vector of random values. The
 * result is random on every call and does not depend on the arguments.
 *
 * @param stemId - Stem being embedded; not used by the simulation.
 * @param stemType - Stem type; not used by the simulation.
 * @returns A mock embedding flagged as simulated.
 */
export async function simulateEmbeddingGeneration(
  stemId: number,
  stemType: string
): Promise<EmbeddingResult> {
  // Pretend the model takes a moment.
  await new Promise(done => setTimeout(done, 300));

  const dimension = 512;
  const vector: number[] = [];

  // Each component is drawn from [-1, 1).
  while (vector.length < dimension) {
    vector.push(Math.random() * 2 - 1);
  }

  return {
    model: "clap-htsat-fused-simulated",
    vector,
    dimension,
    usingRealML: false,
  };
}

/**
 * Query the FAISS index for entries similar to an embedding.
 *
 * Sends a "faiss_search" command to the Python processor. Errors are logged
 * and turned into an empty result rather than thrown.
 *
 * @param embedding - Query vector.
 * @param k - Value forwarded as `k` (defaults to 10).
 * @param threshold - Value forwarded as `threshold` (defaults to 0.5).
 * @returns The `matches` reported by the processor, or an empty array.
 */
export async function searchFAISS(
  embedding: number[],
  k: number = 10,
  threshold: number = 0.5
): Promise<Array<{
  score: number;
  trackId: string;
  stemId?: string;
  stemType?: string;
  title: string;
  artist: string;
}>> {
  try {
    const { matches } = await mlBridge.callPythonProcessor("faiss_search", {
      embedding,
      k,
      threshold,
    });

    // A missing or empty `matches` field is reported as "no matches".
    return matches || [];
  } catch (error) {
    console.warn("[AudioProcessing] FAISS search failed:", error);
    return [];
  }
}

/**
 * Add embeddings to the FAISS index.
 *
 * Sends a "faiss_add" command to the Python processor. Errors are logged and
 * reported as an unsuccessful result rather than thrown.
 *
 * @param embeddings - Entries to index, each with its vector and track details.
 * @returns The processor's `success`, `added` and `total` values; missing
 *   values default to `false` / `0`.
 */
export async function addToFAISS(
  embeddings: Array<{
    trackId: string | number;
    stemId?: number;
    stemType?: string;
    embedding: number[];
    title: string;
    artist: string;
  }>
): Promise<{ success: boolean; added: number; total: number }> {
  try {
    const response = await mlBridge.callPythonProcessor("faiss_add", {
      embeddings,
    });

    // Normalize missing fields to neutral defaults.
    const success = response.success || false;
    const added = response.added || 0;
    const total = response.total || 0;

    return { success, added, total };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS add failed:", error);
    return { success: false, added: 0, total: 0 };
  }
}

/**
 * Fetch statistics about the FAISS index.
 *
 * Sends a "faiss_stats" command to the Python processor. Errors are logged and
 * reported as a missing, empty index rather than thrown.
 *
 * @returns Whether the index exists, its entry count, its dimension (512 when
 *   not reported) and, when reported, the number of unique tracks.
 */
export async function getFAISSStats(): Promise<{
  exists: boolean;
  total: number;
  dimension: number;
  uniqueTracks?: number;
}> {
  try {
    const stats = await mlBridge.callPythonProcessor("faiss_stats", {});

    // Normalize missing fields to neutral defaults.
    const exists = stats.exists || false;
    const total = stats.total || 0;
    const dimension = stats.dimension || 512;

    return { exists, total, dimension, uniqueTracks: stats.uniqueTracks };
  } catch (error) {
    console.warn("[AudioProcessing] FAISS stats failed:", error);
    return { exists: false, total: 0, dimension: 512 };
  }
}

/**
 * Match an AI-generated track's stems against training tracks.
 *
 * Strategy, in order of preference:
 * 1. FAISS search, when at least one stem has an embedding and the index
 *    exists with data.
 * 2. In-memory cosine similarity, when at least one stem and at least one
 *    training track have embeddings.
 * 3. Simulated attribution otherwise.
 *
 * @param aiTrackId - ID of the AI-generated track.
 * @param aiStems - Stems of that track, optionally with embeddings.
 * @param trainingTracks - Candidate training tracks, optionally with embeddings.
 * @returns Attribution matches from the chosen strategy.
 */
export async function performAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  // Last resort, shared by both exits below.
  const simulate = (): Promise<AttributionMatch[]> => {
    console.log("[AudioProcessing] Using simulated attribution (no real embeddings)");
    return simulateAttribution(aiTrackId, aiStems, trainingTracks);
  };

  const anyStemEmbedded = aiStems.some(stem => (stem.embedding?.length ?? 0) > 0);
  if (!anyStemEmbedded) {
    return simulate();
  }

  // Prefer the FAISS index when it exists and holds data.
  const { exists, total } = await getFAISSStats();
  if (exists && total > 0) {
    console.log(`[AudioProcessing] Using FAISS index with ${total} embeddings`);
    return performFAISSAttribution(aiTrackId, aiStems);
  }

  // Otherwise compare directly against training embeddings, if any exist.
  const anyTrainingEmbedded = trainingTracks.some(track => (track.embedding?.length ?? 0) > 0);
  if (anyTrainingEmbedded) {
    console.log("[AudioProcessing] Using in-memory cosine similarity");
    return performCosineAttribution(aiTrackId, aiStems, trainingTracks);
  }

  return simulate();
}

/**
 * Perform attribution using FAISS vector search.
 *
 * Each stem that has an embedding is searched against the index, one after
 * the other. Every hit becomes an `AttributionMatch`; the combined list is
 * sorted by score and reduced to the best match per training track.
 *
 * IDs coming back from the index may be strings or numbers. They are parsed
 * as base-10 integers. A track ID that cannot be parsed becomes `0` (the
 * existing fallback), and a stem ID that cannot be parsed becomes `null`
 * instead of leaking `NaN` into `trainingStemId`.
 *
 * @param aiTrackId - ID of the AI-generated track (not used in the search itself).
 * @param aiStems - Stems to search for; stems without an embedding are skipped.
 * @returns Deduplicated matches, highest score first.
 */
async function performFAISSAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>
): Promise<AttributionMatch[]> {
  // Turn an ID from the index into a finite number, or null when that is not possible.
  const toNumericId = (raw: string | number | null | undefined): number | null => {
    if (raw == null) return null;
    // Explicit radix: IDs are decimal, never hex/octal literals.
    const parsed = typeof raw === "string" ? Number.parseInt(raw, 10) : raw;
    return Number.isFinite(parsed) ? parsed : null;
  };

  const matches: AttributionMatch[] = [];

  for (const stem of aiStems) {
    if (!stem.embedding || stem.embedding.length === 0) continue;

    // Search FAISS for similar embeddings (z-score threshold of 1 = 1 std above baseline)
    const faissMatches = await searchFAISS(stem.embedding, 5, 1.0);

    for (let i = 0; i < faissMatches.length; i++) {
      const match = faissMatches[i];
      if (!match) continue;

      matches.push({
        // Unparseable track IDs keep the historical fallback of 0.
        trainingTrackId: toNumericId(match.trackId) ?? 0,
        // A missing (falsy) stem ID stays null; an unparseable one is now null too.
        trainingStemId: match.stemId ? toNumericId(match.stemId) : null,
        method: "embedding",
        score: match.score,
        confidence: match.score * 0.95, // Slightly lower confidence than raw score
        metadata: {
          stemType: stem.stemType,
          matchedStemType: match.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          method: "faiss",
          matchedTitle: match.title,
          matchedArtist: match.artist,
        },
      });
    }
  }

  // Sort by score descending and deduplicate
  return deduplicateMatches(matches.sort((a, b) => b.score - a.score));
}

/**
 * Perform attribution using in-memory cosine similarity.
 *
 * Every stem with an embedding is compared with every training track that
 * has one. Per stem, the five most similar tracks are taken and those scoring
 * above 0.5 are kept. The combined list is sorted by score and reduced to the
 * best match per training track.
 *
 * @param aiTrackId - ID of the AI-generated track (not used in the comparison).
 * @param aiStems - Stems to attribute; stems without an embedding are skipped.
 * @param trainingTracks - Candidates; tracks without an embedding are skipped.
 * @returns Deduplicated matches, highest score first.
 */
function performCosineAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string; embedding?: number[] }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null; embedding?: number[] }>
): AttributionMatch[] {
  // Collect the comparable training tracks once, in their original order.
  const references: Array<{ trackId: number; title: string; artist: string | null; vector: number[] }> = [];
  trainingTracks.forEach(track => {
    if (track.embedding && track.embedding.length > 0) {
      references.push({
        trackId: track.id,
        title: track.title,
        artist: track.artist,
        vector: track.embedding,
      });
    }
  });

  const collected: AttributionMatch[] = [];

  for (const stem of aiStems) {
    const query = stem.embedding;
    if (!query || query.length === 0) continue;

    // Score every reference, rank them, keep the top five above the 0.5 cut-off.
    const best = references
      .map(reference => ({ reference, score: cosineSimilarity(query, reference.vector) }))
      .sort((left, right) => right.score - left.score)
      .slice(0, 5)
      .filter(entry => entry.score > 0.5);

    best.forEach(({ reference, score }, rank) => {
      collected.push({
        trainingTrackId: reference.trackId,
        trainingStemId: null,
        method: "embedding",
        score,
        confidence: score * 0.9,
        metadata: {
          stemType: stem.stemType,
          matchRank: rank + 1,
          algorithmVersion: "1.0.0",
          method: "cosine",
          matchedTitle: reference.title,
          matchedArtist: reference.artist,
        },
      });
    });
  }

  collected.sort((left, right) => right.score - left.score);
  return deduplicateMatches(collected);
}

/**
 * Calculate cosine similarity between two vectors.
 *
 * Returns 0 when no meaningful similarity exists: the lengths differ, either
 * vector is empty or all zeros, or the computation is not finite (for example
 * a `NaN`/`Infinity` component or an overflow). This keeps `NaN` out of the
 * scores, where it would break sort comparators. The result is clamped to
 * [-1, 1] so floating-point rounding cannot push it outside the valid range.
 *
 * @param a - First vector.
 * @param b - Second vector; must have the same length as `a` to be comparable.
 * @returns The cosine of the angle between `a` and `b`, in [-1, 1].
 */
function cosineSimilarity(a: number[], b: number[]): number {
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;

  for (let i = 0; i < a.length; i++) {
    // Holes in sparse arrays count as 0 instead of producing NaN.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
  }

  const normA = Math.sqrt(sumSquaresA);
  const normB = Math.sqrt(sumSquaresB);

  // A zero vector has no direction, so there is nothing to compare.
  if (normA === 0 || normB === 0) return 0;

  const similarity = dotProduct / (normA * normB);

  // NaN/Infinity inputs or overflow: report "no similarity" rather than NaN.
  if (!Number.isFinite(similarity)) return 0;

  return Math.max(-1, Math.min(1, similarity));
}

/**
 * Collapse matches so each training track appears once, keeping its
 * highest-scoring match.
 *
 * When two matches for the same track tie, the one seen first wins. The input
 * array is not modified.
 *
 * @param matches - Candidate matches, in any order.
 * @returns One match per training track, highest score first.
 */
function deduplicateMatches(matches: AttributionMatch[]): AttributionMatch[] {
  const bestByTrack = new Map<number, AttributionMatch>();

  matches.forEach(candidate => {
    const current = bestByTrack.get(candidate.trainingTrackId);
    // Keep the current entry unless the candidate strictly beats it.
    if (current && !(candidate.score > current.score)) {
      return;
    }
    bestByTrack.set(candidate.trainingTrackId, candidate);
  });

  const unique = [...bestByTrack.values()];
  unique.sort((left, right) => right.score - left.score);
  return unique;
}

/**
 * Simulate attribution matching.
 *
 * Waits 1.5 seconds, then, for each AI stem, picks two or three distinct
 * training tracks at random and gives them plausible-looking scores (highest
 * for the first pick). Every match carries `metadata.simulated = true`.
 *
 * The caller's `trainingTracks` array is never reordered or modified: the
 * random selection works on a private copy and draws without replacement, so
 * every track is equally likely to be picked.
 *
 * @param aiTrackId - ID of the AI-generated track (not used by the simulation).
 * @param aiStems - Stems to generate matches for.
 * @param trainingTracks - Pool of training tracks to draw matches from.
 * @returns Simulated matches for all stems, highest score first.
 */
export async function simulateAttribution(
  aiTrackId: number,
  aiStems: Array<{ id: number; stemType: string }>,
  trainingTracks: Array<{ id: number; title: string; artist: string | null }>
): Promise<AttributionMatch[]> {
  // Simulate processing delay
  await new Promise(resolve => setTimeout(resolve, 1500));

  const matches: AttributionMatch[] = [];

  // For each AI stem, pick 2-3 matching training tracks
  for (const aiStem of aiStems) {
    const numMatches = Math.floor(Math.random() * 2) + 2; // 2 or 3 matches

    // Draw without replacement from a copy so the caller's array is untouched.
    const pool = [...trainingTracks];
    const selectedTracks: typeof trainingTracks = [];
    while (selectedTracks.length < numMatches && pool.length > 0) {
      const pick = Math.floor(Math.random() * pool.length);
      selectedTracks.push(...pool.splice(pick, 1));
    }

    selectedTracks.forEach((track, i) => {
      // Generate realistic scores (higher for first match, lower for subsequent)
      const baseScore = 0.95 - (i * 0.15) - (Math.random() * 0.1);
      const score = Math.max(0.5, Math.min(1.0, baseScore));
      const confidence = score * (0.85 + Math.random() * 0.15);

      matches.push({
        trainingTrackId: track.id,
        trainingStemId: null,
        method: i === 0 ? "fingerprint" : "embedding",
        score,
        confidence,
        metadata: {
          stemType: aiStem.stemType,
          matchRank: i + 1,
          algorithmVersion: "1.0.0",
          simulated: true,
        },
      });
    });
  }

  // Sort by score descending
  return matches.sort((a, b) => b.score - a.score);
}

/**
 * Generate mock training data for demonstration.
 *
 * Produces one track record per artist in a fixed list, pairing each artist
 * with the title at the same position. File size and duration are random
 * within the ranges noted below. `fileUrl` is derived from `fileKey`, so both
 * refer to the same placeholder object.
 *
 * @returns Mock track records marked as completed training references.
 */
export function generateMockTrainingTracks() {
  const artists = [
    "The Beatles", "Led Zeppelin", "Pink Floyd", "Queen", "The Rolling Stones",
    "Nirvana", "Radiohead", "Daft Punk", "Kendrick Lamar", "Taylor Swift",
    "Billie Eilish", "The Weeknd", "Drake", "Adele", "Ed Sheeran"
  ];

  const songTitles = [
    "Electric Dreams", "Midnight Echoes", "Neon Lights", "Digital Soul",
    "Synthetic Love", "Binary Beats", "Virtual Reality", "Cyber Groove",
    "Quantum Melody", "Neural Network", "Algorithm Blues", "Data Stream",
    "Cloud Nine", "Pixel Perfect", "Code Symphony"
  ];

  return artists.map((artist, i) => {
    // Generate the key once and build the URL from it so they always agree.
    const fileKey = `training/${nanoid(16)}.mp3`;

    return {
      // Fall back to a numbered title if the title list is shorter than the artist list.
      title: songTitles[i] || `Track ${i + 1}`,
      artist,
      trackType: "training_reference" as const,
      fileKey,
      fileUrl: `https://storage.example.com/${fileKey}`,
      fileSize: Math.floor(Math.random() * 5000000) + 2000000, // 2-7 MB
      mimeType: "audio/mpeg",
      duration: Math.floor(Math.random() * 180) + 120, // 2-5 minutes
      status: "completed" as const,
    };
  });
}