Spaces:

tfrere
/

fontmap

Running

File size: 8,658 Bytes

b3f0c1a

/**
 * UMAP Utils - Fonctions utilitaires pour le calcul UMAP
 * 
 * Contient les fonctions pures utilisées par le Web Worker
 */

/**
 * Fetches the precomputed embeddings JSON and returns the parsed payload.
 *
 * Progress is logged to the console. Any failure (network error, non-2xx
 * status, invalid JSON, payload without a `fonts` array) is logged and then
 * rethrown unchanged.
 *
 * @returns {Promise<Object>} The parsed content of `/data/embeddings.json`.
 * @throws {Error} `HTTP Error: <status>` when the response is not OK, or
 *   whatever error `fetch` / `json()` raised.
 */
export async function loadEmbeddings() {
  console.log('🔄 Chargement des embeddings CLIP...');

  // Download and decode in one step; rejects on a non-OK status.
  const requestPayload = async () => {
    const res = await fetch('/data/embeddings.json');
    if (res.ok) {
      return res.json();
    }
    throw new Error(`HTTP Error: ${res.status}`);
  };

  try {
    const payload = await requestPayload();
    console.log(`✅ ${payload.fonts.length} polices chargées`);
    return payload;
  } catch (failure) {
    console.error('❌ Erreur lors du chargement des embeddings:', failure);
    throw failure;
  }
}

/**
 * Derives the family prefix under which a font is grouped when families are merged.
 *
 * Rules are tried in order and the first match wins:
 *  1. An id without any hyphen is its own prefix.
 *  2. If the font declares a subset outside the standard list and that subset
 *     name occurs in the id, the id with the subset removed is the prefix.
 *  3. Hard-coded families (baloo, ibm-plex, playwrite).
 *  4. Noto families (`noto-serif-*` → `noto-serif`, other `noto-*` → `noto`).
 *  5. If the second token is `sans`, `serif` or `plex`, the first two tokens.
 *  6. Otherwise the first token.
 *
 * @param {string} fontId - Hyphen-separated font identifier.
 * @param {Object} [fontData] - Font record; only its `subsets` array is read.
 * @returns {string} The grouping prefix.
 */
export function extractFusionPrefix(fontId, fontData) {
  const segments = fontId.split('-');
  if (segments.length < 2) return fontId;

  // Rule 2: strip a non-standard subset name embedded in the id.
  const declaredSubsets = fontData && fontData.subsets;
  if (Array.isArray(declaredSubsets)) {
    const standardSubsets = new Set(['latin', 'latin-ext', 'cyrillic', 'cyrillic-ext', 'greek', 'greek-ext']);
    for (const subset of declaredSubsets) {
      if (standardSubsets.has(subset) || !fontId.includes(subset)) continue;

      // Prefer removing "-subset"; the second replace catches a subset that
      // is not preceded by a hyphen.
      const stripped = fontId.replace(`-${subset}`, '').replace(subset, '');
      if (stripped && stripped !== fontId) return stripped;
    }
  }

  // Rule 3: families whose members are listed by id prefix.
  const familyPatterns = [
    ['baloo', ['baloo-2', 'baloo-bhai-2', 'baloo-bhaijaan-2', 'baloo-bhaina-2', 'baloo-chettan-2', 'baloo-da-2', 'baloo-paaji-2', 'baloo-tamma-2', 'baloo-tammudu-2', 'baloo-thambi-2']],
    ['ibm-plex', ['ibm-plex']],
    ['playwrite', ['playwrite']]
  ];
  const specialFamily = familyPatterns.find(
    ([, patterns]) => patterns.some((pattern) => fontId.startsWith(pattern))
  );
  if (specialFamily) return specialFamily[0];

  // Rule 4: Noto, with the serif branch kept apart from the rest.
  if (fontId.startsWith('noto-')) {
    return fontId.startsWith('noto-serif-') ? 'noto-serif' : 'noto';
  }

  // Rules 5 and 6: keep a generic second token as part of the family name.
  const [head, second] = segments;
  return ['sans', 'serif', 'plex'].includes(second) ? `${head}-${second}` : head;
}

/**
 * Collapses fonts that share a fusion prefix into a single entry per family.
 *
 * Fonts are grouped by `extractFusionPrefix(font.id, font)`. A group with more
 * than one member is replaced by a copy of its representative font whose `id`
 * is the prefix, whose `name` is the title-cased prefix and whose `imageName`
 * is the representative's original id. The family embedding is the
 * representative's embedding (members are not averaged). A font that is alone
 * in its group is copied as-is with `imageName` set to its own id.
 *
 * @param {Array<Object>} fontDataList - Font records; each needs an `id`.
 * @param {Array} embeddingMatrices - Embeddings, index-aligned with `fontDataList`.
 * @param {boolean} [enableFusion=true] - When false, both inputs are returned untouched.
 * @returns {{fontDataList: Array<Object>, embeddingMatrices: Array}} Index-aligned
 *   merged fonts and embeddings.
 */
export function mergeFontFamilies(fontDataList, embeddingMatrices, enableFusion = true) {
  if (!enableFusion) {
    return { fontDataList, embeddingMatrices };
  }

  // Preferred representative id for selected families; any other family (or a
  // family whose preferred member is absent) falls back to its first member.
  const preferredRepresentatives = new Map([
    ['noto', 'noto-sans-arabic'],
    ['noto-serif', 'noto-serif-latin'],
    ['ibm-plex', 'ibm-plex-sans'],
    ['baloo', 'baloo-2']
  ]);

  // Null-prototype dictionary: prefixes come from data, and a prefix such as
  // "constructor" must not resolve to an inherited Object.prototype member.
  const groups = Object.create(null);

  for (let i = 0; i < fontDataList.length; i++) {
    const font = fontDataList[i];
    const prefix = extractFusionPrefix(font.id, font);

    if (!groups[prefix]) {
      groups[prefix] = { fonts: [], embeddings: [] };
    }
    groups[prefix].fonts.push(font);
    groups[prefix].embeddings.push(embeddingMatrices[i]);
  }

  const mergedFonts = [];
  const mergedEmbeddings = [];

  for (const [prefix, { fonts, embeddings }] of Object.entries(groups)) {
    if (fonts.length === 1) {
      mergedFonts.push({ ...fonts[0], imageName: fonts[0].id });
      mergedEmbeddings.push(embeddings[0]);
      continue;
    }

    // Locate the representative once; its position also selects the embedding.
    let representativeIndex = 0;
    const preferredId = preferredRepresentatives.get(prefix);
    if (preferredId !== undefined) {
      const preferredIndex = fonts.findIndex((f) => f.id === preferredId);
      if (preferredIndex !== -1) representativeIndex = preferredIndex;
    }
    const representativeFont = fonts[representativeIndex];

    mergedFonts.push({
      ...representativeFont,
      id: prefix,
      // "ibm-plex" -> "Ibm Plex"
      name: prefix.replace(/-/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()),
      imageName: representativeFont.id
    });
    mergedEmbeddings.push(embeddings[representativeIndex]);
  }

  return {
    fontDataList: mergedFonts,
    embeddingMatrices: mergedEmbeddings
  };
}

/**
 * Standardises each column of a row-major matrix to zero mean and unit
 * variance (Z-score).
 *
 * The standard deviation is the population one (divides by the row count).
 * A column with zero deviation uses a divisor of 1, so it becomes all zeros
 * instead of NaN. The input is not modified.
 *
 * @param {number[][]} data - Matrix as an array of rows; the column count is
 *   taken from the first row.
 * @returns {number[][]} A new matrix of the same shape, or `[]` for empty input.
 */
export function normalizeData(data) {
  const rows = data.length;
  // Nothing to normalise; also avoids reading data[0] of an empty array.
  if (rows === 0) return [];

  const cols = data[0].length;

  // Column means.
  const means = new Array(cols).fill(0);
  for (const row of data) {
    for (let j = 0; j < cols; j++) means[j] += row[j];
  }
  for (let j = 0; j < cols; j++) means[j] /= rows;

  // Column population standard deviations.
  const stds = new Array(cols).fill(0);
  for (const row of data) {
    for (let j = 0; j < cols; j++) {
      const diff = row[j] - means[j];
      stds[j] += diff * diff;
    }
  }
  for (let j = 0; j < cols; j++) {
    stds[j] = Math.sqrt(stds[j] / rows);
    // Constant column: divide by 1 rather than 0.
    if (stds[j] === 0) stds[j] = 1;
  }

  return data.map((row) => row.map((val, j) => (val - means[j]) / stds[j]));
}

/**
 * Finds the leading eigenpairs of a symmetric matrix by power iteration with
 * deflation. The input matrix is copied, not modified.
 *
 * Each start vector is random (`Math.random`), so the sign of every returned
 * eigenvector is arbitrary. If the working matrix maps the current vector to
 * exactly zero, iteration stops and the eigenvalue comes out as 0.
 *
 * @param {Array<ArrayLike<number>>} matrix - Square symmetric matrix.
 * @param {number} count - Number of eigenpairs to extract.
 * @returns {{values: number[], vectors: Float64Array[]}} Rayleigh-quotient
 *   eigenvalues and their vectors, in extraction order.
 */
function pcaLeadingEigenpairs(matrix, count) {
  const size = matrix.length;
  const work = matrix.map((row) => Float64Array.from(row));
  const values = [];
  const vectors = [];

  const multiply = (vec) => {
    const out = new Float64Array(size);
    for (let i = 0; i < size; i++) {
      const row = work[i];
      let sum = 0;
      for (let j = 0; j < size; j++) sum += row[j] * vec[j];
      out[i] = sum;
    }
    return out;
  };

  for (let comp = 0; comp < count; comp++) {
    let vec = new Float64Array(size);
    for (let i = 0; i < size; i++) vec[i] = Math.random() - 0.5;

    for (let iter = 0; iter < 100; iter++) {
      const next = multiply(vec);

      let norm = 0;
      for (let i = 0; i < size; i++) norm += next[i] * next[i];
      norm = Math.sqrt(norm);
      if (norm === 0) break;
      for (let i = 0; i < size; i++) next[i] /= norm;

      // Squared distance between successive iterates is the stop criterion.
      let diff = 0;
      for (let i = 0; i < size; i++) diff += (next[i] - vec[i]) ** 2;
      vec = next;
      if (diff < 1e-10) break;
    }

    // Rayleigh quotient v^T A v.
    const product = multiply(vec);
    let eigenvalue = 0;
    for (let i = 0; i < size; i++) eigenvalue += vec[i] * product[i];

    values.push(eigenvalue);
    vectors.push(vec);

    // Deflate so the next pass converges to the following eigenpair.
    for (let i = 0; i < size; i++) {
      for (let j = 0; j < size; j++) {
        work[i][j] -= eigenvalue * vec[i] * vec[j];
      }
    }
  }

  return { values, vectors };
}

/**
 * PCA via eigen-decomposition (browser-friendly, no ml-matrix dependency).
 * Reduces nDims → nComponents and returns the per-row component scores.
 *
 * The eigenproblem is solved on whichever matrix is smaller:
 *  - rows < cols: the rows×rows Gram matrix X·Xᵀ/(n−1); scores are
 *    u_k·sqrt(λ_k·(n−1)).
 *  - rows >= cols: the cols×cols covariance matrix Xᵀ·X/(n−1); scores are the
 *    projection X·v_k, rescaled to length sqrt(λ_k·(n−1)). For a converged
 *    eigenvector that rescale is a no-op; it only damps trailing components
 *    whose eigenvalue is numerically zero, matching the Gram path.
 *
 * The sign of each component is arbitrary (random start vectors). The explained
 * variance is logged relative to the total variance (matrix trace).
 *
 * @param {number[][]} data - Row-major matrix; the column count is taken from
 *   the first row. Not modified.
 * @param {number} [nComponents=50] - Requested components; capped at
 *   min(cols, rows).
 * @returns {number[][]} rows × min(nComponents, cols, rows) scores; `[]` for
 *   empty input.
 */
export function applyPCA(data, nComponents = 50) {
  const rows = data.length;
  if (rows === 0) return [];

  const cols = data[0].length;
  const target = Math.max(0, Math.min(nComponents, cols, rows));
  // n − 1, floored at 1 so a single-row input yields zeros instead of NaN.
  const dof = Math.max(rows - 1, 1);

  // Center columns
  const means = new Array(cols).fill(0);
  for (const row of data) {
    for (let j = 0; j < cols; j++) means[j] += row[j];
  }
  for (let j = 0; j < cols; j++) means[j] /= rows;

  const centered = data.map((row) => row.map((v, j) => v - means[j]));

  const useGram = rows < cols;
  const size = useGram ? rows : cols;
  const scatter = Array.from({ length: size }, () => new Float64Array(size));

  if (useGram) {
    // Gram matrix: X * X^T (rows × rows)
    for (let i = 0; i < rows; i++) {
      for (let j = i; j < rows; j++) {
        let dot = 0;
        for (let k = 0; k < cols; k++) dot += centered[i][k] * centered[j][k];
        scatter[i][j] = dot / dof;
        scatter[j][i] = scatter[i][j];
      }
    }
  } else {
    console.log(`📐 PCA: using standard covariance path (${cols}D → ${target}D)`);
    // Covariance matrix: X^T * X (cols × cols), accumulated row by row.
    for (const row of centered) {
      for (let a = 0; a < cols; a++) {
        const scatterRow = scatter[a];
        const va = row[a];
        for (let b = a; b < cols; b++) scatterRow[b] += va * row[b];
      }
    }
    for (let a = 0; a < cols; a++) {
      for (let b = a; b < cols; b++) {
        scatter[a][b] /= dof;
        scatter[b][a] = scatter[a][b];
      }
    }
  }

  // Total variance = trace; identical for the Gram and covariance matrices.
  let totalVar = 0;
  for (let i = 0; i < size; i++) totalVar += scatter[i][i];

  const { values, vectors } = pcaLeadingEigenpairs(scatter, target);

  const result = Array.from({ length: rows }, () => new Array(target));
  for (let comp = 0; comp < target; comp++) {
    const vec = vectors[comp];
    const scale = Math.sqrt(Math.max(0, values[comp]) * dof);

    if (useGram) {
      for (let i = 0; i < rows; i++) result[i][comp] = vec[i] * scale;
      continue;
    }

    // Project every centred row on the principal axis.
    const projection = new Float64Array(rows);
    let norm = 0;
    for (let i = 0; i < rows; i++) {
      const row = centered[i];
      let dot = 0;
      for (let j = 0; j < cols; j++) dot += row[j] * vec[j];
      projection[i] = dot;
      norm += dot * dot;
    }
    norm = Math.sqrt(norm);
    const factor = norm > 0 ? scale / norm : 0;
    for (let i = 0; i < rows; i++) result[i][comp] = projection[i] * factor;
  }

  const explainedVar = values.reduce((s, v) => s + Math.max(0, v), 0);
  console.log(`📐 PCA: ${cols}D → ${target}D (${(explainedVar / (totalVar || 1) * 100).toFixed(1)}% variance)`);

  return result;
}