/**
 * UMAP Utils - Utility functions for the UMAP computation.
 *
 * Contains the pure functions used by the Web Worker.
 */

/**
 * Loads the CLIP embeddings from the bundled JSON file.
 *
 * @returns {Promise<{fonts: Array}>} Parsed embeddings payload.
 * @throws {Error} When the HTTP request fails or returns a non-2xx status.
 */
export async function loadEmbeddings() {
  console.log('🔄 Chargement des embeddings CLIP...');
  try {
    const response = await fetch('/data/embeddings.json');
    if (!response.ok) {
      throw new Error(`HTTP Error: ${response.status}`);
    }
    const data = await response.json();
    console.log(`✅ ${data.fonts.length} polices chargées`);
    return data;
  } catch (error) {
    // Log for diagnostics, then rethrow so the caller can react.
    console.error('❌ Erreur lors du chargement des embeddings:', error);
    throw error;
  }
}

/**
 * Extracts the family prefix used to merge font variants
 * (e.g. "noto-sans-arabic" → "noto", "baloo-da-2" → "baloo").
 *
 * @param {string} fontId - Hyphenated font identifier.
 * @param {{subsets?: string[]}|null|undefined} fontData - Optional font metadata.
 * @returns {string} The fusion prefix for this font.
 */
export function extractFusionPrefix(fontId, fontData) {
  const parts = fontId.split('-');
  if (parts.length <= 1) {
    return fontId;
  }

  // Strip non-standard subset suffixes (e.g. "hind-devanagari" → "hind").
  if (fontData && fontData.subsets && Array.isArray(fontData.subsets)) {
    const commonSubsets = ['latin', 'latin-ext', 'cyrillic', 'cyrillic-ext', 'greek', 'greek-ext'];
    for (const subset of fontData.subsets) {
      if (!commonSubsets.includes(subset) && fontId.includes(subset)) {
        const baseName = fontId.replace(`-${subset}`, '').replace(subset, '');
        if (baseName && baseName !== fontId) {
          return baseName;
        }
      }
    }
  }

  // Explicitly mapped families.
  const specialCases = {
    'baloo': ['baloo-2', 'baloo-bhai-2', 'baloo-bhaijaan-2', 'baloo-bhaina-2',
      'baloo-chettan-2', 'baloo-da-2', 'baloo-paaji-2', 'baloo-tamma-2',
      'baloo-tammudu-2', 'baloo-thambi-2'],
    'ibm-plex': ['ibm-plex'],
    'playwrite': ['playwrite'],
  };
  for (const [familyPrefix, patterns] of Object.entries(specialCases)) {
    for (const pattern of patterns) {
      if (fontId.startsWith(pattern)) {
        return familyPrefix;
      }
    }
  }

  // Noto fonts: serif variants keep their own sub-family.
  if (fontId.startsWith('noto-serif-')) return 'noto-serif';
  if (fontId.startsWith('noto-')) return 'noto';

  // Keep two-word prefixes such as "open-sans" / "pt-serif".
  const secondWord = parts[1];
  if (secondWord === 'sans' || secondWord === 'serif' || secondWord === 'plex') {
    return parts.slice(0, 2).join('-');
  }

  return parts[0];
}

/**
 * Merges font family variants into a single representative entry per family.
 *
 * @param {Array<Object>} fontDataList - Font metadata records (must have `id`).
 * @param {Array} embeddingMatrices - Embedding rows, parallel to fontDataList.
 * @param {boolean} [enableFusion=true] - When false, inputs are returned untouched.
 * @returns {{fontDataList: Array<Object>, embeddingMatrices: Array}}
 */
export function mergeFontFamilies(fontDataList, embeddingMatrices, enableFusion = true) {
  if (!enableFusion) {
    return { fontDataList, embeddingMatrices };
  }

  const prefixGroups = {};
  const prefixEmbeddingGroups = {};

  // Group fonts (and their embeddings, kept parallel) by fusion prefix.
  for (let i = 0; i < fontDataList.length; i++) {
    const font = fontDataList[i];
    const prefix = extractFusionPrefix(font.id, font);
    if (!prefixGroups[prefix]) {
      prefixGroups[prefix] = [];
      prefixEmbeddingGroups[prefix] = [];
    }
    prefixGroups[prefix].push(font);
    prefixEmbeddingGroups[prefix].push(embeddingMatrices[i]);
  }

  const mergedFonts = [];
  const mergedEmbeddings = [];

  // Build merged entries: families with several members collapse to one
  // representative; singletons pass through (with imageName added).
  for (const [prefix, fonts] of Object.entries(prefixGroups)) {
    if (fonts.length > 1) {
      let representativeFont = fonts[0];

      // Hand-picked representatives for a few large families.
      const representatives = {
        'noto': 'noto-sans-arabic',
        'noto-serif': 'noto-serif-latin',
        'ibm-plex': 'ibm-plex-sans',
        'baloo': 'baloo-2',
      };
      if (representatives[prefix]) {
        const found = fonts.find((f) => f.id === representatives[prefix]);
        if (found) representativeFont = found;
      }

      const representativeIndex = fonts.findIndex((f) => f.id === representativeFont.id);
      const representativeEmbedding = prefixEmbeddingGroups[prefix][representativeIndex];

      const mergedFont = {
        ...representativeFont,
        id: prefix,
        // "ibm-plex" → "Ibm Plex": hyphens to spaces, then title-case.
        name: prefix.replace(/-/g, ' ').replace(/\b\w/g, (l) => l.toUpperCase()),
        imageName: representativeFont.id,
      };

      mergedFonts.push(mergedFont);
      mergedEmbeddings.push(representativeEmbedding);
    } else {
      mergedFonts.push({ ...fonts[0], imageName: fonts[0].id });
      mergedEmbeddings.push(prefixEmbeddingGroups[prefix][0]);
    }
  }

  return { fontDataList: mergedFonts, embeddingMatrices: mergedEmbeddings };
}

/**
 * Normalizes data column-wise (Z-score standardization).
 *
 * Uses the population standard deviation (divides by N); constant columns
 * (std = 0) are left centered but unscaled to avoid division by zero.
 *
 * @param {number[][]} data - Row-major matrix; all rows must share a length.
 * @returns {number[][]} New matrix with zero-mean, unit-std columns.
 */
export function normalizeData(data) {
  const rows = data.length;
  if (rows === 0) return []; // nothing to normalize
  const cols = data[0].length;
  const means = new Array(cols).fill(0);
  const stds = new Array(cols).fill(0);

  for (let i = 0; i < rows; i++) {
    for (let j = 0; j < cols; j++) {
      means[j] += data[i][j];
    }
  }
  for (let j = 0; j < cols; j++) {
    means[j] /= rows;
  }

  for (let i = 0; i < rows; i++) {
    for (let j = 0; j < cols; j++) {
      const diff = data[i][j] - means[j];
      stds[j] += diff * diff;
    }
  }
  for (let j = 0; j < cols; j++) {
    stds[j] = Math.sqrt(stds[j] / rows);
    if (stds[j] === 0) stds[j] = 1; // constant column: avoid divide-by-zero
  }

  return data.map((row) =>
    row.map((val, j) => (val - means[j]) / stds[j])
  );
}

/**
 * Extracts the top `count` eigenpairs of a symmetric `size`×`size` matrix
 * via power iteration with deflation. The input matrix is not mutated.
 *
 * @param {Float64Array[]} matrix - Symmetric matrix.
 * @param {number} size - Matrix dimension.
 * @param {number} count - Number of eigenpairs to extract.
 * @returns {{eigenvalues: number[], eigenvectors: Float64Array[]}}
 */
function topEigenpairs(matrix, size, count) {
  const work = matrix.map((row) => Float64Array.from(row));
  const eigenvalues = [];
  const eigenvectors = [];

  for (let comp = 0; comp < count; comp++) {
    let vec = new Float64Array(size);
    for (let i = 0; i < size; i++) vec[i] = Math.random() - 0.5;

    for (let iter = 0; iter < 100; iter++) {
      const next = new Float64Array(size);
      for (let i = 0; i < size; i++) {
        let sum = 0;
        for (let j = 0; j < size; j++) sum += work[i][j] * vec[j];
        next[i] = sum;
      }
      let norm = 0;
      for (let i = 0; i < size; i++) norm += next[i] * next[i];
      norm = Math.sqrt(norm);
      if (norm === 0) break; // matrix fully deflated: no direction left
      for (let i = 0; i < size; i++) next[i] /= norm;
      let diff = 0;
      for (let i = 0; i < size; i++) diff += (next[i] - vec[i]) ** 2;
      vec = next;
      if (diff < 1e-10) break; // converged
    }

    // Rayleigh quotient: eigenvalue = v^T A v (v is unit-norm).
    let eigenvalue = 0;
    for (let i = 0; i < size; i++) {
      let sum = 0;
      for (let j = 0; j < size; j++) sum += work[i][j] * vec[j];
      eigenvalue += vec[i] * sum;
    }
    eigenvalues.push(eigenvalue);
    eigenvectors.push(vec);

    // Deflate so the next iteration converges to the next eigenvector.
    for (let i = 0; i < size; i++) {
      for (let j = 0; j < size; j++) {
        work[i][j] -= eigenvalue * vec[i] * vec[j];
      }
    }
  }

  return { eigenvalues, eigenvectors };
}

/**
 * PCA via covariance eigen-decomposition (browser-friendly, no ml-matrix
 * dependency). Reduces nDims → nComponents, concentrating variance for
 * better UMAP quality.
 *
 * Fix vs. previous version: the rows >= cols path now performs a real
 * covariance eigen-decomposition — it used to return the centered data
 * truncated to the first `target` columns, which is not a projection.
 *
 * @param {number[][]} data - Row-major samples × dimensions matrix.
 * @param {number} [nComponents=50] - Desired output dimensionality.
 * @returns {number[][]} Projected scores, rows × min(nComponents, cols, rows).
 */
export function applyPCA(data, nComponents = 50) {
  const rows = data.length;
  if (rows === 0) return []; // nothing to project
  const cols = data[0].length;
  const target = Math.min(nComponents, cols, rows);
  // Guard the (n-1) divisor so a single-sample input cannot divide by zero.
  const denom = Math.max(1, rows - 1);

  // Center columns.
  const means = new Array(cols).fill(0);
  for (let i = 0; i < rows; i++) {
    for (let j = 0; j < cols; j++) means[j] += data[i][j];
  }
  for (let j = 0; j < cols; j++) means[j] /= rows;
  const centered = data.map((row) => row.map((v, j) => v - means[j]));

  // When rows < cols (typical: ~800 fonts, 512 dims), the rows×rows Gram
  // matrix is cheaper than the cols×cols covariance.
  if (rows < cols) {
    // Gram matrix: G = X·X^T / (n-1).
    const gram = Array.from({ length: rows }, () => new Float64Array(rows));
    for (let i = 0; i < rows; i++) {
      for (let j = i; j < rows; j++) {
        let dot = 0;
        for (let k = 0; k < cols; k++) dot += centered[i][k] * centered[j][k];
        gram[i][j] = dot / denom;
        gram[j][i] = gram[i][j];
      }
    }

    const { eigenvalues, eigenvectors } = topEigenpairs(gram, rows, target);

    // Scores: column i = u_i · sqrt(lambda_i · (n-1)) — the left singular
    // vectors scaled by singular values; a zero eigenvalue zeroes the column.
    const result = Array.from({ length: rows }, () => new Array(target));
    for (let comp = 0; comp < target; comp++) {
      const scale = Math.sqrt(Math.max(0, eigenvalues[comp]) * denom);
      for (let i = 0; i < rows; i++) {
        result[i][comp] = eigenvectors[comp][i] * scale;
      }
    }

    const totalVar = eigenvalues.reduce((s, v) => s + Math.max(0, v), 0) || 1;
    const explainedVar = eigenvalues.slice(0, target).reduce((s, v) => s + Math.max(0, v), 0);
    console.log(`📐 PCA: ${cols}D → ${target}D (${(explainedVar / totalVar * 100).toFixed(1)}% variance)`);
    return result;
  }

  // Standard path when rows >= cols: covariance matrix C = X^T·X / (n-1).
  console.log(`📐 PCA: using standard covariance path (${cols}D → ${target}D)`);
  const cov = Array.from({ length: cols }, () => new Float64Array(cols));
  for (let a = 0; a < cols; a++) {
    for (let b = a; b < cols; b++) {
      let dot = 0;
      for (let i = 0; i < rows; i++) dot += centered[i][a] * centered[i][b];
      cov[a][b] = dot / denom;
      cov[b][a] = cov[a][b];
    }
  }

  const { eigenvalues, eigenvectors } = topEigenpairs(cov, cols, target);

  // Project the centered data onto the eigenvectors. Near-zero eigenvalues
  // leave the power-iteration vector arbitrary, so zero those score columns
  // instead of projecting onto noise.
  const result = Array.from({ length: rows }, () => new Array(target));
  for (let comp = 0; comp < target; comp++) {
    const v = eigenvectors[comp];
    const usable = eigenvalues[comp] > 1e-10;
    for (let i = 0; i < rows; i++) {
      let score = 0;
      if (usable) {
        for (let k = 0; k < cols; k++) score += centered[i][k] * v[k];
      }
      result[i][comp] = score;
    }
  }
  return result;
}