| /** | |
| * UMAP Utils - Fonctions utilitaires pour le calcul UMAP | |
| * | |
| * Contient les fonctions pures utilisées par le Web Worker | |
| */ | |
/**
 * Fetches the precomputed CLIP embeddings from the static JSON file.
 *
 * @returns {Promise<Object>} parsed payload; the code relies on it exposing a
 *   `fonts` array (used for the count log).
 * @throws {Error} on a non-2xx HTTP status, or rethrows any fetch/parse error
 *   after logging it.
 */
export async function loadEmbeddings() {
  console.log('🔄 Chargement des embeddings CLIP...');
  try {
    const res = await fetch('/data/embeddings.json');
    if (!res.ok) throw new Error(`HTTP Error: ${res.status}`);
    const payload = await res.json();
    console.log(`✅ ${payload.fonts.length} polices chargées`);
    return payload;
  } catch (err) {
    // Log for diagnostics, then let the caller decide how to recover.
    console.error('❌ Erreur lors du chargement des embeddings:', err);
    throw err;
  }
}
/**
 * Derives the family-merge prefix for a font id, so that variants of the
 * same family (e.g. "noto-sans-arabic", "noto-sans-thai") collapse to one
 * group key.
 *
 * Resolution order: single-segment ids pass through → non-standard subset
 * suffixes are stripped → hard-coded family patterns → noto special cases →
 * "sans"/"serif"/"plex" second words keep two segments → first segment.
 *
 * @param {string} fontId - hyphen-separated font identifier.
 * @param {{subsets?: string[]}} [fontData] - optional metadata; only
 *   `subsets` is consulted.
 * @returns {string} the fusion group key.
 */
export function extractFusionPrefix(fontId, fontData) {
  const segments = fontId.split('-');
  if (segments.length <= 1) return fontId;

  // A subset name outside the common Latin/Cyrillic/Greek set (e.g. a script
  // name baked into the id) is removed to recover the base family name.
  const standardSubsets = new Set(['latin', 'latin-ext', 'cyrillic', 'cyrillic-ext', 'greek', 'greek-ext']);
  const subsets = Array.isArray(fontData?.subsets) ? fontData.subsets : [];
  for (const subset of subsets) {
    if (standardSubsets.has(subset) || !fontId.includes(subset)) continue;
    const stripped = fontId.replace(`-${subset}`, '').replace(subset, '');
    if (stripped && stripped !== fontId) return stripped;
  }

  // Hand-maintained families whose ids don't follow the generic rules.
  const familyPatterns = [
    ['baloo', ['baloo-2', 'baloo-bhai-2', 'baloo-bhaijaan-2', 'baloo-bhaina-2', 'baloo-chettan-2', 'baloo-da-2', 'baloo-paaji-2', 'baloo-tamma-2', 'baloo-tammudu-2', 'baloo-thambi-2']],
    ['ibm-plex', ['ibm-plex']],
    ['playwrite', ['playwrite']],
  ];
  for (const [family, prefixes] of familyPatterns) {
    if (prefixes.some((p) => fontId.startsWith(p))) return family;
  }

  // Noto: serif variants group separately from the rest.
  if (fontId.startsWith('noto-serif-')) return 'noto-serif';
  if (fontId.startsWith('noto-')) return 'noto';

  // "x-sans"/"x-serif"/"x-plex" keep both words; otherwise the first wins.
  return ['sans', 'serif', 'plex'].includes(segments[1])
    ? segments.slice(0, 2).join('-')
    : segments[0];
}
/**
 * Collapses fonts that share a fusion prefix into a single representative
 * entry, keeping font data and embedding matrices index-aligned.
 *
 * @param {Array<Object>} fontDataList - fonts, each with at least an `id`.
 * @param {Array<*>} embeddingMatrices - embeddings, parallel to fontDataList.
 * @param {boolean} [enableFusion=true] - when false, inputs pass through untouched.
 * @returns {{fontDataList: Array<Object>, embeddingMatrices: Array<*>}}
 */
export function mergeFontFamilies(fontDataList, embeddingMatrices, enableFusion = true) {
  if (!enableFusion) {
    return { fontDataList, embeddingMatrices };
  }

  // Bucket fonts (and their embeddings, index-aligned) by fusion prefix.
  const groups = new Map();
  fontDataList.forEach((font, i) => {
    const prefix = extractFusionPrefix(font.id, font);
    if (!groups.has(prefix)) groups.set(prefix, { fonts: [], embeddings: [] });
    const bucket = groups.get(prefix);
    bucket.fonts.push(font);
    bucket.embeddings.push(embeddingMatrices[i]);
  });

  // Preferred representative id per family; first group member otherwise.
  const REPRESENTATIVES = {
    'noto': 'noto-sans-arabic',
    'noto-serif': 'noto-serif-latin',
    'ibm-plex': 'ibm-plex-sans',
    'baloo': 'baloo-2'
  };

  const mergedFonts = [];
  const mergedEmbeddings = [];
  for (const [prefix, { fonts, embeddings }] of groups) {
    if (fonts.length === 1) {
      // Singleton group: keep as-is, but record which image to display.
      mergedFonts.push({ ...fonts[0], imageName: fonts[0].id });
      mergedEmbeddings.push(embeddings[0]);
      continue;
    }
    const preferredId = REPRESENTATIVES[prefix];
    const representative =
      (preferredId && fonts.find((f) => f.id === preferredId)) || fonts[0];
    const repIndex = fonts.findIndex((f) => f.id === representative.id);
    mergedFonts.push({
      ...representative,
      id: prefix,
      // "noto-serif" → "Noto Serif" (title-case display name).
      name: prefix.replace(/-/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase()),
      imageName: representative.id,
    });
    mergedEmbeddings.push(embeddings[repIndex]);
  }

  return {
    fontDataList: mergedFonts,
    embeddingMatrices: mergedEmbeddings
  };
}
/**
 * Z-score standardization: shifts each column to mean 0 and scales it to
 * unit (population) standard deviation. Constant columns are centered but
 * left unscaled (std forced to 1 to avoid division by zero).
 *
 * @param {number[][]} data - rectangular matrix (rows × cols); not mutated.
 * @returns {number[][]} a new normalized matrix of the same shape.
 */
export function normalizeData(data) {
  const rowCount = data.length;
  const colCount = data[0].length;

  // Column means.
  const mean = new Array(colCount).fill(0);
  for (const row of data) {
    for (let j = 0; j < colCount; j++) mean[j] += row[j];
  }
  for (let j = 0; j < colCount; j++) mean[j] /= rowCount;

  // Column population standard deviations.
  const std = new Array(colCount).fill(0);
  for (const row of data) {
    for (let j = 0; j < colCount; j++) {
      const delta = row[j] - mean[j];
      std[j] += delta * delta;
    }
  }
  for (let j = 0; j < colCount; j++) {
    std[j] = Math.sqrt(std[j] / rowCount);
    if (std[j] === 0) std[j] = 1;
  }

  return data.map((row) => row.map((v, j) => (v - mean[j]) / std[j]));
}
/**
 * PCA via covariance eigen-decomposition (browser-friendly, no ml-matrix dependency).
 * Reduces nDims → nComponents, concentrating variance for better UMAP quality.
 *
 * @param {number[][]} data - rectangular matrix (rows × cols); not mutated.
 * @param {number} [nComponents=50] - requested output dimensionality;
 *   clamped to min(nComponents, cols, rows).
 * @returns {number[][]} rows × target matrix of projection scores.
 *
 * NOTE(review): power iteration is seeded with Math.random(), so component
 * signs (and results for near-degenerate eigenvalues) are NOT deterministic
 * run-to-run — confirm this is acceptable for the UMAP pipeline.
 */
export function applyPCA(data, nComponents = 50) {
  const rows = data.length;
  const cols = data[0].length;
  const target = Math.min(nComponents, cols, rows);
  // Center columns
  const means = new Array(cols).fill(0);
  for (let i = 0; i < rows; i++) {
    for (let j = 0; j < cols; j++) means[j] += data[i][j];
  }
  for (let j = 0; j < cols; j++) means[j] /= rows;
  const centered = data.map(row => row.map((v, j) => v - means[j]));
  // For browser perf: use SVD-like approach via X^T * X when cols > rows
  // When rows < cols (typical: ~800 fonts, 512 dims), compute rows×rows gram matrix
  if (rows < cols) {
    // Gram matrix: X * X^T (rows × rows), scaled by 1/(rows-1) so its
    // eigenvalues match the sample-covariance eigenvalues.
    const gram = Array.from({ length: rows }, () => new Float64Array(rows));
    for (let i = 0; i < rows; i++) {
      for (let j = i; j < rows; j++) {
        let dot = 0;
        for (let k = 0; k < cols; k++) dot += centered[i][k] * centered[j][k];
        gram[i][j] = dot / (rows - 1);
        gram[j][i] = gram[i][j]; // symmetric: mirror upper triangle
      }
    }
    // Power iteration for top eigenvectors of gram matrix
    const eigenvectors = [];
    const eigenvalues = [];
    // Working copy: deflated in place after each extracted component.
    const gramCopy = gram.map(row => Float64Array.from(row));
    for (let comp = 0; comp < target; comp++) {
      let vec = new Float64Array(rows);
      // Random start vector → converges to the dominant remaining eigenvector.
      for (let i = 0; i < rows; i++) vec[i] = Math.random() - 0.5;
      for (let iter = 0; iter < 100; iter++) {
        // One iteration: newVec = gramCopy · vec, then renormalize.
        const newVec = new Float64Array(rows);
        for (let i = 0; i < rows; i++) {
          let sum = 0;
          for (let j = 0; j < rows; j++) sum += gramCopy[i][j] * vec[j];
          newVec[i] = sum;
        }
        let norm = 0;
        for (let i = 0; i < rows; i++) norm += newVec[i] * newVec[i];
        norm = Math.sqrt(norm);
        if (norm === 0) break; // matrix fully deflated — no signal left
        for (let i = 0; i < rows; i++) newVec[i] /= norm;
        // Convergence check on the squared change between iterations.
        let diff = 0;
        for (let i = 0; i < rows; i++) diff += (newVec[i] - vec[i]) ** 2;
        vec = newVec;
        if (diff < 1e-10) break;
      }
      // Rayleigh quotient eigenvalue: λ = vᵀ · (A · v) (v is unit-norm).
      let eigenvalue = 0;
      const Av = new Float64Array(rows);
      for (let i = 0; i < rows; i++) {
        let sum = 0;
        for (let j = 0; j < rows; j++) sum += gramCopy[i][j] * vec[j];
        Av[i] = sum;
      }
      for (let i = 0; i < rows; i++) eigenvalue += vec[i] * Av[i];
      eigenvalues.push(eigenvalue);
      eigenvectors.push(vec);
      // Deflate: A ← A − λ · v·vᵀ so the next iteration finds the next component.
      for (let i = 0; i < rows; i++) {
        for (let j = 0; j < rows; j++) {
          gramCopy[i][j] -= eigenvalue * vec[i] * vec[j];
        }
      }
    }
    // Project: each component = X^T * u_i / sqrt(lambda_i * (n-1))
    // (score_i = u_i · s_i where s_i = sqrt(λ_i·(n-1)) is the singular value;
    //  Math.max guards against tiny negative λ from numerical error.)
    const result = Array.from({ length: rows }, () => new Array(target));
    for (let comp = 0; comp < target; comp++) {
      for (let i = 0; i < rows; i++) {
        result[i][comp] = eigenvectors[comp][i] * Math.sqrt(Math.max(0, eigenvalues[comp]) * (rows - 1));
      }
    }
    const totalVar = eigenvalues.reduce((s, v) => s + Math.max(0, v), 0) || 1;
    const explainedVar = eigenvalues.slice(0, target).reduce((s, v) => s + Math.max(0, v), 0);
    console.log(`📐 PCA: ${cols}D → ${target}D (${(explainedVar / totalVar * 100).toFixed(1)}% variance)`);
    return result;
  }
  // Standard path when rows >= cols: covariance matrix cols × cols
  // (fallback, unlikely for fonts dataset)
  // NOTE(review): this branch merely truncates the centered columns to the
  // first `target` dimensions — it is NOT a real PCA projection. Acceptable
  // only because rows >= cols is not expected for this dataset; confirm.
  console.log(`📐 PCA: using standard covariance path (${cols}D → ${target}D)`);
  return centered.map(row => row.slice(0, target));
}