OCR_DATASET_MAKER / src /core /font-manager.ts
Omarrran's picture
OCR Dataset Generator for HF Spaces
24a732c
/**
* Font Manager - Multi-font loading and percentage-based distribution
*/
import { readdir, stat } from 'fs/promises';
import { join, extname, basename } from 'path';
import type { FontConfig, FontDistributionEntry, FontSizeConfig } from './config.js';
/**
 * Metadata describing one font file found in the fonts directory.
 */
export interface LoadedFont {
/** Family name parsed from the filename; fonts are grouped and counted under this key. */
family: string;
/** Fonts directory joined with the filename. */
path: string;
/** Name of the file as listed in the fonts directory, including its extension. */
filename: string;
/** Lowercased style suffix parsed from the filename (e.g. "bold"); absent when none was recognized. */
style?: string;
/** Numeric font weight. NOTE(review): nothing visible in this file populates it. */
weight?: number;
}
/**
 * A font paired with a size, as returned by FontManager.select().
 */
export interface FontSelection {
/** The font that was chosen. */
font: LoadedFont;
/** The size produced by FontManager.selectSize() for this selection. */
size: number;
}
/**
 * Usage statistics reported by FontManager.getStats().
 */
export interface FontManagerStats {
/** Number of font files currently loaded. */
totalFonts: number;
/** Number of recorded selections, keyed by font family. */
byFamily: Record<string, number>;
/**
 * One row per configured distribution entry: the configured `percentage`
 * next to `actual`, the observed share of all recorded selections expressed
 * as a percentage rounded to one decimal place.
 */
distribution: { family: string; percentage: number; actual: number }[];
}
/**
 * Supported font file extensions.
 *
 * Entries are lowercase and include the leading dot. FontManager.loadFonts()
 * lowercases each file's extension before checking it against this list, so
 * any entry added here must be lowercase as well.
 */
const FONT_EXTENSIONS = ['.ttf', '.otf', '.woff', '.woff2'];
/**
 * Style suffixes recognized as the last token of a font filename.
 * Compared against the lowercased token.
 */
const FONT_STYLE_SUFFIXES: readonly string[] = [
  'regular',
  'bold',
  'italic',
  'light',
  'medium',
  'semibold',
  'thin',
  'black',
];

/**
 * Parse a font filename to extract family and style info.
 *
 * The extension and any directory part are dropped, then the remaining stem
 * is split on runs of "-", "_" and whitespace ("FontName-Style",
 * "FontName_Style", "FontName Style"). When the last token is a known style,
 * it is returned lowercased as `style` and the preceding tokens, joined with
 * single spaces, form `family`. Otherwise the untouched stem is the family
 * and no `style` key is present.
 *
 * @param filename - File name or path of the font file.
 * @returns The family name, plus the style when one was recognized.
 */
function parseFontFilename(filename: string): { family: string; style?: string } {
  const stem = basename(filename, extname(filename));

  // Leading or trailing separators produce empty tokens; dropping them keeps
  // names such as "-Bold" from yielding an empty family and "_Roboto-Bold"
  // from yielding a family with a leading space.
  const tokens = stem.split(/[-_\s]+/).filter((token) => token !== '');

  // A lone token is always the family, even if it happens to be a style word.
  if (tokens.length > 1) {
    const suffix = tokens.pop()?.toLowerCase();
    if (suffix !== undefined && FONT_STYLE_SUFFIXES.includes(suffix)) {
      return { family: tokens.join(' '), style: suffix };
    }
  }

  return { family: stem };
}
/**
 * Font Manager - loads font files from a directory and selects fonts and sizes.
 *
 * Fonts are grouped by the family name parsed from their filename. Selection
 * follows the configured percentage distribution when one is present and is
 * uniform over all loaded fonts otherwise. Every selection is counted per
 * family so that getStats() can compare configured and observed shares.
 *
 * Passing a seed to the constructor makes the sequence of random draws
 * deterministic.
 */
export class FontManager {
  /** All loaded fonts, in load order. */
  private fonts: LoadedFont[] = [];
  /** Loaded fonts grouped by parsed family name. */
  private fontsByFamily: Map<string, LoadedFont[]> = new Map();
  /** Configured family percentages; empty means "pick uniformly". */
  private distribution: FontDistributionEntry[] = [];
  private sizeConfig: FontSizeConfig;
  private fallbackFamily?: string;
  /** Recorded selections, keyed by the loaded font's family name. */
  private selectionCounts: Map<string, number> = new Map();
  private totalSelections = 0;
  /** Source of random numbers in [0, 1). */
  private random: () => number;

  /**
   * @param config - Font configuration (distribution, size range, fallback family).
   * @param seed - Optional seed; when given, a linear congruential generator
   *   replaces Math.random so that draws are reproducible.
   */
  constructor(config: FontConfig, seed?: number) {
    this.distribution = config.distribution;
    this.sizeConfig = config.size;
    this.fallbackFamily = config.fallback;

    if (seed !== undefined) {
      const modulus = 2147483648; // 2^31
      let state = seed;
      this.random = () => {
        const next = (state * 1103515245 + 12345) % modulus;
        // The % operator keeps the sign of the dividend, so a negative seed
        // would yield negative "random" values and out-of-range array
        // indexes. Wrap the state back into [0, modulus).
        state = next < 0 ? next + modulus : next;
        return state / modulus;
      };
    } else {
      this.random = Math.random;
    }
  }

  /**
   * Load fonts from the specified directory.
   *
   * Only regular files whose lowercased extension is in FONT_EXTENSIONS are
   * loaded. Subdirectories are not scanned.
   *
   * @param directory - Directory containing the font files.
   * @throws Rethrows any error raised while listing the directory or
   *   stat-ing a file, after logging it.
   */
  async loadFonts(directory: string): Promise<void> {
    try {
      // The order returned by readdir() depends on the OS and filesystem.
      // Sorting keeps the font order, and with it every seeded selection and
      // the "first font" fallback, identical across machines.
      const files = (await readdir(directory)).sort();

      for (const file of files) {
        const ext = extname(file).toLowerCase();
        if (!FONT_EXTENSIONS.includes(ext)) continue;

        const filePath = join(directory, file);
        const fileStat = await stat(filePath);
        if (!fileStat.isFile()) continue;

        const { family, style } = parseFontFilename(file);
        const loadedFont: LoadedFont = {
          family,
          path: filePath,
          filename: file,
          style,
        };
        this.fonts.push(loadedFont);

        // Group by family
        const siblings = this.fontsByFamily.get(family);
        if (siblings) {
          siblings.push(loadedFont);
        } else {
          this.fontsByFamily.set(family, [loadedFont]);
        }

        // Start counting new families at zero, but keep counts that already
        // exist: wiping them on a later load would leave them out of sync
        // with totalSelections.
        if (!this.selectionCounts.has(family)) {
          this.selectionCounts.set(family, 0);
        }
      }

      console.log(`Loaded ${this.fonts.length} fonts from ${this.fontsByFamily.size} families`);
    } catch (error) {
      console.error(`Error loading fonts from ${directory}:`, error);
      throw error;
    }
  }

  /**
   * Get list of loaded font families.
   */
  getFamilies(): string[] {
    return [...this.fontsByFamily.keys()];
  }

  /**
   * Get all loaded fonts (a shallow copy of the internal list).
   */
  getAllFonts(): LoadedFont[] {
    return [...this.fonts];
  }

  /**
   * Select a font based on the configured distribution.
   *
   * With no distribution configured, a font is picked uniformly from all
   * loaded fonts. Otherwise a family is drawn by cumulative percentage; when
   * the drawn family has no loaded fonts, the next configured family that
   * does have fonts is used, and if none remains the fallback font is used.
   *
   * @returns The selected font, or null when no fonts are loaded.
   */
  selectFont(): LoadedFont | null {
    if (this.fonts.length === 0) {
      console.warn('No fonts loaded');
      return null;
    }

    // If no distribution configured, select randomly
    if (this.distribution.length === 0) {
      const picked = this.pickRandom(this.fonts);
      // Count this path too, so the statistics cover every selection.
      if (picked) this.recordSelection(picked.family);
      return picked;
    }

    // Select based on percentage distribution
    const r = this.random() * 100;
    let cumulative = 0;
    for (const entry of this.distribution) {
      cumulative += entry.percentage;
      // Strict comparison: an entry owns the half-open range
      // [previous, cumulative), so a 0% entry can never be chosen.
      if (r < cumulative) {
        const picked = this.pickRandom(this.findFontsByFamily(entry.family));
        if (picked) {
          this.recordSelection(picked.family);
          return picked;
        }
      }
    }

    // Fallback to first available font
    const fallback = this.getFallbackFont();
    if (fallback) {
      this.recordSelection(fallback.family);
    }
    return fallback;
  }

  /**
   * Pick one font uniformly from the candidates.
   * No random number is consumed when the list is empty.
   */
  private pickRandom(candidates: LoadedFont[]): LoadedFont | null {
    if (candidates.length === 0) return null;
    return candidates[Math.floor(this.random() * candidates.length)] ?? null;
  }

  /**
   * Find fonts by family name (fuzzy matching).
   *
   * Tries an exact match, then a case-insensitive match, then a
   * case-insensitive substring match in either direction. The first family
   * that matches wins.
   *
   * @returns The fonts of the matched family, or an empty array.
   */
  private findFontsByFamily(family: string): LoadedFont[] {
    // Exact match
    const exact = this.fontsByFamily.get(family);
    if (exact) return exact;

    // Case-insensitive match
    const wanted = family.toLowerCase();
    for (const [key, fonts] of this.fontsByFamily) {
      if (key.toLowerCase() === wanted) {
        return fonts;
      }
    }

    // Partial match. Every string contains the empty string, so an empty
    // name on either side is skipped rather than matching everything.
    if (wanted === '') return [];
    for (const [key, fonts] of this.fontsByFamily) {
      const candidate = key.toLowerCase();
      if (candidate === '') continue;
      if (candidate.includes(wanted) || wanted.includes(candidate)) {
        return fonts;
      }
    }

    return [];
  }

  /**
   * Get fallback font: the first font of the configured fallback family when
   * it is available, otherwise the first loaded font, otherwise null.
   */
  private getFallbackFont(): LoadedFont | null {
    if (this.fallbackFamily) {
      const preferred = this.findFontsByFamily(this.fallbackFamily)[0];
      if (preferred) return preferred;
    }
    return this.fonts[0] ?? null;
  }

  /**
   * Generate a font size based on configuration.
   *
   * 'normal' draws from a normal distribution centred between min and max and
   * clamps the rounded result to that range. Any other value draws between
   * min and max and rounds to the nearest integer.
   */
  selectSize(): number {
    const { min, max, distribution } = this.sizeConfig;
    switch (distribution) {
      case 'normal': {
        // Normal distribution (Box-Muller transform). u1 must be strictly
        // positive: log(0) is -Infinity, which turns the result into NaN
        // whenever the standard deviation is 0.
        const u1 = Math.max(this.random(), Number.EPSILON);
        const u2 = this.random();
        const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
        const mean = (min + max) / 2;
        const std = (max - min) / 6; // 99.7% within range
        const size = Math.round(mean + z * std);
        return Math.max(min, Math.min(max, size));
      }
      case 'uniform':
      case 'random':
      default:
        return Math.round(min + this.random() * (max - min));
    }
  }

  /**
   * Select font and size together.
   *
   * @returns The selection, or null when no font could be selected. No size
   *   is drawn in that case.
   */
  select(): FontSelection | null {
    const font = this.selectFont();
    if (!font) return null;
    return {
      font,
      size: this.selectSize(),
    };
  }

  /**
   * Record a font selection for statistics.
   */
  private recordSelection(family: string): void {
    const current = this.selectionCounts.get(family) ?? 0;
    this.selectionCounts.set(family, current + 1);
    this.totalSelections++;
  }

  /**
   * Get statistics about font usage.
   *
   * @returns Per-family selection counts plus, for each configured
   *   distribution entry, the configured percentage next to the observed
   *   percentage (rounded to one decimal place).
   */
  getStats(): FontManagerStats {
    const byFamily: Record<string, number> = {};
    const distribution: { family: string; percentage: number; actual: number }[] = [];

    for (const [family, count] of this.selectionCounts) {
      byFamily[family] = count;
    }

    for (const entry of this.distribution) {
      // Counts are keyed by the loaded font's family name, which may differ
      // from the configured name in case or be matched only partially.
      // Resolve the entry the same way selectFont() does before looking it up.
      const matched = this.findFontsByFamily(entry.family)[0];
      const countKey = matched?.family ?? entry.family;
      const count = this.selectionCounts.get(countKey) ?? 0;
      const actual = this.totalSelections > 0
        ? (count / this.totalSelections) * 100
        : 0;
      distribution.push({
        family: entry.family,
        percentage: entry.percentage,
        actual: Math.round(actual * 10) / 10,
      });
    }

    return {
      totalFonts: this.fonts.length,
      byFamily,
      distribution,
    };
  }

  /**
   * Reset statistics: every per-family count and the total go back to zero.
   */
  resetStats(): void {
    for (const family of this.selectionCounts.keys()) {
      this.selectionCounts.set(family, 0);
    }
    this.totalSelections = 0;
  }

  /**
   * Validate font distribution (should sum to 100%, within 0.01).
   *
   * @returns `valid` and the computed `total`; `message` is set only when
   *   the distribution is invalid.
   */
  validateDistribution(): { valid: boolean; total: number; message?: string } {
    const total = this.distribution.reduce((sum, entry) => sum + entry.percentage, 0);
    if (Math.abs(total - 100) > 0.01) {
      return {
        valid: false,
        total,
        message: `Font distribution should sum to 100%, got ${total}%`,
      };
    }
    return { valid: true, total };
  }

  /**
   * Check if required fonts are available.
   *
   * @returns The configured family names split into those that resolve to at
   *   least one loaded font and those that do not.
   */
  checkAvailability(): { available: string[]; missing: string[] } {
    const available: string[] = [];
    const missing: string[] = [];
    for (const entry of this.distribution) {
      if (this.findFontsByFamily(entry.family).length > 0) {
        available.push(entry.family);
      } else {
        missing.push(entry.family);
      }
    }
    return { available, missing };
  }
}
/**
 * Build a FontManager and load its fonts in one step.
 *
 * @param config - Font configuration; `config.directory` is the folder scanned for fonts.
 * @param seed - Optional seed forwarded to the FontManager constructor.
 * @returns The manager, once loading from `config.directory` has completed.
 */
export async function createFontManager(
  config: FontConfig,
  seed?: number
): Promise<FontManager> {
  const instance = new FontManager(config, seed);
  const { directory } = config;
  await instance.loadFonts(directory);
  return instance;
}