OCR_DATASET_MAKER / src /renderer /image-generator.ts
Omarrran's picture
OCR Dataset Generator for HF Spaces
24a732c
/**
* Image Generator - Sharp-based rendering for OCR images
* Uses text-to-svg for font rendering and sharp for image composition
*/
import sharp from 'sharp';
import { mkdir } from 'fs/promises';
import { dirname } from 'path';
import type { ImageConfig, PaddingConfig } from '../core/config.js';
import type { LoadedFont } from '../core/font-manager.js';
// text-to-svg is a CommonJS module, so it is pulled in with a dynamic import
// on first use; this holds the resolved export (null until then).
let TextToSVG: any = null;

// Font renderers that have already been loaded, keyed by font file path.
const textToSvgInstances = new Map<string, any>();
/**
 * Resolve the text-to-svg export, importing the package the first time this
 * is called and reusing the module-level reference afterwards.
 */
async function loadTextToSVG() {
  if (TextToSVG) {
    return TextToSVG;
  }

  const imported = await import('text-to-svg');
  // Use the default export when present, otherwise the module namespace itself.
  TextToSVG = imported.default || imported;
  return TextToSVG;
}
/**
 * Get the text-to-svg renderer for a font file, loading it on first request
 * and serving it from the per-path cache afterwards.
 *
 * @param fontPath - Path of the font file; also used as the cache key.
 * @returns The renderer instance produced by text-to-svg's `load`.
 * @throws Rejects with the error reported by the load callback.
 */
async function getTextToSvgInstance(fontPath: string): Promise<any> {
  if (!textToSvgInstances.has(fontPath)) {
    const loader = await loadTextToSVG();

    // `load` is callback-based, so adapt it to a promise.
    return new Promise((resolve, reject) => {
      const onLoaded = (err: Error | null, instance: any) => {
        if (err) {
          reject(err);
          return;
        }
        textToSvgInstances.set(fontPath, instance);
        resolve(instance);
      };
      loader.load(fontPath, onLoaded);
    });
  }

  return textToSvgInstances.get(fontPath);
}
/**
 * Input for a single text-to-image render.
 */
export interface RenderOptions {
/** The string to draw. */
text: string;
/** Font to draw with; its `path` is what gets handed to the font loader. */
font: LoadedFont;
/** Font size forwarded to text-to-svg as its `fontSize` option. */
fontSize: number;
/** Canvas settings read by the renderer: width, height, background, text_color, padding, alignment, direction. */
config: ImageConfig;
/** Destination file for the PNG; missing parent directories are created before writing. */
outputPath: string;
}
/**
 * Outcome of `renderText`. Failures are reported through `success`/`error`
 * rather than by throwing.
 */
export interface RenderResult {
/** True when the PNG was written; false when an error was caught while rendering or saving. */
success: boolean;
/** The requested output path, echoed back in both the success and failure case. */
path: string;
/** Canvas width taken from the image config (not a measured value). */
width: number;
/** Canvas height taken from the image config (not a measured value). */
height: number;
/** Measured width of the rendered text, rounded up; 0 when the text could not be rendered or on failure. */
textWidth: number;
/** Error message; only set when `success` is false. */
error?: string;
}
/**
 * Normalize the padding setting into a four-sided padding object.
 *
 * - a number is applied to all four sides;
 * - a padding object is returned as-is;
 * - a missing value falls back to 10 left/right and 5 top/bottom.
 */
function getPadding(padding: PaddingConfig | number | undefined): PaddingConfig {
  if (typeof padding !== 'number') {
    if (padding) {
      return padding;
    }
    return { left: 10, right: 10, top: 5, bottom: 5 };
  }

  const uniform = padding;
  return { left: uniform, right: uniform, top: uniform, bottom: uniform };
}
/**
 * Parse a hex color string into its RGB channels.
 *
 * Accepts the six-digit form (`rrggbb`) and the three-digit CSS shorthand
 * (`rgb`, where each digit is doubled), with or without a leading `#`,
 * case-insensitively.
 *
 * @param hex - Hex color string, e.g. `#1a2b3c`, `1A2B3C` or `#fff`.
 * @returns Channel values in the range 0-255. Input that is not a valid
 *          three- or six-digit hex color yields black (`0, 0, 0`).
 */
function parseColor(hex: string): { r: number; g: number; b: number } {
  const match = /^#?([a-f\d]{3}|[a-f\d]{6})$/i.exec(hex);
  if (!match) {
    return { r: 0, g: 0, b: 0 };
  }

  const [, digits = ''] = match;
  // Expand shorthand by doubling every digit: "0a3" -> "00aa33".
  const expanded = digits.length === 3 ? digits.replace(/./g, '$&$&') : digits;
  const packed = parseInt(expanded, 16);

  return {
    r: (packed >> 16) & 0xff,
    g: (packed >> 8) & 0xff,
    b: packed & 0xff,
  };
}
/**
 * Convert a string into SVG markup using the given font, and measure it.
 *
 * @param text - String to draw.
 * @param fontPath - Font file used to obtain the text-to-svg renderer.
 * @param fontSize - Font size forwarded to text-to-svg.
 * @param color - Value placed in the SVG `fill` attribute.
 * @returns The SVG markup plus the text's width and height, each rounded up.
 * @throws Re-throws any error from loading the font or generating the SVG,
 *         after logging a warning; no fallback rendering happens here.
 */
async function textToSvgPath(
  text: string,
  fontPath: string,
  fontSize: number,
  color: string
): Promise<{ svg: string; width: number; height: number }> {
  try {
    const renderer = await getTextToSvgInstance(fontPath);

    // Anchor at the top-left corner so the SVG origin matches the composite offset.
    const renderOptions = {
      x: 0,
      y: 0,
      fontSize,
      anchor: 'left top' as const,
      attributes: { fill: color },
    };

    const measured = renderer.getMetrics(text, renderOptions);
    const markup = renderer.getSVG(text, renderOptions);
    const width = Math.ceil(measured.width);
    const height = Math.ceil(measured.height);

    return { svg: markup, width, height };
  } catch (cause) {
    console.warn(`Font rendering failed for ${fontPath}, using fallback`);
    throw cause;
  }
}
/**
 * Render a line of text onto a solid background and save it as a PNG.
 *
 * The text is converted to SVG with the supplied font and composited onto a
 * canvas of `config.width` x `config.height`, centered vertically inside the
 * padded area and positioned horizontally according to `config.alignment`
 * (mirrored when `config.direction` is `'rtl'`).
 *
 * If the text cannot be converted to SVG, a warning is logged and the plain
 * background is still written (with `textWidth` 0). Any other error is
 * reported through the returned result rather than thrown.
 *
 * @param options - Text, font, size, image config and output path.
 * @returns A result describing whether the file was written.
 */
export async function renderText(options: RenderOptions): Promise<RenderResult> {
  const { text, font, fontSize, config, outputPath } = options;

  try {
    const pad = getPadding(config.padding);
    const background = parseColor(config.background);

    // Solid RGB canvas the text is drawn onto.
    let image = sharp({
      create: {
        width: config.width,
        height: config.height,
        channels: 3,
        background,
      },
    });

    let measuredWidth = 0;

    try {
      const rendered = await textToSvgPath(text, font.path, fontSize, config.text_color);
      measuredWidth = rendered.width;

      const innerWidth = config.width - pad.left - pad.right;
      const innerHeight = config.height - pad.top - pad.bottom;

      const align = config.alignment || 'center';
      const rtl = config.direction === 'rtl';

      // Horizontal offsets for text flush against the left / right padding edge.
      const flushLeft = pad.left;
      const flushRight = config.width - pad.right - rendered.width;

      // In RTL mode 'left' and 'right' swap sides.
      let x: number;
      if (align === 'left') {
        x = rtl ? flushRight : flushLeft;
      } else if (align === 'right') {
        x = rtl ? flushLeft : flushRight;
      } else {
        x = pad.left + (innerWidth - rendered.width) / 2;
      }

      const y = pad.top + (innerHeight - rendered.height) / 2;

      // Offsets are rounded to integers and clamped so they never go negative.
      image = image.composite([
        {
          input: Buffer.from(rendered.svg),
          left: Math.max(0, Math.round(x)),
          top: Math.max(0, Math.round(y)),
        },
      ]);
    } catch {
      // Text could not be drawn: keep the bare background as a placeholder.
      console.warn(`Using placeholder for: ${text.substring(0, 20)}...`);
    }

    await mkdir(dirname(outputPath), { recursive: true });
    await image.png().toFile(outputPath);

    return {
      success: true,
      path: outputPath,
      width: config.width,
      height: config.height,
      textWidth: measuredWidth,
    };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      success: false,
      path: outputPath,
      width: config.width,
      height: config.height,
      textWidth: 0,
      error: message,
    };
  }
}
/**
 * Render a line of text onto a solid background and return the PNG bytes
 * instead of writing a file.
 *
 * Layout follows the same rules as the file-writing renderer: vertically
 * centered in the padded area, horizontally placed per `config.alignment`,
 * with left/right mirrored for `'rtl'` direction. If the text cannot be
 * converted to SVG, the plain background is returned.
 *
 * @param options - Text, font, size and image config.
 * @returns PNG-encoded image data.
 * @throws Rejects if sharp fails while producing the PNG.
 */
export async function renderToBuffer(options: Omit<RenderOptions, 'outputPath'>): Promise<Buffer> {
  const { text, font, fontSize, config } = options;
  const pad = getPadding(config.padding);
  const background = parseColor(config.background);

  // Solid RGB canvas the text is drawn onto.
  let canvas = sharp({
    create: {
      width: config.width,
      height: config.height,
      channels: 3,
      background,
    },
  });

  try {
    const glyphs = await textToSvgPath(text, font.path, fontSize, config.text_color);

    const usableWidth = config.width - pad.left - pad.right;
    const usableHeight = config.height - pad.top - pad.bottom;

    const mode = config.alignment || 'center';
    const rtl = config.direction === 'rtl';

    let offsetX: number;
    if (mode === 'left' || mode === 'right') {
      // 'right' anchors to the right edge in LTR; RTL flips which edge is used.
      const anchorRightEdge = (mode === 'right') !== rtl;
      offsetX = anchorRightEdge
        ? config.width - pad.right - glyphs.width
        : pad.left;
    } else {
      offsetX = pad.left + (usableWidth - glyphs.width) / 2;
    }

    const offsetY = pad.top + (usableHeight - glyphs.height) / 2;

    // Offsets are rounded to integers and clamped so they never go negative.
    const overlay = {
      input: Buffer.from(glyphs.svg),
      left: Math.max(0, Math.round(offsetX)),
      top: Math.max(0, Math.round(offsetY)),
    };
    canvas = canvas.composite([overlay]);
  } catch {
    // Text could not be drawn: fall through and emit the bare background.
  }

  return canvas.png().toBuffer();
}
/**
 * Legacy name kept for compatibility: the same function as `renderToBuffer`.
 */
export const renderToCanvas = renderToBuffer;

/**
 * Legacy name kept for compatibility: hands back the given buffer unchanged.
 */
export const canvasToPng = (buffer: Buffer): Buffer => {
  return buffer;
};