OCR_DATASET_MAKER / src /renderer /augmentation.ts
Omarrran's picture
OCR Dataset Generator for HF Spaces
24a732c
/**
* Augmentation Pipeline - OCR-safe image transformations
*
* All augmentations preserve ground truth (text labels are never altered)
*/
import sharp from 'sharp';
import type {
AugmentationConfig,
TransformsConfig,
RotationConfig,
SkewConfig,
GaussianBlurConfig,
MotionBlurConfig,
GaussianNoiseConfig,
SaltPepperConfig,
JpegArtifactsConfig,
ResolutionDegradationConfig,
BrightnessConfig,
ContrastConfig,
PerspectiveConfig,
PaperTextureConfig,
ShadowConfig,
InkBleedConfig
} from '../core/config.js';
/**
 * Outcome of running one image through the augmentation pipeline.
 */
export interface AugmentationResult {
  /** Resulting image bytes (the untouched input when no augmentation ran). */
  buffer: Buffer;
  /** Names of the transforms that were applied, in application order. */
  applied: Array<string>;
  /** Sampled parameters of each applied transform, keyed by transform name. */
  stats: { [transformName: string]: unknown };
}
/**
 * Usage summary for a single transform, as reported by
 * `AugmentationPipeline.getStats()`.
 */
export interface AugmentationStats {
  /** Transform name (the key used in the transforms config). */
  name: string;
  /** Number of samples this transform was applied to. */
  count: number;
  /** `count` as a percentage of all augmented samples (0 when none were augmented). */
  percentage: number;
}
/**
 * Seeded pseudo-random number generator.
 *
 * Linear congruential generator
 * `state = (state * 1103515245 + 12345) mod 2^31`, evaluated with exact
 * 32-bit integer arithmetic. A plain floating-point multiply would exceed
 * 2^53 and silently drop the low bits of the state, so `Math.imul` is used
 * instead. The same seed always yields the same sequence.
 */
class SeededRandom {
  /** LCG modulus, 2^31. */
  private static readonly MODULUS = 2147483648;
  /** LCG multiplier. */
  private static readonly MULTIPLIER = 1103515245;
  /** LCG increment. */
  private static readonly INCREMENT = 12345;

  /** Current generator state, always an integer in [0, 2^31). */
  private seed: number;

  /**
   * @param seed Initial state. Defaults to the current time. Any number is
   *   accepted: it is truncated to an integer and reduced into [0, 2^31);
   *   non-finite values fall back to 0.
   */
  constructor(seed?: number) {
    const raw = seed ?? Date.now();
    const whole = Number.isFinite(raw) ? Math.trunc(raw) : 0;
    const m = SeededRandom.MODULUS;
    // Double modulo keeps negative seeds inside the valid state range.
    this.seed = ((whole % m) + m) % m;
  }

  /**
   * Advance the generator.
   * @returns A value in [0, 1).
   */
  next(): number {
    // Math.imul yields the exact low 32 bits of the product; masking with
    // 0x7fffffff is the reduction modulo 2^31.
    this.seed =
      (Math.imul(this.seed, SeededRandom.MULTIPLIER) + SeededRandom.INCREMENT) & 0x7fffffff;
    return this.seed / SeededRandom.MODULUS;
  }

  /**
   * @returns A float in [min, max).
   */
  range(min: number, max: number): number {
    return min + this.next() * (max - min);
  }

  /**
   * @returns An integer in [min, max] (both ends inclusive).
   */
  int(min: number, max: number): number {
    return Math.floor(this.range(min, max + 1));
  }

  /**
   * @param probability Chance of returning `true`, as a fraction (default 0.5).
   */
  bool(probability: number = 0.5): boolean {
    return this.next() < probability;
  }

  /**
   * Pick one element of `array` uniformly at random.
   * An empty array yields `undefined` at runtime.
   */
  pick<T>(array: T[]): T {
    return array[this.int(0, array.length - 1)];
  }
}
/**
 * Result of one transform step: the sharp instance to continue from, whether
 * the transform was actually applied, and the parameters that were sampled.
 */
type TransformOutcome = {
  pipeline: sharp.Sharp;
  applied: boolean;
  params: Record<string, unknown>;
};

/**
 * Augmentation pipeline.
 *
 * Randomly applies a configurable subset of image transforms to rendered
 * samples and keeps running statistics about what was applied. All random
 * decisions come from a single seeded generator, so a given seed and input
 * sequence reproduces the same choices.
 */
export class AugmentationPipeline {
  private config: AugmentationConfig;
  private random: SeededRandom;
  /** Number of samples each transform has been applied to. */
  private stats: Map<string, number> = new Map();
  /** Samples passed to `apply`, augmented or not. */
  private totalProcessed = 0;
  /** Samples that entered the augmentation branch. */
  private totalAugmented = 0;

  /**
   * @param config Augmentation configuration.
   * @param seed Optional seed for the random generator.
   */
  constructor(config: AugmentationConfig, seed?: number) {
    this.config = config;
    this.random = new SeededRandom(seed);
  }

  /**
   * Apply augmentations to an image buffer.
   *
   * The input is returned untouched when augmentation is disabled or when the
   * sample is not drawn for augmentation (`apply_percentage`). Otherwise the
   * selected transforms run one after another and the result is PNG-encoded.
   *
   * Every step is materialised to a PNG buffer before the next one starts.
   * A sharp instance holds one setting per operation type (the sharp docs
   * state this explicitly for resize) and `metadata()` reports the input
   * image rather than pending operations, so chaining several transforms on
   * a single instance would let later calls overwrite earlier ones and would
   * size overlays from stale dimensions.
   *
   * A transform that throws is logged with `console.warn` and skipped; the
   * image keeps the state it had before that transform.
   *
   * @param imageBuffer Encoded input image.
   * @returns The output buffer, the applied transform names (in order) and
   *   the sampled parameters keyed by transform name.
   */
  async apply(imageBuffer: Buffer): Promise<AugmentationResult> {
    this.totalProcessed++;

    if (!this.config.enabled) {
      return { buffer: imageBuffer, applied: [], stats: {} };
    }

    // Decide if this sample should be augmented at all.
    const shouldAugment = this.random.bool(this.config.apply_percentage / 100);
    if (!shouldAugment) {
      return { buffer: imageBuffer, applied: [], stats: {} };
    }
    this.totalAugmented++;

    const applied: string[] = [];
    const stats: Record<string, unknown> = {};
    const transforms = this.config.transforms || {};
    // `??` so that an explicit 0 ("no transforms") is honoured.
    const maxTransforms = this.config.max_transforms_per_sample ?? 4;

    const eligibleTransforms = this.getEligibleTransforms(transforms);
    const selectedTransforms = this.selectTransforms(eligibleTransforms, maxTransforms);

    // PNG bytes produced by the most recent successful transform.
    let current: Buffer | undefined;

    for (const transformName of selectedTransforms) {
      try {
        const result = await this.applyTransform(
          sharp(current ?? imageBuffer),
          transformName,
          transforms
        );
        if (result.applied) {
          // Materialise first: if encoding fails, nothing is recorded.
          current = await result.pipeline.png().toBuffer();
          applied.push(transformName);
          stats[transformName] = result.params;
          this.incrementStat(transformName);
        }
      } catch (error) {
        console.warn(`Transform ${transformName} failed:`, error);
      }
    }

    // Augmented samples are always returned as PNG, even if no transform ran.
    const buffer = current ?? (await sharp(imageBuffer).png().toBuffer());
    return { buffer, applied, stats };
  }

  /**
   * List the transforms whose config entry exists and has `enabled` set.
   * Names are returned in config key order.
   */
  private getEligibleTransforms(transforms: TransformsConfig): string[] {
    return Object.entries(transforms)
      .filter(([, config]) => Boolean(config && (config as { enabled?: boolean }).enabled))
      .map(([name]) => name);
  }

  /**
   * Choose which eligible transforms to apply to the current sample.
   *
   * Each transform is accepted with probability `percentage / 100`, stopping
   * once `max` have been accepted. Candidates are visited in a shuffled order
   * so that the cap does not systematically favour transforms listed first
   * in the config. The accepted names are returned in the original
   * (`eligible`) order, which is the order they are applied in.
   */
  private selectTransforms(eligible: string[], max: number): string[] {
    const transforms = this.config.transforms || {};

    // Fisher-Yates shuffle of a copy; `eligible` itself is not mutated.
    const visitOrder = [...eligible];
    for (let i = visitOrder.length - 1; i > 0; i--) {
      const j = this.random.int(0, i);
      const tmp = visitOrder[i] as string;
      visitOrder[i] = visitOrder[j] as string;
      visitOrder[j] = tmp;
    }

    const chosen = new Set<string>();
    for (const name of visitOrder) {
      if (chosen.size >= max) break;
      const config = transforms[name as keyof TransformsConfig];
      if (!config) continue;
      const percentage = (config as { percentage?: number }).percentage ?? 0;
      if (this.random.bool(percentage / 100)) {
        chosen.add(name);
      }
    }

    return eligible.filter((name) => chosen.has(name));
  }

  /**
   * Dispatch a transform by name.
   *
   * @returns `applied: false` with the untouched pipeline when the transform
   *   has no config entry or the name is unknown.
   */
  private async applyTransform(
    pipeline: sharp.Sharp,
    name: string,
    transforms: TransformsConfig
  ): Promise<TransformOutcome> {
    const config = transforms[name as keyof TransformsConfig];
    if (!config) {
      return { pipeline, applied: false, params: {} };
    }

    switch (name) {
      case 'rotation':
        return this.applyRotation(pipeline, config as RotationConfig);
      case 'gaussian_blur':
        return this.applyGaussianBlur(pipeline, config as GaussianBlurConfig);
      case 'gaussian_noise':
        return this.applyGaussianNoise(pipeline, config as GaussianNoiseConfig);
      case 'brightness':
        return this.applyBrightness(pipeline, config as BrightnessConfig);
      case 'contrast':
        return this.applyContrast(pipeline, config as ContrastConfig);
      case 'jpeg_artifacts':
        return this.applyJpegArtifacts(pipeline, config as JpegArtifactsConfig);
      case 'resolution_degradation':
        return this.applyResolutionDegradation(pipeline, config as ResolutionDegradationConfig);
      case 'salt_pepper':
        return this.applySaltPepper(pipeline, config as SaltPepperConfig);
      case 'motion_blur':
        return this.applyMotionBlur(pipeline, config as MotionBlurConfig);
      case 'skew':
        return this.applySkew(pipeline, config as SkewConfig);
      case 'perspective':
        return this.applyPerspective(pipeline, config as PerspectiveConfig);
      case 'paper_texture':
        return this.applyPaperTexture(pipeline, config as PaperTextureConfig);
      case 'shadow':
        return this.applyShadow(pipeline, config as ShadowConfig);
      case 'ink_bleed':
        return this.applyInkBleed(pipeline, config as InkBleedConfig);
      default:
        return { pipeline, applied: false, params: {} };
    }
  }

  /**
   * Rotate by an angle sampled from `config.range`, filling exposed corners
   * with white.
   */
  private async applyRotation(
    pipeline: sharp.Sharp,
    config: RotationConfig
  ): Promise<TransformOutcome> {
    const angle = this.random.range(config.range[0], config.range[1]);
    return {
      pipeline: pipeline.rotate(angle, { background: '#FFFFFF' }),
      applied: true,
      params: { angle },
    };
  }

  /**
   * Gaussian blur with a sigma sampled from `config.sigma`.
   *
   * The sigma is clamped to sharp's accepted range (0.3 to 1000) so that a
   * small sampled value produces a minimal blur instead of a validation
   * error. The reported `sigma` is the value actually used.
   */
  private async applyGaussianBlur(
    pipeline: sharp.Sharp,
    config: GaussianBlurConfig
  ): Promise<TransformOutcome> {
    const sampled = this.random.range(config.sigma[0], config.sigma[1]);
    const sigma = Math.min(1000, Math.max(0.3, sampled));
    return {
      pipeline: pipeline.blur(sigma),
      applied: true,
      params: { sigma },
    };
  }

  /**
   * Overlay-blend a grey noise image whose pixel values follow a Gaussian
   * centred on 128 with standard deviation sampled from `config.std`.
   */
  private async applyGaussianNoise(
    pipeline: sharp.Sharp,
    config: GaussianNoiseConfig
  ): Promise<TransformOutcome> {
    const std = this.random.range(config.std[0], config.std[1]);

    const metadata = await pipeline.metadata();
    const width = metadata.width || 256;
    const height = metadata.height || 64;

    const noiseBuffer = Buffer.alloc(width * height);
    for (let i = 0; i < noiseBuffer.length; i++) {
      // Box-Muller transform. u1 is taken from (0, 1] so log(u1) stays finite
      // even when the generator returns exactly 0.
      const u1 = 1 - this.random.next();
      const u2 = this.random.next();
      const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
      const noise = Math.round(128 + z * std);
      noiseBuffer[i] = Math.max(0, Math.min(255, noise));
    }

    const noiseData = await sharp(noiseBuffer, {
      raw: { width, height, channels: 1 },
    })
      .png()
      .toBuffer();

    return {
      pipeline: pipeline.composite([{ input: noiseData, blend: 'overlay' }]),
      applied: true,
      params: { std },
    };
  }

  /**
   * Brightness change. `config.range` is read as a delta in percent, which is
   * converted to the multiplier sharp expects (1.0 = unchanged).
   */
  private async applyBrightness(
    pipeline: sharp.Sharp,
    config: BrightnessConfig
  ): Promise<TransformOutcome> {
    const delta = this.random.range(config.range[0], config.range[1]);
    const multiplier = 1 + delta / 100;
    return {
      pipeline: pipeline.modulate({ brightness: multiplier }),
      applied: true,
      params: { delta, multiplier },
    };
  }

  /**
   * Contrast change: a linear remap `out = factor * in + offset`, with the
   * offset chosen so that mid-grey (128) stays fixed.
   */
  private async applyContrast(
    pipeline: sharp.Sharp,
    config: ContrastConfig
  ): Promise<TransformOutcome> {
    const factor = this.random.range(config.range[0], config.range[1]);
    const offset = 128 - 128 * factor;
    return {
      pipeline: pipeline.linear(factor, offset),
      applied: true,
      params: { factor },
    };
  }

  /**
   * JPEG compression artifacts: encode as JPEG at a quality sampled from
   * `config.quality` and continue from the decoded result.
   */
  private async applyJpegArtifacts(
    pipeline: sharp.Sharp,
    config: JpegArtifactsConfig
  ): Promise<TransformOutcome> {
    const quality = this.random.int(config.quality[0], config.quality[1]);
    const jpegBuffer = await pipeline.jpeg({ quality }).toBuffer();
    return {
      pipeline: sharp(jpegBuffer),
      applied: true,
      params: { quality },
    };
  }

  /**
   * Resolution degradation: downscale by a factor sampled from
   * `config.scale_factor`, then upscale back to the original size, both with
   * nearest-neighbour sampling.
   *
   * The two resizes run in separate sharp pipelines because only one resize
   * can occur per pipeline (a second `.resize()` call replaces the first).
   * `fit: 'fill'` stretches to the exact target size instead of cropping.
   */
  private async applyResolutionDegradation(
    pipeline: sharp.Sharp,
    config: ResolutionDegradationConfig
  ): Promise<TransformOutcome> {
    const scaleFactor = this.random.range(config.scale_factor[0], config.scale_factor[1]);

    const metadata = await pipeline.metadata();
    const width = metadata.width || 256;
    const height = metadata.height || 64;
    // At least one pixel per axis so the downscale target is always valid.
    const smallWidth = Math.max(1, Math.round(width * scaleFactor));
    const smallHeight = Math.max(1, Math.round(height * scaleFactor));

    const downscaled = await pipeline
      .resize(smallWidth, smallHeight, { kernel: 'nearest', fit: 'fill' })
      .png()
      .toBuffer();
    const degraded = await sharp(downscaled)
      .resize(width, height, { kernel: 'nearest', fit: 'fill' })
      .png()
      .toBuffer();

    return {
      pipeline: sharp(degraded),
      applied: true,
      params: { scaleFactor, smallWidth, smallHeight },
    };
  }

  /**
   * Salt-and-pepper noise: set a fraction of pixels (sampled from
   * `config.amount`) to pure white or pure black with equal probability.
   * Pixel positions are drawn with replacement. At most the first three
   * channels of a pixel are overwritten.
   */
  private async applySaltPepper(
    pipeline: sharp.Sharp,
    config: SaltPepperConfig
  ): Promise<TransformOutcome> {
    const amount = this.random.range(config.amount[0], config.amount[1]);

    // Use the dimensions of the decoded pixels themselves, not the input
    // header, so the raw buffer and its description always agree.
    const { data, info } = await pipeline.raw().toBuffer({ resolveWithObject: true });
    const { width, height, channels } = info;
    const pixels = Buffer.from(data);

    const totalPixels = width * height;
    const noisePixels = Math.floor(totalPixels * amount);
    const writableChannels = Math.min(channels, 3);

    for (let i = 0; i < noisePixels; i++) {
      const pixelIndex = this.random.int(0, totalPixels - 1) * channels;
      const value = this.random.bool(0.5) ? 255 : 0;
      for (let c = 0; c < writableChannels; c++) {
        pixels[pixelIndex + c] = value;
      }
    }

    return {
      pipeline: sharp(pixels, { raw: { width, height, channels } }),
      applied: true,
      params: { amount, noisePixels },
    };
  }

  /**
   * Motion blur: convolve with a line-shaped kernel whose size and direction
   * are sampled from `config.kernel_size` and `config.angle` (degrees).
   */
  private async applyMotionBlur(
    pipeline: sharp.Sharp,
    config: MotionBlurConfig
  ): Promise<TransformOutcome> {
    const kernelSize = this.random.int(config.kernel_size[0], config.kernel_size[1]);
    const angle = this.random.range(config.angle[0], config.angle[1]);
    const kernel = this.createMotionBlurKernel(kernelSize, angle);
    return {
      pipeline: pipeline.convolve({
        width: kernelSize,
        height: kernelSize,
        kernel,
      }),
      applied: true,
      params: { kernelSize, angle },
    };
  }

  /**
   * Build a `size` x `size` kernel (row-major) that is zero except along a
   * line through the centre at `angle` degrees; each cell on the line holds
   * `1 / size`.
   */
  private createMotionBlurKernel(size: number, angle: number): number[] {
    const kernel: number[] = new Array(size * size).fill(0);
    const center = Math.floor(size / 2);
    const radians = (angle * Math.PI) / 180;
    const cos = Math.cos(radians);
    const sin = Math.sin(radians);

    for (let step = 0; step < size; step++) {
      const offset = step - center;
      const x = Math.round(center + offset * cos);
      const y = Math.round(center + offset * sin);
      if (x >= 0 && x < size && y >= 0 && y < size) {
        kernel[y * size + x] = 1 / size;
      }
    }
    return kernel;
  }

  /**
   * Skew: affine transform with matrix `[[1, xSkew], [ySkew, 1]]`, where the
   * two shear terms are sampled from `config.x_range` and `config.y_range`.
   * Exposed areas are filled with white.
   */
  private async applySkew(
    pipeline: sharp.Sharp,
    config: SkewConfig
  ): Promise<TransformOutcome> {
    const xSkew = this.random.range(config.x_range[0], config.x_range[1]);
    const ySkew = this.random.range(config.y_range[0], config.y_range[1]);
    return {
      pipeline: pipeline.affine(
        [
          [1, xSkew],
          [ySkew, 1],
        ],
        { background: '#FFFFFF' }
      ),
      applied: true,
      params: { xSkew, ySkew },
    };
  }

  /**
   * Approximate perspective distortion with a horizontal shear.
   *
   * The shear term is half of an intensity sampled from `config.intensity`,
   * with a random sign. No true projective warp is performed.
   */
  private async applyPerspective(
    pipeline: sharp.Sharp,
    config: PerspectiveConfig
  ): Promise<TransformOutcome> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const skew = intensity * (this.random.bool(0.5) ? 1 : -1);
    return {
      pipeline: pipeline.affine(
        [
          [1, skew * 0.5],
          [0, 1],
        ],
        { background: '#FFFFFF' }
      ),
      applied: true,
      params: { intensity, skew },
    };
  }

  /**
   * Paper texture: multiply-blend a light grey noise texture over the image.
   *
   * The texture is faded toward white according to an opacity sampled from
   * `config.opacity`. NOTE(review): this treats opacity as a 0-1 fraction;
   * confirm against the config schema. Values of 1 or more (and non-finite
   * values) give the full-strength texture.
   */
  private async applyPaperTexture(
    pipeline: sharp.Sharp,
    config: PaperTextureConfig
  ): Promise<TransformOutcome> {
    const opacity = this.random.range(config.opacity[0], config.opacity[1]);
    const strength = Number.isFinite(opacity) ? Math.min(1, Math.max(0, opacity)) : 1;

    const metadata = await pipeline.metadata();
    const width = metadata.width || 256;
    const height = metadata.height || 64;

    const baseValue = 240; // light grey base
    const textureBuffer = Buffer.alloc(width * height);
    for (let i = 0; i < textureBuffer.length; i++) {
      const noise = this.random.range(-15, 15);
      const grain = Math.max(200, Math.min(255, baseValue + noise));
      // White (255) is neutral under multiply, so fade the grain toward it.
      textureBuffer[i] = Math.round(255 - strength * (255 - grain));
    }

    const textureImage = await sharp(textureBuffer, {
      raw: { width, height, channels: 1 },
    })
      .png()
      .toBuffer();

    return {
      pipeline: pipeline.composite([{ input: textureImage, blend: 'multiply' }]),
      applied: true,
      params: { opacity },
    };
  }

  /**
   * Shadow: multiply-blend a gradient that is darkest at one edge of the
   * image and fades to white toward the opposite edge.
   *
   * The edge is picked at random from `config.position` (all four edges when
   * that list is missing or empty) and the darkness comes from an intensity
   * sampled from `config.intensity`.
   */
  private async applyShadow(
    pipeline: sharp.Sharp,
    config: ShadowConfig
  ): Promise<TransformOutcome> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const configured = config.position;
    const positions =
      configured && configured.length > 0 ? configured : ['left', 'right', 'top', 'bottom'];
    const position = this.random.pick(positions);

    const metadata = await pipeline.metadata();
    const width = metadata.width || 256;
    const height = metadata.height || 64;

    const shadowBuffer = Buffer.alloc(width * height);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        // 0 at the shadowed edge, approaching 1 at the opposite edge.
        let gradient = 1;
        switch (position) {
          case 'left':
            gradient = x / width;
            break;
          case 'right':
            gradient = 1 - x / width;
            break;
          case 'top':
            gradient = y / height;
            break;
          case 'bottom':
            gradient = 1 - y / height;
            break;
        }
        // Quadratic falloff.
        gradient = Math.pow(gradient, 2);
        const value = Math.round(255 * (1 - intensity * (1 - gradient)));
        // Clamp: out-of-range values would otherwise wrap around in the Buffer.
        shadowBuffer[y * width + x] = Math.max(0, Math.min(255, value));
      }
    }

    const shadowImage = await sharp(shadowBuffer, {
      raw: { width, height, channels: 1 },
    })
      .png()
      .toBuffer();

    return {
      pipeline: pipeline.composite([{ input: shadowImage, blend: 'multiply' }]),
      applied: true,
      params: { intensity, position },
    };
  }

  /**
   * Ink bleed, approximated by a slight blur plus a 2% brightness reduction.
   *
   * The blur sigma is half of an intensity sampled from `config.intensity`,
   * clamped to sharp's accepted range (0.3 to 1000) so that low intensities
   * still apply instead of failing validation.
   */
  private async applyInkBleed(
    pipeline: sharp.Sharp,
    config: InkBleedConfig
  ): Promise<TransformOutcome> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const sigma = Math.min(1000, Math.max(0.3, intensity * 0.5));
    const processed = await pipeline
      .blur(sigma)
      .modulate({ brightness: 0.98 })
      .toBuffer();
    return {
      pipeline: sharp(processed),
      applied: true,
      params: { intensity },
    };
  }

  /**
   * Increment the usage counter of a transform.
   */
  private incrementStat(name: string): void {
    this.stats.set(name, (this.stats.get(name) || 0) + 1);
  }

  /**
   * Summarise what has been processed since construction or the last
   * `resetStats()`.
   *
   * @returns Totals, clean/augmented shares in percent (one decimal),
   *   per-transform usage sorted by count descending (each percentage is
   *   relative to the number of augmented samples), and the average number
   *   of transforms per augmented sample (one decimal).
   */
  getStats(): {
    totalProcessed: number;
    totalAugmented: number;
    cleanPercentage: number;
    augmentedPercentage: number;
    byTransform: AugmentationStats[];
    avgTransformsPerSample: number;
  } {
    const roundToTenth = (value: number): number => Math.round(value * 10) / 10;
    const processed = this.totalProcessed;
    const augmented = this.totalAugmented;

    const cleanPercentage = processed > 0 ? ((processed - augmented) / processed) * 100 : 0;
    const augmentedPercentage = processed > 0 ? (augmented / processed) * 100 : 0;

    const byTransform: AugmentationStats[] = [];
    let totalTransforms = 0;
    for (const [name, count] of this.stats) {
      totalTransforms += count;
      byTransform.push({
        name,
        count,
        percentage: augmented > 0 ? (count / augmented) * 100 : 0,
      });
    }
    byTransform.sort((a, b) => b.count - a.count);

    return {
      totalProcessed: processed,
      totalAugmented: augmented,
      cleanPercentage: roundToTenth(cleanPercentage),
      augmentedPercentage: roundToTenth(augmentedPercentage),
      byTransform,
      avgTransformsPerSample: augmented > 0 ? roundToTenth(totalTransforms / augmented) : 0,
    };
  }

  /**
   * Clear all counters. The random generator state is left as is.
   */
  resetStats(): void {
    this.stats.clear();
    this.totalProcessed = 0;
    this.totalAugmented = 0;
  }
}
/**
 * Factory for {@link AugmentationPipeline}.
 *
 * @param config Augmentation configuration handed to the pipeline.
 * @param seed Optional seed forwarded to the pipeline's random generator.
 * @returns A new pipeline instance with empty statistics.
 */
export function createAugmentationPipeline(
  config: AugmentationConfig,
  seed?: number
): AugmentationPipeline {
  const pipeline = new AugmentationPipeline(config, seed);
  return pipeline;
}