/**
 * Augmentation Pipeline - OCR-safe image transformations
 *
 * All augmentations preserve ground truth (text labels are never altered)
 */

import sharp from 'sharp';
import type {
  AugmentationConfig,
  TransformsConfig,
  RotationConfig,
  SkewConfig,
  GaussianBlurConfig,
  MotionBlurConfig,
  GaussianNoiseConfig,
  SaltPepperConfig,
  JpegArtifactsConfig,
  ResolutionDegradationConfig,
  BrightnessConfig,
  ContrastConfig,
  PerspectiveConfig,
  PaperTextureConfig,
  ShadowConfig,
  InkBleedConfig,
} from '../core/config.js';

/** Result of running the pipeline on one image. */
export interface AugmentationResult {
  /** Output image; PNG-encoded whenever the sample went through the augmentation branch. */
  buffer: Buffer;
  /** Names of the transforms that were actually applied, in application order. */
  applied: string[];
  /** Sampled parameters of each applied transform, keyed by transform name. */
  stats: Record<string, unknown>;
}

/** Usage counter for one transform, as reported by `getStats()`. */
export interface AugmentationStats {
  name: string;
  count: number;
  /** `count` as a percentage of augmented samples. */
  percentage: number;
}

/** Outcome of a single transform step. */
interface TransformResult {
  pipeline: sharp.Sharp;
  applied: boolean;
  params: Record<string, unknown>;
}

/** Linear congruential generator constants (modulus is 2^31). */
const LCG_MULTIPLIER = 1103515245;
const LCG_INCREMENT = 12345;
const LCG_MODULUS = 2147483648;

/** Range of sigma values accepted by sharp's `blur()`. */
const MIN_BLUR_SIGMA = 0.3;
const MAX_BLUR_SIGMA = 1000;

/** Dimensions used when image metadata does not report a size. */
const FALLBACK_WIDTH = 256;
const FALLBACK_HEIGHT = 64;

/**
 * Seeded random number generator
 *
 * A linear congruential generator: state' = (state * a + c) mod 2^31.
 */
class SeededRandom {
  private seed: number;

  /** @param seed Initial state; defaults to the current timestamp. */
  constructor(seed?: number) {
    this.seed = seed ?? Date.now();
  }

  /** Advance the generator and return a value in [0, 1). */
  next(): number {
    // state * multiplier can reach ~2^61, beyond the 2^53 range in which
    // doubles are exact, so a plain `*` followed by `%` loses the low bits
    // the modulus depends on. Math.imul yields the exact low 32 bits of the
    // product; masking with 0x7fffffff then reduces modulo 2^31.
    this.seed = (Math.imul(this.seed, LCG_MULTIPLIER) + LCG_INCREMENT) & 0x7fffffff;
    return this.seed / LCG_MODULUS;
  }

  /** Uniform float in [min, max). */
  range(min: number, max: number): number {
    return min + this.next() * (max - min);
  }

  /** Uniform integer in [min, max], both ends inclusive. */
  int(min: number, max: number): number {
    return Math.floor(this.range(min, max + 1));
  }

  /** Returns true with the given probability (0..1). */
  bool(probability: number = 0.5): boolean {
    return this.next() < probability;
  }

  /** Pick one element uniformly at random. */
  pick<T>(array: T[]): T {
    return array[this.int(0, array.length - 1)];
  }
}

/**
 * Augmentation Pipeline class
 *
 * Decides per sample whether to augment, selects a subset of the enabled
 * transforms, applies them one after another and tracks usage statistics.
 */
export class AugmentationPipeline {
  private config: AugmentationConfig;
  private random: SeededRandom;
  private stats: Map<string, number> = new Map();
  private totalProcessed = 0;
  private totalAugmented = 0;

  /**
   * @param config Augmentation settings.
   * @param seed Optional seed for the random number generator.
   */
  constructor(config: AugmentationConfig, seed?: number) {
    this.config = config;
    this.random = new SeededRandom(seed);
  }

  /**
   * Apply augmentations to an image buffer
   *
   * Returns the input buffer untouched when augmentation is disabled or the
   * sample is not chosen for augmentation. Otherwise returns a PNG buffer.
   * A transform that throws is logged and skipped; the remaining transforms
   * still run on the last good image.
   */
  async apply(imageBuffer: Buffer): Promise<AugmentationResult> {
    this.totalProcessed++;

    if (!this.config.enabled) {
      return { buffer: imageBuffer, applied: [], stats: {} };
    }

    // Decide if this sample should be augmented
    const shouldAugment = this.random.bool(this.config.apply_percentage / 100);
    if (!shouldAugment) {
      return { buffer: imageBuffer, applied: [], stats: {} };
    }

    this.totalAugmented++;

    const applied: string[] = [];
    const stats: Record<string, unknown> = {};
    const transforms = this.config.transforms || {};
    // `??` so that an explicit 0 means "no transforms" rather than the default.
    const maxTransforms = this.config.max_transforms_per_sample ?? 4;

    const eligibleTransforms = this.getEligibleTransforms(transforms);
    const selectedTransforms = this.selectTransforms(eligibleTransforms, maxTransforms);

    // Each transform runs on its own sharp instance and is materialised to a
    // buffer before the next one starts. A single sharp pipeline keeps only
    // one setting per operation (a second blur/modulate/affine/composite/
    // resize call replaces the first) and reports the *input* size from
    // metadata(), so chaining every transform on one instance silently drops
    // effects and builds overlays with stale dimensions.
    let current = imageBuffer;
    for (const transformName of selectedTransforms) {
      try {
        const result = await this.applyTransform(sharp(current), transformName, transforms);
        if (!result.applied) continue;

        // Encoding inside the try block means a transform that fails while
        // rendering is skipped like any other failure.
        current = await result.pipeline.png().toBuffer();
        applied.push(transformName);
        stats[transformName] = result.params;
        this.incrementStat(transformName);
      } catch (error: unknown) {
        console.warn(`Transform ${transformName} failed:`, error);
      }
    }

    // `current` is already PNG once any transform succeeded; otherwise the
    // input still has to be converted.
    const buffer = applied.length > 0 ? current : await sharp(imageBuffer).png().toBuffer();

    return { buffer, applied, stats };
  }

  /**
   * Get the names of all transforms whose config has a truthy `enabled` flag
   */
  private getEligibleTransforms(transforms: TransformsConfig): string[] {
    const eligible: string[] = [];

    for (const [name, config] of Object.entries(transforms)) {
      if (config && (config as { enabled?: boolean }).enabled) {
        eligible.push(name);
      }
    }

    return eligible;
  }

  /**
   * Select transforms based on their percentage weights
   *
   * Walks `eligible` in order, keeping each transform with probability
   * `percentage / 100`, and stops once `max` transforms have been selected.
   */
  private selectTransforms(eligible: string[], max: number): string[] {
    const selected: string[] = [];
    const transforms = this.config.transforms || {};

    for (const name of eligible) {
      if (selected.length >= max) break;

      const config = transforms[name as keyof TransformsConfig];
      if (!config) continue;

      const percentage = (config as { percentage?: number }).percentage ?? 0;
      if (this.random.bool(percentage / 100)) {
        selected.push(name);
      }
    }

    return selected;
  }

  /**
   * Apply a single transform
   *
   * Dispatches on the transform name. Unknown names and names without a
   * config entry return `applied: false` with the pipeline unchanged.
   */
  private async applyTransform(
    pipeline: sharp.Sharp,
    name: string,
    transforms: TransformsConfig
  ): Promise<TransformResult> {
    const config = transforms[name as keyof TransformsConfig];
    if (!config) {
      return { pipeline, applied: false, params: {} };
    }

    switch (name) {
      case 'rotation':
        return this.applyRotation(pipeline, config as RotationConfig);
      case 'gaussian_blur':
        return this.applyGaussianBlur(pipeline, config as GaussianBlurConfig);
      case 'gaussian_noise':
        return this.applyGaussianNoise(pipeline, config as GaussianNoiseConfig);
      case 'brightness':
        return this.applyBrightness(pipeline, config as BrightnessConfig);
      case 'contrast':
        return this.applyContrast(pipeline, config as ContrastConfig);
      case 'jpeg_artifacts':
        return this.applyJpegArtifacts(pipeline, config as JpegArtifactsConfig);
      case 'resolution_degradation':
        return this.applyResolutionDegradation(pipeline, config as ResolutionDegradationConfig);
      case 'salt_pepper':
        return this.applySaltPepper(pipeline, config as SaltPepperConfig);
      case 'motion_blur':
        return this.applyMotionBlur(pipeline, config as MotionBlurConfig);
      case 'skew':
        return this.applySkew(pipeline, config as SkewConfig);
      case 'perspective':
        return this.applyPerspective(pipeline, config as PerspectiveConfig);
      case 'paper_texture':
        return this.applyPaperTexture(pipeline, config as PaperTextureConfig);
      case 'shadow':
        return this.applyShadow(pipeline, config as ShadowConfig);
      case 'ink_bleed':
        return this.applyInkBleed(pipeline, config as InkBleedConfig);
      default:
        return { pipeline, applied: false, params: {} };
    }
  }

  /**
   * Read the image size from the pipeline's input, falling back to
   * 256x64 when the metadata does not report one.
   */
  private async getDimensions(pipeline: sharp.Sharp): Promise<{ width: number; height: number }> {
    const metadata = await pipeline.metadata();
    return {
      width: metadata.width || FALLBACK_WIDTH,
      height: metadata.height || FALLBACK_HEIGHT,
    };
  }

  /** Encode a one-byte-per-pixel buffer as a single-channel PNG. */
  private async encodeGrayscale(data: Buffer, width: number, height: number): Promise<Buffer> {
    return sharp(data, { raw: { width, height, channels: 1 } }).png().toBuffer();
  }

  /** Clamp a blur sigma into the range sharp's `blur()` accepts. */
  private clampBlurSigma(sigma: number): number {
    return Math.min(MAX_BLUR_SIGMA, Math.max(MIN_BLUR_SIGMA, sigma));
  }

  /**
   * Apply rotation by an angle sampled from `config.range`, filling the
   * exposed corners with white
   */
  private async applyRotation(
    pipeline: sharp.Sharp,
    config: RotationConfig
  ): Promise<TransformResult> {
    const angle = this.random.range(config.range[0], config.range[1]);

    return {
      pipeline: pipeline.rotate(angle, { background: '#FFFFFF' }),
      applied: true,
      params: { angle },
    };
  }

  /**
   * Apply Gaussian blur with a sigma sampled from `config.sigma`
   *
   * The sampled value is clamped to the range sharp accepts; an
   * out-of-range sigma would otherwise throw and skip the transform.
   */
  private async applyGaussianBlur(
    pipeline: sharp.Sharp,
    config: GaussianBlurConfig
  ): Promise<TransformResult> {
    const sigma = this.clampBlurSigma(this.random.range(config.sigma[0], config.sigma[1]));

    return {
      pipeline: pipeline.blur(sigma),
      applied: true,
      params: { sigma },
    };
  }

  /**
   * Apply Gaussian noise
   *
   * Builds a grey noise image centred on 128 with standard deviation sampled
   * from `config.std` and composites it with the 'overlay' blend mode.
   */
  private async applyGaussianNoise(
    pipeline: sharp.Sharp,
    config: GaussianNoiseConfig
  ): Promise<TransformResult> {
    const std = this.random.range(config.std[0], config.std[1]);
    const { width, height } = await this.getDimensions(pipeline);

    const noiseBuffer = Buffer.alloc(width * height);
    for (let i = 0; i < noiseBuffer.length; i++) {
      // Box-Muller transform. `1 - next()` lies in (0, 1], which keeps
      // Math.log away from log(0) = -Infinity.
      const u1 = 1 - this.random.next();
      const u2 = this.random.next();
      const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
      const noise = Math.round(128 + z * std);
      noiseBuffer[i] = Math.max(0, Math.min(255, noise));
    }

    const noiseData = await this.encodeGrayscale(noiseBuffer, width, height);

    return {
      pipeline: pipeline.composite([{ input: noiseData, blend: 'overlay' }]),
      applied: true,
      params: { std },
    };
  }

  /**
   * Apply brightness adjustment
   *
   * `config.range` is a delta in percent; it is converted to the multiplier
   * sharp expects (1.0 = unchanged).
   */
  private async applyBrightness(
    pipeline: sharp.Sharp,
    config: BrightnessConfig
  ): Promise<TransformResult> {
    const delta = this.random.range(config.range[0], config.range[1]);
    const multiplier = 1 + delta / 100;

    return {
      pipeline: pipeline.modulate({ brightness: multiplier }),
      applied: true,
      params: { delta, multiplier },
    };
  }

  /**
   * Apply contrast adjustment
   *
   * Scales pixel values by `factor` around mid-grey (128):
   * out = factor * in + (128 - 128 * factor).
   */
  private async applyContrast(
    pipeline: sharp.Sharp,
    config: ContrastConfig
  ): Promise<TransformResult> {
    const factor = this.random.range(config.range[0], config.range[1]);

    return {
      pipeline: pipeline.linear(factor, -(128 * factor) + 128),
      applied: true,
      params: { factor },
    };
  }

  /**
   * Apply JPEG compression artifacts
   *
   * Encodes the image as JPEG at a quality sampled from `config.quality`
   * and hands back a pipeline reading the compressed result.
   */
  private async applyJpegArtifacts(
    pipeline: sharp.Sharp,
    config: JpegArtifactsConfig
  ): Promise<TransformResult> {
    const quality = this.random.int(config.quality[0], config.quality[1]);
    const jpegBuffer = await pipeline.jpeg({ quality }).toBuffer();

    return {
      pipeline: sharp(jpegBuffer),
      applied: true,
      params: { quality },
    };
  }

  /**
   * Apply resolution degradation (downscale then upscale)
   *
   * The two resizes run as separate sharp stages: a single pipeline honours
   * only its last resize() call, so chaining both on one instance would skip
   * the downscale entirely.
   */
  private async applyResolutionDegradation(
    pipeline: sharp.Sharp,
    config: ResolutionDegradationConfig
  ): Promise<TransformResult> {
    const scaleFactor = this.random.range(config.scale_factor[0], config.scale_factor[1]);
    const { width, height } = await this.getDimensions(pipeline);

    // Never ask for a zero-sized image, however small the scale factor is.
    const smallWidth = Math.max(1, Math.round(width * scaleFactor));
    const smallHeight = Math.max(1, Math.round(height * scaleFactor));

    // 'fill' stretches to the exact target size instead of cropping when
    // rounding changes the aspect ratio by a pixel.
    const downscaled = await pipeline
      .resize(smallWidth, smallHeight, { kernel: 'nearest', fit: 'fill' })
      .png()
      .toBuffer();

    return {
      pipeline: sharp(downscaled).resize(width, height, { kernel: 'nearest', fit: 'fill' }),
      applied: true,
      params: { scaleFactor, smallWidth, smallHeight },
    };
  }

  /**
   * Apply salt and pepper noise
   *
   * Sets `floor(pixelCount * amount)` randomly chosen pixels (with
   * replacement) to pure white or pure black. Only the first three channels
   * are written, so a fourth channel is left untouched.
   */
  private async applySaltPepper(
    pipeline: sharp.Sharp,
    config: SaltPepperConfig
  ): Promise<TransformResult> {
    const amount = this.random.range(config.amount[0], config.amount[1]);

    // Size and channel count come from the decoded data itself, so they
    // always agree with the length of the pixel buffer.
    const { data, info } = await pipeline.raw().toBuffer({ resolveWithObject: true });
    const pixels = Buffer.from(data);
    const { width, height, channels } = info;

    const totalPixels = width * height;
    const noisePixels = Math.floor(totalPixels * amount);
    const colourChannels = Math.min(channels, 3);

    for (let i = 0; i < noisePixels; i++) {
      const pixelIndex = this.random.int(0, totalPixels - 1) * channels;
      const value = this.random.bool(0.5) ? 255 : 0;
      for (let c = 0; c < colourChannels; c++) {
        pixels[pixelIndex + c] = value;
      }
    }

    return {
      pipeline: sharp(pixels, { raw: { width, height, channels } }),
      applied: true,
      params: { amount, noisePixels },
    };
  }

  /**
   * Apply motion blur by convolving with a line-shaped kernel whose size and
   * angle are sampled from the config
   */
  private async applyMotionBlur(
    pipeline: sharp.Sharp,
    config: MotionBlurConfig
  ): Promise<TransformResult> {
    const kernelSize = this.random.int(config.kernel_size[0], config.kernel_size[1]);
    const angle = this.random.range(config.angle[0], config.angle[1]);
    const kernel = this.createMotionBlurKernel(kernelSize, angle);

    return {
      pipeline: pipeline.convolve({
        width: kernelSize,
        height: kernelSize,
        kernel,
      }),
      applied: true,
      params: { kernelSize, angle },
    };
  }

  /**
   * Create motion blur convolution kernel
   *
   * Returns a row-major `size * size` array that is zero except along a line
   * through the centre at `angle` degrees, where each cell holds `1 / size`.
   */
  private createMotionBlurKernel(size: number, angle: number): number[] {
    const kernel: number[] = new Array(size * size).fill(0);
    const center = Math.floor(size / 2);
    const radians = (angle * Math.PI) / 180;
    const cos = Math.cos(radians);
    const sin = Math.sin(radians);

    for (let i = 0; i < size; i++) {
      const offset = i - center;
      const x = Math.round(center + offset * cos);
      const y = Math.round(center + offset * sin);

      if (x >= 0 && x < size && y >= 0 && y < size) {
        kernel[y * size + x] = 1 / size;
      }
    }

    return kernel;
  }

  /**
   * Apply skew transformation via an affine matrix with the sampled shear
   * factors off the diagonal; exposed areas are filled with white
   */
  private async applySkew(
    pipeline: sharp.Sharp,
    config: SkewConfig
  ): Promise<TransformResult> {
    const xSkew = this.random.range(config.x_range[0], config.x_range[1]);
    const ySkew = this.random.range(config.y_range[0], config.y_range[1]);

    return {
      pipeline: pipeline.affine([[1, xSkew], [ySkew, 1]], { background: '#FFFFFF' }),
      applied: true,
      params: { xSkew, ySkew },
    };
  }

  /**
   * Apply perspective transformation
   *
   * Approximated with a horizontal shear of half the sampled intensity in a
   * random direction; this is not a true projective warp.
   */
  private async applyPerspective(
    pipeline: sharp.Sharp,
    config: PerspectiveConfig
  ): Promise<TransformResult> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const skew = intensity * (this.random.bool(0.5) ? 1 : -1);

    return {
      pipeline: pipeline.affine([[1, skew * 0.5], [0, 1]], { background: '#FFFFFF' }),
      applied: true,
      params: { intensity, skew },
    };
  }

  /**
   * Apply paper texture overlay
   *
   * Multiplies the image with light-grey noise (values 225..255).
   */
  private async applyPaperTexture(
    pipeline: sharp.Sharp,
    config: PaperTextureConfig
  ): Promise<TransformResult> {
    // NOTE(review): opacity is sampled and reported but does not influence
    // the texture. Its units are not visible here, so it is left unapplied
    // rather than guessed at - confirm the intended behaviour.
    const opacity = this.random.range(config.opacity[0], config.opacity[1]);
    const { width, height } = await this.getDimensions(pipeline);

    // Generate paper-like texture (simple noise pattern)
    const baseValue = 240; // Light gray base
    const textureBuffer = Buffer.alloc(width * height);
    for (let i = 0; i < textureBuffer.length; i++) {
      const noise = this.random.range(-15, 15);
      textureBuffer[i] = Math.max(200, Math.min(255, baseValue + noise));
    }

    const textureImage = await this.encodeGrayscale(textureBuffer, width, height);

    return {
      pipeline: pipeline.composite([{ input: textureImage, blend: 'multiply' }]),
      applied: true,
      params: { opacity },
    };
  }

  /**
   * Apply shadow effect
   *
   * Multiplies the image with a gradient that is darkest at the chosen edge
   * and fades to white towards the opposite edge. Returns `applied: false`
   * when the config lists no positions.
   */
  private async applyShadow(
    pipeline: sharp.Sharp,
    config: ShadowConfig
  ): Promise<TransformResult> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const positions = config.position || ['left', 'right', 'top', 'bottom'];
    if (positions.length === 0) {
      // Nothing to pick from: report a skip instead of recording a no-op.
      return { pipeline, applied: false, params: {} };
    }
    const position = this.random.pick(positions);

    const { width, height } = await this.getDimensions(pipeline);

    // Create gradient shadow
    const shadowBuffer = Buffer.alloc(width * height);
    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        let gradient = 1;

        switch (position) {
          case 'left':
            gradient = x / width;
            break;
          case 'right':
            gradient = 1 - x / width;
            break;
          case 'top':
            gradient = y / height;
            break;
          case 'bottom':
            gradient = 1 - y / height;
            break;
        }

        // Quadratic falloff
        gradient = Math.pow(gradient, 2);
        const value = Math.round(255 * (1 - intensity * (1 - gradient)));
        // Clamp so an intensity outside 0..1 cannot wrap around when the
        // value is stored as a byte.
        shadowBuffer[y * width + x] = Math.max(0, Math.min(255, value));
      }
    }

    const shadowImage = await this.encodeGrayscale(shadowBuffer, width, height);

    return {
      pipeline: pipeline.composite([{ input: shadowImage, blend: 'multiply' }]),
      applied: true,
      params: { intensity, position },
    };
  }

  /**
   * Apply ink bleed effect
   *
   * Approximates ink spreading with a small blur (sigma = intensity / 2,
   * clamped to what sharp accepts) followed by a 2% brightness reduction.
   */
  private async applyInkBleed(
    pipeline: sharp.Sharp,
    config: InkBleedConfig
  ): Promise<TransformResult> {
    const intensity = this.random.range(config.intensity[0], config.intensity[1]);
    const sigma = this.clampBlurSigma(intensity * 0.5);

    return {
      pipeline: pipeline.blur(sigma).modulate({ brightness: 0.98 }),
      applied: true,
      params: { intensity },
    };
  }

  /**
   * Increment statistics counter
   */
  private incrementStat(name: string): void {
    const current = this.stats.get(name) || 0;
    this.stats.set(name, current + 1);
  }

  /**
   * Get augmentation statistics
   *
   * Percentages are rounded to one decimal place. Per-transform percentages
   * are relative to the number of augmented samples, and `byTransform` is
   * sorted by count, highest first.
   */
  getStats(): {
    totalProcessed: number;
    totalAugmented: number;
    cleanPercentage: number;
    augmentedPercentage: number;
    byTransform: AugmentationStats[];
    avgTransformsPerSample: number;
  } {
    const cleanCount = this.totalProcessed - this.totalAugmented;
    const cleanPercentage =
      this.totalProcessed > 0 ? (cleanCount / this.totalProcessed) * 100 : 0;
    const augmentedPercentage =
      this.totalProcessed > 0 ? (this.totalAugmented / this.totalProcessed) * 100 : 0;

    const byTransform: AugmentationStats[] = [];
    let totalTransforms = 0;

    for (const [name, count] of this.stats) {
      totalTransforms += count;
      byTransform.push({
        name,
        count,
        percentage: this.totalAugmented > 0 ? (count / this.totalAugmented) * 100 : 0,
      });
    }

    // Sort by count descending
    byTransform.sort((a, b) => b.count - a.count);

    return {
      totalProcessed: this.totalProcessed,
      totalAugmented: this.totalAugmented,
      cleanPercentage: Math.round(cleanPercentage * 10) / 10,
      augmentedPercentage: Math.round(augmentedPercentage * 10) / 10,
      byTransform,
      avgTransformsPerSample:
        this.totalAugmented > 0
          ? Math.round((totalTransforms / this.totalAugmented) * 10) / 10
          : 0,
    };
  }

  /**
   * Reset statistics
   */
  resetStats(): void {
    this.stats.clear();
    this.totalProcessed = 0;
    this.totalAugmented = 0;
  }
}

/**
 * Create augmentation pipeline from config
 *
 * @param config Augmentation settings.
 * @param seed Optional seed for the pipeline's random number generator.
 */
export function createAugmentationPipeline(
  config: AugmentationConfig,
  seed?: number
): AugmentationPipeline {
  return new AugmentationPipeline(config, seed);
}