Spaces:
Running
Running
| /** | |
| * Augmentation Pipeline - OCR-safe image transformations | |
| * | |
| * All augmentations preserve ground truth (text labels are never altered) | |
| */ | |
| import sharp from 'sharp'; | |
| import type { | |
| AugmentationConfig, | |
| TransformsConfig, | |
| RotationConfig, | |
| SkewConfig, | |
| GaussianBlurConfig, | |
| MotionBlurConfig, | |
| GaussianNoiseConfig, | |
| SaltPepperConfig, | |
| JpegArtifactsConfig, | |
| ResolutionDegradationConfig, | |
| BrightnessConfig, | |
| ContrastConfig, | |
| PerspectiveConfig, | |
| PaperTextureConfig, | |
| ShadowConfig, | |
| InkBleedConfig | |
| } from '../core/config.js'; | |
/**
 * Outcome of running a single image through `AugmentationPipeline.apply`.
 */
export interface AugmentationResult {
  /** Resulting image bytes (the untouched input when the sample was not augmented). */
  buffer: Buffer;
  /** Names of the transforms that were applied, in application order. */
  applied: string[];
  /** Sampled parameters for each applied transform, keyed by transform name. */
  stats: Record<string, unknown>;
}
/**
 * Usage counter for one transform, as reported by `AugmentationPipeline.getStats`.
 */
export interface AugmentationStats {
  /** Transform name (the key used in the transforms config). */
  name: string;
  /** Number of samples this transform was applied to. */
  count: number;
  /** `count` as a percentage of all augmented samples. */
  percentage: number;
}
/**
 * Seeded pseudo-random number generator.
 *
 * Linear congruential generator with the recurrence
 * `state = (1103515245 * state + 12345) mod 2^31`.
 *
 * The recurrence is evaluated with exact 32-bit integer arithmetic
 * (`Math.imul`). A plain `state * 1103515245` exceeds 2^53 and would be
 * rounded by double-precision floats, silently corrupting the low bits of
 * the state. The seed is normalised to a non-negative integer below 2^31 so
 * that negative, fractional or non-finite seeds cannot produce values
 * outside `[0, 1)`.
 */
class SeededRandom {
  /** Modulus of the generator (2^31). */
  private static readonly MODULUS = 0x80000000;

  private seed: number;

  /**
   * @param seed Initial state; defaults to the current time in milliseconds.
   */
  constructor(seed?: number) {
    const initial = seed ?? Date.now();
    this.seed = Number.isFinite(initial)
      ? Math.abs(Math.trunc(initial)) % SeededRandom.MODULUS
      : 0;
  }

  /**
   * Advance the generator.
   * @returns A value in `[0, 1)`.
   */
  next(): number {
    // Math.imul keeps the exact low 32 bits of the product; the mask reduces
    // the sum modulo 2^31 and guarantees a non-negative state.
    this.seed = (Math.imul(this.seed, 1103515245) + 12345) & 0x7fffffff;
    return this.seed / SeededRandom.MODULUS;
  }

  /**
   * @returns A float in `[min, max)`.
   */
  range(min: number, max: number): number {
    return min + this.next() * (max - min);
  }

  /**
   * @returns An integer in `[min, max]` (both bounds inclusive).
   */
  int(min: number, max: number): number {
    return Math.floor(this.range(min, max + 1));
  }

  /**
   * @param probability Chance of returning `true`, in `[0, 1]`.
   */
  bool(probability: number = 0.5): boolean {
    return this.next() < probability;
  }

  /**
   * Pick one element of `array`. For an empty array the result is
   * `undefined` at runtime, because index 0 does not exist.
   */
  pick<T>(array: readonly T[]): T {
    return array[this.int(0, array.length - 1)];
  }
}
| /** | |
| * Augmentation Pipeline class | |
| */ | |
| export class AugmentationPipeline { | |
| private config: AugmentationConfig; | |
| private random: SeededRandom; | |
| private stats: Map<string, number> = new Map(); | |
| private totalProcessed = 0; | |
| private totalAugmented = 0; | |
| constructor(config: AugmentationConfig, seed?: number) { | |
| this.config = config; | |
| this.random = new SeededRandom(seed); | |
| } | |
| /** | |
| * Apply augmentations to an image buffer | |
| */ | |
| async apply(imageBuffer: Buffer): Promise<AugmentationResult> { | |
| this.totalProcessed++; | |
| if (!this.config.enabled) { | |
| return { buffer: imageBuffer, applied: [], stats: {} }; | |
| } | |
| // Decide if this sample should be augmented | |
| const shouldAugment = this.random.bool(this.config.apply_percentage / 100); | |
| if (!shouldAugment) { | |
| return { buffer: imageBuffer, applied: [], stats: {} }; | |
| } | |
| this.totalAugmented++; | |
| const applied: string[] = []; | |
| const stats: Record<string, unknown> = {}; | |
| let pipeline = sharp(imageBuffer); | |
| const transforms = this.config.transforms || {}; | |
| const maxTransforms = this.config.max_transforms_per_sample || 4; | |
| // Collect eligible transforms | |
| const eligibleTransforms = this.getEligibleTransforms(transforms); | |
| // Randomly select transforms up to max | |
| const selectedTransforms = this.selectTransforms(eligibleTransforms, maxTransforms); | |
| // Apply selected transforms | |
| for (const transformName of selectedTransforms) { | |
| try { | |
| const result = await this.applyTransform(pipeline, transformName, transforms); | |
| if (result.applied) { | |
| pipeline = result.pipeline; | |
| applied.push(transformName); | |
| stats[transformName] = result.params; | |
| this.incrementStat(transformName); | |
| } | |
| } catch (error) { | |
| console.warn(`Transform ${transformName} failed:`, error); | |
| } | |
| } | |
| const buffer = await pipeline.png().toBuffer(); | |
| return { buffer, applied, stats }; | |
| } | |
| /** | |
| * Get list of eligible transforms based on their percentage | |
| */ | |
| private getEligibleTransforms(transforms: TransformsConfig): string[] { | |
| const eligible: string[] = []; | |
| for (const [name, config] of Object.entries(transforms)) { | |
| if (config && (config as { enabled?: boolean }).enabled) { | |
| eligible.push(name); | |
| } | |
| } | |
| return eligible; | |
| } | |
| /** | |
| * Select transforms based on their percentage weights | |
| */ | |
| private selectTransforms(eligible: string[], max: number): string[] { | |
| const selected: string[] = []; | |
| const transforms = this.config.transforms || {}; | |
| for (const name of eligible) { | |
| if (selected.length >= max) break; | |
| const config = transforms[name as keyof TransformsConfig]; | |
| if (!config) continue; | |
| const percentage = (config as { percentage?: number }).percentage || 0; | |
| if (this.random.bool(percentage / 100)) { | |
| selected.push(name); | |
| } | |
| } | |
| return selected; | |
| } | |
| /** | |
| * Apply a single transform | |
| */ | |
| private async applyTransform( | |
| pipeline: sharp.Sharp, | |
| name: string, | |
| transforms: TransformsConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const config = transforms[name as keyof TransformsConfig]; | |
| if (!config) { | |
| return { pipeline, applied: false, params: {} }; | |
| } | |
| switch (name) { | |
| case 'rotation': | |
| return this.applyRotation(pipeline, config as RotationConfig); | |
| case 'gaussian_blur': | |
| return this.applyGaussianBlur(pipeline, config as GaussianBlurConfig); | |
| case 'gaussian_noise': | |
| return this.applyGaussianNoise(pipeline, config as GaussianNoiseConfig); | |
| case 'brightness': | |
| return this.applyBrightness(pipeline, config as BrightnessConfig); | |
| case 'contrast': | |
| return this.applyContrast(pipeline, config as ContrastConfig); | |
| case 'jpeg_artifacts': | |
| return this.applyJpegArtifacts(pipeline, config as JpegArtifactsConfig); | |
| case 'resolution_degradation': | |
| return this.applyResolutionDegradation(pipeline, config as ResolutionDegradationConfig); | |
| case 'salt_pepper': | |
| return this.applySaltPepper(pipeline, config as SaltPepperConfig); | |
| case 'motion_blur': | |
| return this.applyMotionBlur(pipeline, config as MotionBlurConfig); | |
| case 'skew': | |
| return this.applySkew(pipeline, config as SkewConfig); | |
| case 'perspective': | |
| return this.applyPerspective(pipeline, config as PerspectiveConfig); | |
| case 'paper_texture': | |
| return this.applyPaperTexture(pipeline, config as PaperTextureConfig); | |
| case 'shadow': | |
| return this.applyShadow(pipeline, config as ShadowConfig); | |
| case 'ink_bleed': | |
| return this.applyInkBleed(pipeline, config as InkBleedConfig); | |
| default: | |
| return { pipeline, applied: false, params: {} }; | |
| } | |
| } | |
| /** | |
| * Apply rotation | |
| */ | |
| private async applyRotation( | |
| pipeline: sharp.Sharp, | |
| config: RotationConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const angle = this.random.range(config.range[0], config.range[1]); | |
| return { | |
| pipeline: pipeline.rotate(angle, { background: '#FFFFFF' }), | |
| applied: true, | |
| params: { angle }, | |
| }; | |
| } | |
| /** | |
| * Apply Gaussian blur | |
| */ | |
| private async applyGaussianBlur( | |
| pipeline: sharp.Sharp, | |
| config: GaussianBlurConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const sigma = this.random.range(config.sigma[0], config.sigma[1]); | |
| return { | |
| pipeline: pipeline.blur(sigma), | |
| applied: true, | |
| params: { sigma }, | |
| }; | |
| } | |
| /** | |
| * Apply Gaussian noise | |
| */ | |
| private async applyGaussianNoise( | |
| pipeline: sharp.Sharp, | |
| config: GaussianNoiseConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const std = this.random.range(config.std[0], config.std[1]); | |
| // Get image metadata | |
| const metadata = await pipeline.metadata(); | |
| const width = metadata.width || 256; | |
| const height = metadata.height || 64; | |
| // Generate noise overlay | |
| const noiseBuffer = Buffer.alloc(width * height); | |
| for (let i = 0; i < noiseBuffer.length; i++) { | |
| // Box-Muller transform for Gaussian noise | |
| const u1 = this.random.next(); | |
| const u2 = this.random.next(); | |
| const z = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2); | |
| const noise = Math.round(128 + z * std); | |
| noiseBuffer[i] = Math.max(0, Math.min(255, noise)); | |
| } | |
| const noiseImage = sharp(noiseBuffer, { | |
| raw: { width, height, channels: 1 }, | |
| }).png(); | |
| // Composite noise as overlay | |
| const noiseData = await noiseImage.toBuffer(); | |
| return { | |
| pipeline: pipeline.composite([{ | |
| input: noiseData, | |
| blend: 'overlay', | |
| }]), | |
| applied: true, | |
| params: { std }, | |
| }; | |
| } | |
| /** | |
| * Apply brightness adjustment | |
| */ | |
| private async applyBrightness( | |
| pipeline: sharp.Sharp, | |
| config: BrightnessConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const delta = this.random.range(config.range[0], config.range[1]); | |
| // Sharp uses 1.0 as base, convert delta to multiplier | |
| const multiplier = 1 + delta / 100; | |
| return { | |
| pipeline: pipeline.modulate({ brightness: multiplier }), | |
| applied: true, | |
| params: { delta, multiplier }, | |
| }; | |
| } | |
| /** | |
| * Apply contrast adjustment | |
| */ | |
| private async applyContrast( | |
| pipeline: sharp.Sharp, | |
| config: ContrastConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const factor = this.random.range(config.range[0], config.range[1]); | |
| return { | |
| pipeline: pipeline.linear(factor, -(128 * factor) + 128), | |
| applied: true, | |
| params: { factor }, | |
| }; | |
| } | |
| /** | |
| * Apply JPEG compression artifacts | |
| */ | |
| private async applyJpegArtifacts( | |
| pipeline: sharp.Sharp, | |
| config: JpegArtifactsConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const quality = this.random.int(config.quality[0], config.quality[1]); | |
| // Compress to JPEG and back to PNG | |
| const jpegBuffer = await pipeline.jpeg({ quality }).toBuffer(); | |
| return { | |
| pipeline: sharp(jpegBuffer), | |
| applied: true, | |
| params: { quality }, | |
| }; | |
| } | |
| /** | |
| * Apply resolution degradation (downscale then upscale) | |
| */ | |
| private async applyResolutionDegradation( | |
| pipeline: sharp.Sharp, | |
| config: ResolutionDegradationConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const scaleFactor = this.random.range(config.scale_factor[0], config.scale_factor[1]); | |
| const metadata = await pipeline.metadata(); | |
| const width = metadata.width || 256; | |
| const height = metadata.height || 64; | |
| const smallWidth = Math.round(width * scaleFactor); | |
| const smallHeight = Math.round(height * scaleFactor); | |
| // Downscale then upscale | |
| const degraded = await pipeline | |
| .resize(smallWidth, smallHeight, { kernel: 'nearest' }) | |
| .resize(width, height, { kernel: 'nearest' }) | |
| .toBuffer(); | |
| return { | |
| pipeline: sharp(degraded), | |
| applied: true, | |
| params: { scaleFactor, smallWidth, smallHeight }, | |
| }; | |
| } | |
| /** | |
| * Apply salt and pepper noise | |
| */ | |
| private async applySaltPepper( | |
| pipeline: sharp.Sharp, | |
| config: SaltPepperConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const amount = this.random.range(config.amount[0], config.amount[1]); | |
| const metadata = await pipeline.metadata(); | |
| const width = metadata.width || 256; | |
| const height = metadata.height || 64; | |
| // Get raw pixel data | |
| const { data, info } = await pipeline.raw().toBuffer({ resolveWithObject: true }); | |
| const pixels = Buffer.from(data); | |
| const channels = info.channels; | |
| // Apply salt and pepper | |
| const totalPixels = width * height; | |
| const noisePixels = Math.floor(totalPixels * amount); | |
| for (let i = 0; i < noisePixels; i++) { | |
| const pixelIndex = this.random.int(0, totalPixels - 1) * channels; | |
| const value = this.random.bool(0.5) ? 255 : 0; | |
| for (let c = 0; c < Math.min(channels, 3); c++) { | |
| pixels[pixelIndex + c] = value; | |
| } | |
| } | |
| return { | |
| pipeline: sharp(pixels, { raw: { width, height, channels } }), | |
| applied: true, | |
| params: { amount, noisePixels }, | |
| }; | |
| } | |
| /** | |
| * Apply motion blur | |
| */ | |
| private async applyMotionBlur( | |
| pipeline: sharp.Sharp, | |
| config: MotionBlurConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const kernelSize = this.random.int(config.kernel_size[0], config.kernel_size[1]); | |
| const angle = this.random.range(config.angle[0], config.angle[1]); | |
| // Create motion blur kernel | |
| const kernel = this.createMotionBlurKernel(kernelSize, angle); | |
| return { | |
| pipeline: pipeline.convolve({ | |
| width: kernelSize, | |
| height: kernelSize, | |
| kernel, | |
| }), | |
| applied: true, | |
| params: { kernelSize, angle }, | |
| }; | |
| } | |
| /** | |
| * Create motion blur convolution kernel | |
| */ | |
| private createMotionBlurKernel(size: number, angle: number): number[] { | |
| const kernel: number[] = new Array(size * size).fill(0); | |
| const center = Math.floor(size / 2); | |
| const radians = (angle * Math.PI) / 180; | |
| for (let i = 0; i < size; i++) { | |
| const dx = (i - center) * Math.cos(radians); | |
| const dy = (i - center) * Math.sin(radians); | |
| const x = Math.round(center + dx); | |
| const y = Math.round(center + dy); | |
| if (x >= 0 && x < size && y >= 0 && y < size) { | |
| kernel[y * size + x] = 1 / size; | |
| } | |
| } | |
| return kernel; | |
| } | |
| /** | |
| * Apply skew transformation | |
| */ | |
| private async applySkew( | |
| pipeline: sharp.Sharp, | |
| config: SkewConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const xSkew = this.random.range(config.x_range[0], config.x_range[1]); | |
| const ySkew = this.random.range(config.y_range[0], config.y_range[1]); | |
| // Apply affine transformation for skew | |
| return { | |
| pipeline: pipeline.affine([[1, xSkew], [ySkew, 1]], { background: '#FFFFFF' }), | |
| applied: true, | |
| params: { xSkew, ySkew }, | |
| }; | |
| } | |
| /** | |
| * Apply perspective transformation | |
| */ | |
| private async applyPerspective( | |
| pipeline: sharp.Sharp, | |
| config: PerspectiveConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const intensity = this.random.range(config.intensity[0], config.intensity[1]); | |
| // Simulate perspective with skew + scale | |
| const skew = intensity * (this.random.bool(0.5) ? 1 : -1); | |
| return { | |
| pipeline: pipeline.affine([[1, skew * 0.5], [0, 1]], { background: '#FFFFFF' }), | |
| applied: true, | |
| params: { intensity, skew }, | |
| }; | |
| } | |
| /** | |
| * Apply paper texture overlay | |
| */ | |
| private async applyPaperTexture( | |
| pipeline: sharp.Sharp, | |
| config: PaperTextureConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const opacity = this.random.range(config.opacity[0], config.opacity[1]); | |
| const metadata = await pipeline.metadata(); | |
| const width = metadata.width || 256; | |
| const height = metadata.height || 64; | |
| // Generate paper-like texture (simple noise pattern) | |
| const textureBuffer = Buffer.alloc(width * height); | |
| for (let i = 0; i < textureBuffer.length; i++) { | |
| const baseValue = 240; // Light gray base | |
| const noise = this.random.range(-15, 15); | |
| textureBuffer[i] = Math.max(200, Math.min(255, baseValue + noise)); | |
| } | |
| const textureImage = await sharp(textureBuffer, { | |
| raw: { width, height, channels: 1 }, | |
| }).png().toBuffer(); | |
| return { | |
| pipeline: pipeline.composite([{ | |
| input: textureImage, | |
| blend: 'multiply', | |
| }]), | |
| applied: true, | |
| params: { opacity }, | |
| }; | |
| } | |
| /** | |
| * Apply shadow effect | |
| */ | |
| private async applyShadow( | |
| pipeline: sharp.Sharp, | |
| config: ShadowConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const intensity = this.random.range(config.intensity[0], config.intensity[1]); | |
| const positions = config.position || ['left', 'right', 'top', 'bottom']; | |
| const position = this.random.pick(positions); | |
| const metadata = await pipeline.metadata(); | |
| const width = metadata.width || 256; | |
| const height = metadata.height || 64; | |
| // Create gradient shadow | |
| const shadowBuffer = Buffer.alloc(width * height); | |
| for (let y = 0; y < height; y++) { | |
| for (let x = 0; x < width; x++) { | |
| let gradient = 1; | |
| switch (position) { | |
| case 'left': | |
| gradient = x / width; | |
| break; | |
| case 'right': | |
| gradient = 1 - x / width; | |
| break; | |
| case 'top': | |
| gradient = y / height; | |
| break; | |
| case 'bottom': | |
| gradient = 1 - y / height; | |
| break; | |
| } | |
| // Apply exponential falloff | |
| gradient = Math.pow(gradient, 2); | |
| const value = Math.round(255 * (1 - intensity * (1 - gradient))); | |
| shadowBuffer[y * width + x] = value; | |
| } | |
| } | |
| const shadowImage = await sharp(shadowBuffer, { | |
| raw: { width, height, channels: 1 }, | |
| }).png().toBuffer(); | |
| return { | |
| pipeline: pipeline.composite([{ | |
| input: shadowImage, | |
| blend: 'multiply', | |
| }]), | |
| applied: true, | |
| params: { intensity, position }, | |
| }; | |
| } | |
| /** | |
| * Apply ink bleed effect | |
| */ | |
| private async applyInkBleed( | |
| pipeline: sharp.Sharp, | |
| config: InkBleedConfig | |
| ): Promise<{ pipeline: sharp.Sharp; applied: boolean; params: Record<string, unknown> }> { | |
| const intensity = this.random.range(config.intensity[0], config.intensity[1]); | |
| // Dilate the image slightly to simulate ink bleeding | |
| // This is approximated with a small blur followed by threshold | |
| const processed = await pipeline | |
| .blur(intensity * 0.5) | |
| .modulate({ brightness: 0.98 }) | |
| .toBuffer(); | |
| return { | |
| pipeline: sharp(processed), | |
| applied: true, | |
| params: { intensity }, | |
| }; | |
| } | |
| /** | |
| * Increment statistics counter | |
| */ | |
| private incrementStat(name: string): void { | |
| const current = this.stats.get(name) || 0; | |
| this.stats.set(name, current + 1); | |
| } | |
| /** | |
| * Get augmentation statistics | |
| */ | |
| getStats(): { | |
| totalProcessed: number; | |
| totalAugmented: number; | |
| cleanPercentage: number; | |
| augmentedPercentage: number; | |
| byTransform: AugmentationStats[]; | |
| avgTransformsPerSample: number; | |
| } { | |
| const cleanCount = this.totalProcessed - this.totalAugmented; | |
| const cleanPercentage = this.totalProcessed > 0 | |
| ? (cleanCount / this.totalProcessed) * 100 | |
| : 0; | |
| const augmentedPercentage = this.totalProcessed > 0 | |
| ? (this.totalAugmented / this.totalProcessed) * 100 | |
| : 0; | |
| const byTransform: AugmentationStats[] = []; | |
| let totalTransforms = 0; | |
| for (const [name, count] of this.stats) { | |
| totalTransforms += count; | |
| byTransform.push({ | |
| name, | |
| count, | |
| percentage: this.totalAugmented > 0 | |
| ? (count / this.totalAugmented) * 100 | |
| : 0, | |
| }); | |
| } | |
| // Sort by count descending | |
| byTransform.sort((a, b) => b.count - a.count); | |
| return { | |
| totalProcessed: this.totalProcessed, | |
| totalAugmented: this.totalAugmented, | |
| cleanPercentage: Math.round(cleanPercentage * 10) / 10, | |
| augmentedPercentage: Math.round(augmentedPercentage * 10) / 10, | |
| byTransform, | |
| avgTransformsPerSample: this.totalAugmented > 0 | |
| ? Math.round((totalTransforms / this.totalAugmented) * 10) / 10 | |
| : 0, | |
| }; | |
| } | |
| /** | |
| * Reset statistics | |
| */ | |
| resetStats(): void { | |
| this.stats.clear(); | |
| this.totalProcessed = 0; | |
| this.totalAugmented = 0; | |
| } | |
| } | |
/**
 * Factory for `AugmentationPipeline`.
 *
 * @param config Augmentation settings for the new pipeline.
 * @param seed Optional seed for its random generator.
 * @returns A new pipeline instance.
 */
export function createAugmentationPipeline(
  config: AugmentationConfig,
  seed?: number
): AugmentationPipeline {
  const pipeline = new AugmentationPipeline(config, seed);
  return pipeline;
}