"""
SmartCertify ML — Image Preprocessing

Image preprocessing utilities for the CNN tampering detector.
"""

import numpy as np
import logging
import io
import base64
from pathlib import Path
from typing import Optional, Tuple
from PIL import Image, ImageFilter, ImageDraw, ImageFont

import sys

# Prepend the directory four levels above this file to the import path.
# NOTE(review): presumably the project root, so top-level packages resolve
# when this module is run directly — confirm against the repository layout.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))

logger = logging.getLogger(__name__)

# ImageNet normalization stats
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
TARGET_SIZE = (224, 224)

# Corruption kinds the synthetic generator can apply. The order matters:
# it determines which kind a given random draw selects.
_TAMPERING_TYPES = [
    "pixel_alter", "text_overlay", "compression", "blur", "crop_paste"
]


def load_image_from_base64(image_base64: str) -> Optional[Image.Image]:
    """Decode a base64 string to an RGB PIL Image.

    Args:
        image_base64: Base64-encoded image bytes. A string of the form
            ``data:<mime>;base64,<payload>`` is also accepted; everything up
            to and including the first comma is dropped before decoding.

    Returns:
        The decoded image converted to RGB, or ``None`` if decoding or image
        parsing fails for any reason (the failure is logged).
    """
    try:
        payload = image_base64
        # Without this, the "data:...;base64," prefix would be fed into the
        # base64 decoder and corrupt the payload. Only applied to str input
        # so bytes input keeps working exactly as before.
        if isinstance(payload, str) and payload.startswith("data:") and "," in payload:
            payload = payload.split(",", 1)[1]
        image_data = base64.b64decode(payload)
        image = Image.open(io.BytesIO(image_data))
        return image.convert("RGB")
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary that reports any
        # decode/parse failure to the caller as None instead of raising.
        logger.error("Failed to decode base64 image: %s", e)
        return None


def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
    """Convert a PIL Image to a base64 string.

    Args:
        image: Image to serialize.
        format: Image file format name passed to ``Image.save``.

    Returns:
        The encoded image file bytes as a UTF-8 base64 string.
    """
    buffer = io.BytesIO()
    image.save(buffer, format=format)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def preprocess_image(image: Image.Image) -> np.ndarray:
    """Preprocess an image for the CNN model.

    Resize → normalize with ImageNet stats → convert to channels-first layout.

    Args:
        image: Input image in any PIL mode; non-RGB input is converted to
            RGB first so the result always has exactly three channels.

    Returns:
        A float32 array of shape ``(3, height, width)`` for ``TARGET_SIZE``.
    """
    # Grayscale input has no channel axis and RGBA has four channels; both
    # would break the 3-channel normalization below.
    if image.mode != "RGB":
        image = image.convert("RGB")

    # Resize
    image = image.resize(TARGET_SIZE, Image.Resampling.LANCZOS)

    # Convert to numpy, scaled to [0, 1]
    img_array = np.asarray(image, dtype=np.float32) / 255.0

    # Normalize with ImageNet stats; the (3,) vectors broadcast over the
    # trailing channel axis of the (H, W, 3) array.
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    img_array = (img_array - mean) / std

    # Convert to CHW format (channels first) for PyTorch
    return np.transpose(img_array, (2, 0, 1))


def _render_certificate(index: int) -> Image.Image:
    """Draw one synthetic 400x300 "certificate" whose text varies with index."""
    img = Image.new("RGB", (400, 300), color=(255, 255, 245))
    draw = ImageDraw.Draw(img)

    # Add certificate-like content
    draw.rectangle([20, 20, 380, 280], outline=(0, 0, 0), width=2)
    draw.text((50, 40), "CERTIFICATE OF COMPLETION", fill=(0, 0, 0))
    draw.text((50, 80), f"Recipient: Student {index}", fill=(50, 50, 50))
    draw.text((50, 110), f"Course: Course {index % 20}", fill=(50, 50, 50))
    draw.text((50, 140), f"Date: 2024-{(index % 12) + 1:02d}-15", fill=(50, 50, 50))
    draw.line([(50, 220), (200, 220)], fill=(0, 0, 100), width=2)
    draw.text((50, 230), "Authorized Signature", fill=(100, 100, 100))
    return img


def _apply_tampering(image: Image.Image, tampering_type: str) -> Image.Image:
    """Return a corrupted copy of image; draws from the global NumPy RNG."""
    tampered = image.copy()

    if tampering_type == "pixel_alter":
        # Random pixel region alteration. The draw order (x1, y1, width,
        # height, color) must stay fixed to keep seeded output reproducible.
        x1 = np.random.randint(50, 250)
        y1 = np.random.randint(50, 200)
        x2 = x1 + np.random.randint(30, 100)
        y2 = y1 + np.random.randint(20, 60)
        # .tolist() yields plain Python ints rather than NumPy scalars.
        color = tuple(np.random.randint(200, 255, 3).tolist())
        ImageDraw.Draw(tampered).rectangle([x1, y1, x2, y2], fill=color)

    elif tampering_type == "text_overlay":
        x = np.random.randint(50, 200)
        y = np.random.randint(50, 200)
        ImageDraw.Draw(tampered).text((x, y), "MODIFIED", fill=(255, 0, 0))

    elif tampering_type == "compression":
        # Round-trip through a very low quality JPEG to add artifacts.
        buffer = io.BytesIO()
        tampered.save(buffer, "JPEG", quality=5)
        buffer.seek(0)
        tampered = Image.open(buffer).convert("RGB")

    elif tampering_type == "blur":
        tampered = tampered.filter(ImageFilter.GaussianBlur(radius=3))

    elif tampering_type == "crop_paste":
        # Copy a fixed region and paste it at an offset within the image.
        region = tampered.crop((100, 100, 200, 150))
        tampered.paste(region, (150, 150))

    return tampered


def generate_synthetic_tampered_images(n_samples: int = 100, seed: int = 42) -> list:
    """Generate synthetic tampered certificate images for training.

    Creates pairs: (authentic, tampered) with different corruption types.

    Note:
        This reseeds NumPy's global random generator with ``seed`` on every
        call, which affects later ``np.random`` use by the caller.

    Args:
        n_samples: Number of pairs to generate.
        seed: Seed for NumPy's global random generator.

    Returns:
        A list of dicts with keys ``"authentic"`` and ``"tampered"`` (arrays
        from ``preprocess_image``) and ``"tampering_type"`` (the corruption
        applied to the tampered image).
    """
    np.random.seed(seed)
    samples = []

    for i in range(n_samples):
        # Create a synthetic "certificate" image
        authentic = _render_certificate(i)

        # Pick the corruption kind before any per-kind draws.
        tampering_type = np.random.choice(_TAMPERING_TYPES)
        tampered = _apply_tampering(authentic, tampering_type)

        samples.append({
            "authentic": preprocess_image(authentic),
            "tampered": preprocess_image(tampered),
            "tampering_type": tampering_type,
        })

    return samples