Spaces:

iharshyadav
/

SmartCertify-ML

Sleeping

File size: 4,434 Bytes

6de2f28

"""
SmartCertify ML — Image Preprocessing
Image preprocessing utilities for the CNN tampering detector.
"""

import numpy as np
import logging
import io
import base64
from pathlib import Path
from typing import Optional, Tuple

from PIL import Image, ImageFilter, ImageDraw, ImageFont

import sys
# Prepend the fourth ancestor directory of this file to the module search path.
# NOTE(review): presumably this is the project root, added so that absolute
# project imports resolve when the module is run directly — confirm.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent))

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# ImageNet normalization stats: one mean/std value per colour channel,
# applied in preprocess_image to pixel values already scaled to [0, 1].
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
# Size passed to Image.resize() in preprocess_image.
TARGET_SIZE = (224, 224)


def load_image_from_base64(image_base64: str) -> Optional[Image.Image]:
    """Decode a base64 string to an RGB PIL Image.

    Accepts either a bare base64 payload or a data URL of the form
    ``data:<mime>;base64,<payload>``; in the latter case everything up to
    and including the first comma is dropped before decoding.

    Args:
        image_base64: Base64-encoded image bytes, optionally wrapped in a
            data URL.

    Returns:
        The decoded image converted to mode ``"RGB"``, or ``None`` if the
        input cannot be decoded or is not a readable image. Failures are
        logged at error level rather than raised.
    """
    try:
        payload = image_base64
        # A data-URL header is not part of the payload. Left in place, the
        # lenient decoder would turn its letters into junk bytes in front of
        # the real image data and the image would fail to open.
        if isinstance(payload, str) and payload.lstrip().startswith("data:"):
            _header, separator, body = payload.partition(",")
            if separator:
                payload = body

        image_data = base64.b64decode(payload)
        image = Image.open(io.BytesIO(image_data))
        # convert() forces the lazy decode, so corrupt pixel data is caught
        # here as well rather than at first use by the caller.
        return image.convert("RGB")
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary and the decoder
        # can fail in many ways (bad base64, unknown format, truncated data).
        logger.error("Failed to decode base64 image: %s", e)
        return None


def image_to_base64(image: Image.Image, format: str = "PNG") -> str:
    """Serialize a PIL Image and return the bytes as base64 text.

    Args:
        image: Image to serialize.
        format: File format name handed to ``Image.save`` (default ``"PNG"``).

    Returns:
        The encoded image file as a UTF-8 decoded base64 string.
    """
    with io.BytesIO() as stream:
        image.save(stream, format=format)
        raw_bytes = stream.getvalue()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def preprocess_image(image: Image.Image) -> np.ndarray:
    """Preprocess an image for the CNN model.

    Steps: force RGB → resize to ``TARGET_SIZE`` → scale to [0, 1] →
    normalize each channel with the ImageNet mean/std → move channels first.

    Args:
        image: Input image in any PIL mode; non-RGB images are converted
            to RGB first so the result always has exactly three channels.

    Returns:
        A ``float32`` array in channels-first (C, H, W) layout with three
        channels.
    """
    # Grayscale/palette inputs have no channel axis and RGBA carries a fourth
    # plane; both would break the three-channel normalization below.
    if image.mode != "RGB":
        image = image.convert("RGB")

    resized = image.resize(TARGET_SIZE, Image.Resampling.LANCZOS)

    # (H, W, 3) pixel values scaled to [0, 1].
    pixels = np.array(resized, dtype=np.float32) / 255.0

    # Broadcast the per-channel stats over the trailing channel axis; keeping
    # them float32 keeps the result float32.
    mean = np.asarray(IMAGENET_MEAN, dtype=np.float32)
    std = np.asarray(IMAGENET_STD, dtype=np.float32)
    normalized = (pixels - mean) / std

    # Convert HWC → CHW (channels first) for PyTorch.
    return np.transpose(normalized, (2, 0, 1))


def _draw_synthetic_certificate(index: int) -> Image.Image:
    """Render one 400x300 synthetic certificate whose text varies with *index*."""
    img = Image.new("RGB", (400, 300), color=(255, 255, 245))
    draw = ImageDraw.Draw(img)

    # Border, title, recipient/course/date lines, signature rule and caption.
    draw.rectangle([20, 20, 380, 280], outline=(0, 0, 0), width=2)
    draw.text((50, 40), "CERTIFICATE OF COMPLETION", fill=(0, 0, 0))
    draw.text((50, 80), f"Recipient: Student {index}", fill=(50, 50, 50))
    draw.text((50, 110), f"Course: Course {index % 20}", fill=(50, 50, 50))
    draw.text((50, 140), f"Date: 2024-{(index % 12) + 1:02d}-15", fill=(50, 50, 50))
    draw.line([(50, 220), (200, 220)], fill=(0, 0, 100), width=2)
    draw.text((50, 230), "Authorized Signature", fill=(100, 100, 100))
    return img


def _apply_tampering(
    image: Image.Image,
    tampering_type: str,
    rng: np.random.RandomState,
) -> Image.Image:
    """Return a tampered copy of *image*; the input image is never modified."""
    tampered = image.copy()

    if tampering_type == "pixel_alter":
        # Paint a light rectangle over a random region. The draw order
        # (x1, y1, width, height, colour) is part of the seeded sequence.
        x1 = int(rng.randint(50, 250))
        y1 = int(rng.randint(50, 200))
        x2 = x1 + int(rng.randint(30, 100))
        y2 = y1 + int(rng.randint(20, 60))
        # Plain ints rather than NumPy scalars for the Pillow colour tuple.
        color = tuple(int(channel) for channel in rng.randint(200, 255, 3))
        ImageDraw.Draw(tampered).rectangle([x1, y1, x2, y2], fill=color)

    elif tampering_type == "text_overlay":
        x = int(rng.randint(50, 200))
        y = int(rng.randint(50, 200))
        ImageDraw.Draw(tampered).text((x, y), "MODIFIED", fill=(255, 0, 0))

    elif tampering_type == "compression":
        # Round-trip through a very low-quality JPEG to add heavy artifacts.
        buffer = io.BytesIO()
        tampered.save(buffer, "JPEG", quality=5)
        buffer.seek(0)
        tampered = Image.open(buffer).convert("RGB")

    elif tampering_type == "blur":
        tampered = tampered.filter(ImageFilter.GaussianBlur(radius=3))

    elif tampering_type == "crop_paste":
        # Copy a fixed 100x50 region and paste it at another location.
        region = tampered.crop((100, 100, 200, 150))
        tampered.paste(region, (150, 150))

    return tampered


def generate_synthetic_tampered_images(n_samples: int = 100, seed: int = 42) -> list:
    """Generate synthetic authentic/tampered certificate image pairs for training.

    Each sample renders a simple certificate, picks one tampering type at
    random, applies it to a copy, and preprocesses both versions.

    Randomness comes from a private ``RandomState`` seeded with *seed*. For
    a given seed this yields the same draws as seeding the global NumPy RNG
    would, but the caller's global random state is left untouched.

    Args:
        n_samples: Number of pairs to generate; values <= 0 give an empty list.
        seed: Seed for the private random number generator.

    Returns:
        A list of dicts with keys ``"authentic"`` and ``"tampered"`` (the
        outputs of ``preprocess_image``) and ``"tampering_type"`` (the name
        of the corruption applied, as a plain ``str``).
    """
    rng = np.random.RandomState(seed)
    tampering_types = [
        "pixel_alter", "text_overlay", "compression", "blur", "crop_paste"
    ]
    samples = []

    for i in range(n_samples):
        authentic = _draw_synthetic_certificate(i)

        # Choose the type before any geometry draws to keep the seeded
        # sequence stable.
        tampering_type = str(rng.choice(tampering_types))
        tampered = _apply_tampering(authentic, tampering_type, rng)

        samples.append({
            "authentic": preprocess_image(authentic),
            "tampered": preprocess_image(tampered),
            "tampering_type": tampering_type,
        })

    return samples