""" Camera/Scene Augmentation — synthetic diversity for prototype LoRA training. Generates diverse camera perspectives from a single image by simulating different camera types, scenes, and conditions. Used in Stage 5 (prototype LoRA training) to create varied conditioning inputs so each prototype specializes on a different camera/scene combination. Augmentations: - Crop position: simulate PTZ pan/tilt (random crop of original) - Brightness/contrast: simulate day/night/indoor/outdoor lighting - Color temperature: simulate warm (indoor) vs cool (outdoor) cameras - Noise: simulate low-light camera sensor noise - Blur: simulate focus/motion blur from cheap cameras - Resolution: simulate different camera resolutions Reference: Train on augmented data so hypernetwork learns to differentiate camera/scene contexts and generate specialized adapters. """ from __future__ import annotations import random from typing import Optional import torch import torch.nn.functional as F class CameraAugmentor: """ Applies random camera-specific augmentations to simulate diverse cameras. Args: img_size: Target output image size (square) crop_range: Min/max fraction of image to crop (simulates zoom level) brightness_range: Min/max brightness multiplier contrast_range: Min/max contrast multiplier noise_range: Min/max Gaussian noise std blur_prob: Probability of applying Gaussian blur color_temp_range: Min/max color temperature shift """ def __init__( self, img_size: int = 448, crop_range: tuple[float, float] = (0.5, 1.0), brightness_range: tuple[float, float] = (0.5, 1.5), contrast_range: tuple[float, float] = (0.7, 1.3), noise_range: tuple[float, float] = (0.0, 0.05), blur_prob: float = 0.3, color_temp_range: tuple[float, float] = (-0.1, 0.1), ): self.img_size = img_size self.crop_range = crop_range self.brightness_range = brightness_range self.contrast_range = contrast_range self.noise_range = noise_range self.blur_prob = blur_prob self.color_temp_range = color_temp_range def __call__(self, image: torch.Tensor) -> tuple[torch.Tensor, dict]: """ Apply random camera augmentation. Args: image: [3, H, W] or [B, 3, H, W] image tensor (0-1 range) Returns: augmented_image: same shape, augmented aug_params: dict describing which augmentations were applied """ squeeze = False if image.dim() == 3: image = image.unsqueeze(0) squeeze = True B, C, H, W = image.shape aug_params = {} # 1. Random crop (simulates PTZ zoom/position) crop_frac = random.uniform(*self.crop_range) crop_h = int(H * crop_frac) crop_w = int(W * crop_frac) top = random.randint(0, H - crop_h) left = random.randint(0, W - crop_w) image = image[:, :, top:top + crop_h, left:left + crop_w] image = F.interpolate(image, size=(self.img_size, self.img_size), mode="bilinear", align_corners=False) aug_params["crop"] = {"frac": crop_frac, "top": top, "left": left} # 2. Brightness brightness = random.uniform(*self.brightness_range) image = image * brightness aug_params["brightness"] = brightness # 3. Contrast contrast = random.uniform(*self.contrast_range) mean = image.mean(dim=[2, 3], keepdim=True) image = (image - mean) * contrast + mean aug_params["contrast"] = contrast # 4. Color temperature shift temp = random.uniform(*self.color_temp_range) if abs(temp) > 0.01: # Warm: boost red, reduce blue. Cool: opposite. image[:, 0] += temp # Red channel image[:, 2] -= temp # Blue channel aug_params["color_temp"] = temp # 5. Gaussian noise (sensor noise) noise_std = random.uniform(*self.noise_range) if noise_std > 0.001: noise = torch.randn_like(image) * noise_std image = image + noise aug_params["noise_std"] = noise_std # 6. Gaussian blur (focus/motion blur) if random.random() < self.blur_prob: kernel_size = random.choice([3, 5]) sigma = random.uniform(0.5, 2.0) image = _gaussian_blur(image, kernel_size, sigma) aug_params["blur"] = {"kernel": kernel_size, "sigma": sigma} else: aug_params["blur"] = None # Clamp to valid range image = image.clamp(0, 1) if squeeze: image = image.squeeze(0) return image, aug_params class ScenePresetAugmentor: """ Applies pre-defined scene augmentation presets for prototype diversity. Each preset simulates a specific camera installation scenario. """ PRESETS = { "lobby_daytime": {"brightness": 1.2, "contrast": 1.0, "color_temp": 0.05, "noise": 0.01, "crop_frac": 0.8}, "lobby_nighttime": {"brightness": 0.5, "contrast": 0.8, "color_temp": -0.05, "noise": 0.04, "crop_frac": 0.8}, "parking_day": {"brightness": 1.3, "contrast": 1.2, "color_temp": 0.0, "noise": 0.01, "crop_frac": 0.6}, "parking_night": {"brightness": 0.3, "contrast": 0.7, "color_temp": -0.08, "noise": 0.05, "crop_frac": 0.6}, "gate_clear": {"brightness": 1.1, "contrast": 1.1, "color_temp": 0.02, "noise": 0.01, "crop_frac": 0.9}, "gate_rain": {"brightness": 0.7, "contrast": 0.9, "color_temp": -0.03, "noise": 0.03, "crop_frac": 0.9}, "corridor_empty": {"brightness": 0.9, "contrast": 1.0, "color_temp": 0.08, "noise": 0.02, "crop_frac": 1.0}, "corridor_crowded": {"brightness": 0.9, "contrast": 1.0, "color_temp": 0.08, "noise": 0.02, "crop_frac": 0.7}, "rooftop_twilight": {"brightness": 0.4, "contrast": 1.1, "color_temp": -0.1, "noise": 0.03, "crop_frac": 0.5}, "rooftop_noon": {"brightness": 1.4, "contrast": 1.3, "color_temp": 0.0, "noise": 0.01, "crop_frac": 0.5}, } def __init__(self, img_size: int = 448): self.img_size = img_size def augment(self, image: torch.Tensor, preset_name: str) -> torch.Tensor: """Apply a named scene preset augmentation.""" preset = self.PRESETS.get(preset_name, self.PRESETS["lobby_daytime"]) if image.dim() == 3: image = image.unsqueeze(0) B, C, H, W = image.shape # Crop frac = preset["crop_frac"] ch, cw = int(H * frac), int(W * frac) top = (H - ch) // 2 left = (W - cw) // 2 image = image[:, :, top:top + ch, left:left + cw] image = F.interpolate(image, size=(self.img_size, self.img_size), mode="bilinear", align_corners=False) # Brightness + contrast mean = image.mean(dim=[2, 3], keepdim=True) image = ((image - mean) * preset["contrast"] + mean) * preset["brightness"] # Color temperature image[:, 0] += preset["color_temp"] image[:, 2] -= preset["color_temp"] # Noise if preset["noise"] > 0: image = image + torch.randn_like(image) * preset["noise"] return image.clamp(0, 1).squeeze(0) @classmethod def list_presets(cls) -> list[str]: return list(cls.PRESETS.keys()) def _gaussian_blur(image: torch.Tensor, kernel_size: int, sigma: float) -> torch.Tensor: """Apply Gaussian blur to image tensor.""" # Create 1D Gaussian kernel coords = torch.arange(kernel_size, dtype=torch.float32) - kernel_size // 2 kernel_1d = torch.exp(-0.5 * (coords / sigma) ** 2) kernel_1d = kernel_1d / kernel_1d.sum() # Create 2D kernel kernel_2d = kernel_1d.unsqueeze(0) * kernel_1d.unsqueeze(1) kernel_2d = kernel_2d.unsqueeze(0).unsqueeze(0) # [1, 1, K, K] kernel_2d = kernel_2d.expand(image.shape[1], -1, -1, -1).to(image.device) # [C, 1, K, K] pad = kernel_size // 2 return F.conv2d(image, kernel_2d, padding=pad, groups=image.shape[1])