| |
| """ |
| CSIRO Image2Biomass Prediction - Training Pipeline |
| ==================================================== |
| Multi-output regression: predicting 5 biomass targets from pasture images. |
| |
| Targets: Dry_Green_g, Dry_Dead_g, Dry_Clover_g, GDM_g, Dry_Total_g |
| Metric: Weighted R² (weights: 0.1, 0.1, 0.1, 0.2, 0.5) |
| |
| Architecture: |
| - Backbone: DINOv2 / ConvNeXt / EfficientNet (via timm) |
| - Head: MLP with LayerNorm, GELU, Dropout |
| - Loss: SmoothL1 + optional weighted MSE + consistency regularizer |
| - Training: Mixed precision, gradient checkpointing, differential LR |
| |
| Usage: |
| python train.py --data_dir /path/to/competition/data --backbone dinov2_base --epochs 50 |
| """ |
|
|
| import os |
| import sys |
| import json |
| import time |
| import random |
| import argparse |
| import logging |
| from pathlib import Path |
| from typing import Dict, List, Optional, Tuple |
|
|
| import numpy as np |
| import pandas as pd |
| import torch |
| import torch.nn as nn |
| import torch.nn.functional as F |
| from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler |
| from torch.cuda.amp import GradScaler, autocast |
| import timm |
|
|
| try: |
| import albumentations as A |
| from albumentations.pytorch import ToTensorV2 |
| HAS_ALBUMENTATIONS = True |
| except ImportError: |
| HAS_ALBUMENTATIONS = False |
| from torchvision import transforms |
|
|
| from PIL import Image |
| from sklearn.model_selection import KFold, StratifiedKFold |
|
|
| logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') |
| logger = logging.getLogger(__name__) |
|
|
| |
| |
| |
# Regression targets, in the column order used for model outputs and submissions.
TARGET_COLS = [
    'Dry_Green_g',
    'Dry_Dead_g',
    'Dry_Clover_g',
    'GDM_g',
    'Dry_Total_g',
]
# Per-target weights, aligned index-for-index with TARGET_COLS.
TARGET_WEIGHTS = [0.1, 0.1, 0.1, 0.2, 0.5]
# Channel statistics handed to the Normalize transforms.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# CLI backbone alias -> timm model name plus feature-dim / input-size metadata.
# Rows are (alias, timm name, feat_dim, native_size, default_size).
BACKBONE_CONFIGS = {
    alias: {
        'name': timm_name,
        'feat_dim': feat_dim,
        'native_size': native_size,
        'default_size': default_size,
    }
    for alias, timm_name, feat_dim, native_size, default_size in (
        ('dinov2_small', 'vit_small_patch14_dinov2.lvd142m', 384, 518, 224),
        ('dinov2_base', 'vit_base_patch14_dinov2.lvd142m', 768, 518, 224),
        ('dinov2_large', 'vit_large_patch14_dinov2.lvd142m', 1024, 518, 224),
        ('dinov2_base_reg', 'vit_base_patch14_reg4_dinov2.lvd142m', 768, 518, 224),
        ('convnext_large', 'convnext_large.fb_in22k_ft_in1k', 1536, 224, 224),
        ('convnextv2_large', 'convnextv2_large.fcmae_ft_in22k_in1k', 1536, 224, 224),
        ('efficientnet_b4', 'efficientnet_b4.ra2_in1k', 1792, 380, 320),
        ('swin_large', 'swin_large_patch4_window7_224.ms_in22k_ft_in1k', 1536, 224, 224),
        ('eva02_large', 'eva02_large_patch14_448.mim_m38m_ft_in22k_in1k', 1024, 448, 448),
    )
}
|
|
|
|
| |
| |
| |
class BiomassDataset(Dataset):
    """Dataset for pasture biomass regression from images.

    Each item is a dict containing:
        'image'    - the transformed image (or, when ``transform`` is None,
                     a float32 CHW tensor scaled to [0, 1])
        'image_id' - the image id as a string
        'ndvi'     - scalar float32 tensor; only present when ``use_ndvi`` is
                     set and ``df`` has an 'NDVI' column
        'targets'  - float32 tensor of the TARGET_COLS values, log1p-transformed
                     when ``log_transform`` is set; only present when
                     ``targets`` is given

    Args:
        image_dir: directory holding ``<image_id>.<ext>`` files.
        df: per-image metadata; ids come from its 'image_id' column, or from
            the (reset) positional index when that column is absent.
        targets: optional frame with the TARGET_COLS columns, aligned
            positionally with ``df``.
        transform: albumentations pipeline when albumentations is installed,
            otherwise a torchvision-style callable taking a PIL image.
        img_size, is_test: stored on the instance; not used by this class.
        use_ndvi: emit the 'ndvi' entry when the column exists.
        log_transform: apply ``np.log1p`` to the targets.
    """

    def __init__(
        self,
        image_dir: str,
        df: pd.DataFrame,
        targets: Optional[pd.DataFrame] = None,
        transform=None,
        img_size: int = 224,
        use_ndvi: bool = False,
        log_transform: bool = True,
        is_test: bool = False,
    ):
        self.image_dir = Path(image_dir)
        self.df = df.reset_index(drop=True)
        self.targets = targets
        self.transform = transform
        self.img_size = img_size
        self.use_ndvi = use_ndvi
        self.log_transform = log_transform
        self.is_test = is_test

        # Ids are taken column-wise so they keep their own dtype; reading them
        # from an ``iloc`` row would upcast integer ids to float whenever every
        # column of the frame is numeric ("12" -> "12.0").
        if 'image_id' in self.df.columns:
            self.image_ids = self.df['image_id'].values
        else:
            self.image_ids = self.df.index.values

    def __len__(self):
        return len(self.df)

    def _resolve_image_path(self, img_id) -> Path:
        """Return the file for ``img_id``: .jpg first, then .png, then any extension."""
        for suffix in ('.jpg', '.png'):
            candidate = self.image_dir / f"{img_id}{suffix}"
            if candidate.exists():
                return candidate
        matches = list(self.image_dir.glob(f"{img_id}.*"))
        if matches:
            return matches[0]
        raise FileNotFoundError(f"Image not found: {img_id}")

    def __getitem__(self, idx):
        img_id = self.image_ids[idx]
        img_path = self._resolve_image_path(img_id)

        # The context manager closes the file handle once the pixels are copied.
        with Image.open(img_path) as handle:
            img = np.array(handle.convert('RGB'))

        if self.transform is None:
            # Same result as torchvision's ToTensor on a uint8 HWC image, but
            # without needing torchvision, which is only imported when
            # albumentations is missing.
            img_tensor = torch.from_numpy(img).permute(2, 0, 1).contiguous().float().div(255.0)
        elif HAS_ALBUMENTATIONS:
            img_tensor = self.transform(image=img)['image']
        else:
            img_tensor = self.transform(Image.fromarray(img))

        result = {'image': img_tensor, 'image_id': str(img_id)}

        if self.use_ndvi and 'NDVI' in self.df.columns:
            result['ndvi'] = torch.tensor(self.df['NDVI'].iloc[idx], dtype=torch.float32)

        if self.targets is not None:
            target_values = self.targets.iloc[idx][TARGET_COLS].values.astype(np.float32)
            if self.log_transform:
                target_values = np.log1p(target_values)
            result['targets'] = torch.tensor(target_values, dtype=torch.float32)

        return result
|
|
|
|
| |
| |
| |
def get_train_transforms(img_size: int = 224, aug_strength: str = 'medium'):
    """Build the training augmentation pipeline.

    Args:
        img_size: side length of the square output crop.
        aug_strength: 'light' or 'medium'; any other value selects the
            strongest pipeline. Ignored by the torchvision fallback, which has
            a single fixed recipe.

    Returns:
        An albumentations ``Compose`` when albumentations is available,
        otherwise a torchvision ``Compose``.
    """
    if not HAS_ALBUMENTATIONS:
        return transforms.Compose([
            transforms.RandomResizedCrop(img_size, scale=(0.5, 1.0)),
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomVerticalFlip(0.5),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

    def crop(min_scale):
        # Random crop covering at least ``min_scale`` of the image area.
        return A.RandomResizedCrop(size=(img_size, img_size), scale=(min_scale, 1.0))

    def hole_range(max_fraction):
        # Dropout hole extent, expressed as fractions of the output side.
        return (int(img_size*0.05), int(img_size*max_fraction))

    flips = [A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5)]
    right_angles = [A.RandomRotate90(p=0.5), A.Transpose(p=0.5)]
    # Every pipeline ends with normalisation followed by tensor conversion.
    finish = [A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD), ToTensorV2()]

    if aug_strength == 'light':
        steps = [crop(0.7), *flips]
    elif aug_strength == 'medium':
        steps = [
            crop(0.5),
            *flips,
            *right_angles,
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            A.HueSaturationValue(hue_shift_limit=15, sat_shift_limit=25, val_shift_limit=15, p=0.4),
            A.OneOf([
                A.GaussianBlur(blur_limit=(3, 5)),
                A.MotionBlur(blur_limit=5),
            ], p=0.15),
            A.CoarseDropout(
                num_holes_range=(1, 4),
                hole_height_range=hole_range(0.15),
                hole_width_range=hole_range(0.15),
                p=0.2,
            ),
        ]
    else:
        steps = [
            crop(0.4),
            *flips,
            *right_angles,
            A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.7),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
            A.RandomGamma(gamma_limit=(80, 120), p=0.3),
            A.OneOf([
                A.GaussianBlur(blur_limit=(3, 7)),
                A.MotionBlur(blur_limit=7),
            ], p=0.2),
            A.OneOf([
                A.GaussNoise(p=1.0),
                A.ISONoise(p=1.0),
            ], p=0.2),
            A.CoarseDropout(
                num_holes_range=(1, 8),
                hole_height_range=hole_range(0.2),
                hole_width_range=hole_range(0.2),
                p=0.3,
            ),
        ]

    return A.Compose(steps + finish)
|
|
|
|
def get_val_transforms(img_size: int = 224):
    """Build the deterministic validation pipeline.

    The image is resized to 1.14x ``img_size``, centre-cropped to
    ``img_size`` and normalised with the ImageNet statistics. Uses
    albumentations when available, torchvision otherwise.
    """
    enlarged = int(img_size * 1.14)

    if not HAS_ALBUMENTATIONS:
        return transforms.Compose([
            transforms.Resize(enlarged),
            transforms.CenterCrop(img_size),
            transforms.ToTensor(),
            transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ])

    return A.Compose([
        A.Resize(height=enlarged, width=enlarged),
        A.CenterCrop(height=img_size, width=img_size),
        A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
        ToTensorV2(),
    ])
|
|
|
|
def get_tta_transforms(img_size: int = 224, n_augments: int = 8):
    """Get TTA (test-time augmentation) transforms. Returns list of transforms.

    The first entry is always the plain validation transform. With
    albumentations available it is followed by horizontal-flip,
    vertical-flip and double-flip views, then centre crops taken after
    resizing at 0.9x and 1.2x the validation resize. A 1.0x rescale is
    deliberately not emitted: it would be identical to the first entry and
    would count the un-flipped view twice in any average over the list.

    Args:
        img_size: side length of every output crop.
        n_augments: maximum number of transforms to return; the list is
            truncated to this length (it holds at most 6 entries, or 1
            without albumentations).
    """
    tta_list = [get_val_transforms(img_size)]

    if HAS_ALBUMENTATIONS:
        def view(resize_side, *flips):
            # Resize -> centre crop -> optional forced flips -> normalise -> tensor.
            return A.Compose([
                A.Resize(height=resize_side, width=resize_side),
                A.CenterCrop(height=img_size, width=img_size),
                *flips,
                A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
                ToTensorV2(),
            ])

        base_side = int(img_size * 1.14)
        tta_list.append(view(base_side, A.HorizontalFlip(p=1.0)))
        tta_list.append(view(base_side, A.VerticalFlip(p=1.0)))
        tta_list.append(view(base_side, A.HorizontalFlip(p=1.0), A.VerticalFlip(p=1.0)))

        for scale in (0.9, 1.2):
            tta_list.append(view(int(img_size * scale * 1.14)))

    return tta_list[:n_augments]
|
|
|
|
| |
| |
| |
class BiomassModel(nn.Module):
    """
    Multi-output regression model for biomass prediction.

    Architecture:
    - timm backbone (DINOv2, ConvNeXt, etc.) created with ``num_classes=0``
    - Optional auxiliary NDVI scalar, embedded to 64 features and
      concatenated to the backbone features
    - MLP regression head with LayerNorm + GELU + Dropout
    - Optional: one independent head per target (``separate_heads``)

    Args:
        backbone_name: timm model name.
        num_targets: number of regression outputs.
        hidden_dim: width of the first hidden layer of the head(s).
        dropout: base dropout rate; later head layers use scaled-down rates.
        pretrained: forwarded to ``timm.create_model``.
        img_size: forwarded to timm only when the backbone name contains
            'vit' or 'dinov2'.
        use_ndvi: build the NDVI branch; ``forward`` then requires ``ndvi``.
        separate_heads: use one small head per target instead of a shared MLP.
        grad_checkpointing: enable gradient checkpointing on the backbone when
            it exposes ``set_grad_checkpointing``.
    """

    def __init__(
        self,
        backbone_name: str = 'vit_base_patch14_dinov2.lvd142m',
        num_targets: int = 5,
        hidden_dim: int = 512,
        dropout: float = 0.3,
        pretrained: bool = True,
        img_size: int = 224,
        use_ndvi: bool = False,
        separate_heads: bool = False,
        grad_checkpointing: bool = False,
    ):
        super().__init__()
        self.use_ndvi = use_ndvi
        self.separate_heads = separate_heads
        self.num_targets = num_targets

        # num_classes=0 makes timm return features instead of class logits.
        kwargs = {'pretrained': pretrained, 'num_classes': 0}
        if 'vit' in backbone_name or 'dinov2' in backbone_name:
            kwargs['img_size'] = img_size

        self.backbone = timm.create_model(backbone_name, **kwargs)
        feat_dim = self.backbone.num_features

        if grad_checkpointing:
            if hasattr(self.backbone, 'set_grad_checkpointing'):
                self.backbone.set_grad_checkpointing(True)
                logger.info("Gradient checkpointing enabled")
            else:
                # Say so explicitly rather than silently training without it.
                logger.warning(
                    "Gradient checkpointing requested but backbone %s does not support it",
                    backbone_name,
                )

        if use_ndvi:
            self.ndvi_embed = nn.Sequential(
                nn.Linear(1, 32),
                nn.GELU(),
                nn.Linear(32, 64),
            )
            feat_dim += 64

        # Layer order inside the heads is part of the checkpoint format
        # (state_dict keys are positional), so it must not be rearranged.
        if separate_heads:
            self.heads = nn.ModuleList([
                nn.Sequential(
                    nn.LayerNorm(feat_dim),
                    nn.Dropout(dropout),
                    nn.Linear(feat_dim, hidden_dim),
                    nn.GELU(),
                    nn.Dropout(dropout * 0.5),
                    nn.Linear(hidden_dim, 1),
                )
                for _ in range(num_targets)
            ])
        else:
            self.head = nn.Sequential(
                nn.LayerNorm(feat_dim),
                nn.Dropout(dropout),
                nn.Linear(feat_dim, hidden_dim),
                nn.GELU(),
                nn.Dropout(dropout * 0.5),
                nn.Linear(hidden_dim, hidden_dim // 2),
                nn.GELU(),
                nn.Dropout(dropout * 0.3),
                nn.Linear(hidden_dim // 2, num_targets),
            )

    def forward(self, x, ndvi=None):
        """Return the regression outputs for a batch of images.

        Args:
            x: image batch passed straight to the backbone.
            ndvi: one NDVI value per sample; a trailing feature axis is added
                here. Required when the model was built with ``use_ndvi=True``
                and ignored otherwise.

        Raises:
            ValueError: if the model uses NDVI but ``ndvi`` is None. Without
                this check the head would receive features 64 columns short
                and fail with an unrelated-looking shape error.
        """
        features = self.backbone(x)

        if self.use_ndvi:
            if ndvi is None:
                raise ValueError(
                    "Model was built with use_ndvi=True but forward() received no "
                    "`ndvi` tensor; check that the data provides an 'NDVI' column"
                )
            ndvi_feats = self.ndvi_embed(ndvi.unsqueeze(-1))
            features = torch.cat([features, ndvi_feats], dim=-1)

        if self.separate_heads:
            return torch.cat([head(features) for head in self.heads], dim=-1)
        return self.head(features)

    def get_param_groups(self, backbone_lr: float = 5e-5, head_lr: float = 1e-3):
        """Get parameter groups with differential learning rates.

        Returns two optimizer groups: the backbone parameters at
        ``backbone_lr`` and every other parameter (heads, NDVI embedding) at
        ``head_lr``.
        """
        backbone_params = list(self.backbone.parameters())
        head_params = [p for n, p in self.named_parameters() if 'backbone' not in n]

        return [
            {'params': backbone_params, 'lr': backbone_lr},
            {'params': head_params, 'lr': head_lr},
        ]
|
|
|
|
| |
| |
| |
class WeightedSmoothL1Loss(nn.Module):
    """Smooth-L1 regression loss with a fixed weight per target column.

    Args:
        target_weights: one weight per output column; falls back to the
            module-level TARGET_WEIGHTS when omitted.
        beta: transition point handed to ``F.smooth_l1_loss``.
    """

    def __init__(self, target_weights=None, beta=1.0):
        super().__init__()
        self.beta = beta
        chosen = TARGET_WEIGHTS if target_weights is None else target_weights
        # Registered as a buffer so it moves with the module across devices.
        self.register_buffer('weights', torch.tensor(chosen, dtype=torch.float32))

    def forward(self, pred, target):
        """Return the mean over all elements of the column-weighted loss."""
        elementwise = F.smooth_l1_loss(pred, target, reduction='none', beta=self.beta)
        return torch.mean(elementwise * self.weights[None])
|
|
|
|
class WeightedMSELoss(nn.Module):
    """Squared-error loss with a fixed weight per target column.

    Args:
        target_weights: one weight per output column; falls back to the
            module-level TARGET_WEIGHTS when omitted.
    """

    def __init__(self, target_weights=None):
        super().__init__()
        chosen = TARGET_WEIGHTS if target_weights is None else target_weights
        # Registered as a buffer so it moves with the module across devices.
        self.register_buffer('weights', torch.tensor(chosen, dtype=torch.float32))

    def forward(self, pred, target):
        """Return the mean over all elements of the column-weighted squared error."""
        residual = pred - target
        squared = residual ** 2
        return torch.mean(squared * self.weights[None])
|
|
|
|
class ConsistencyLoss(nn.Module):
    """
    Enforce structural constraint: Dry_Total_g ≈ Dry_Green_g + Dry_Dead_g + Dry_Clover_g

    The penalty is ``weight * MSE(component_sum, total)`` where the components
    are output columns 0-2 and the total is column 4 (the TARGET_COLS order).
    Column 3 (GDM_g) does not take part.

    Args:
        weight: multiplier applied to the MSE penalty.
        log_space: set to True when the predictions are log1p-transformed
            targets. The additive relation only holds for raw values, so the
            components are mapped back with ``expm1`` (clamped at zero),
            summed, and the sum is compared with the total in log1p space.
            The default (False) adds the columns exactly as given, which is
            the historical behaviour.

    NOTE(review): the dataset log-transforms targets by default, so callers
    training in log space presumably want ``log_space=True`` — confirm at the
    call site.
    """

    def __init__(self, weight=0.1, log_space=False):
        super().__init__()
        self.weight = weight
        self.log_space = log_space

    def forward(self, pred):
        """Return the weighted consistency penalty for a batch of predictions."""
        total = pred[:, 4]
        if self.log_space:
            # Negative raw components are not physical and would let the sum
            # fall below -1, where log1p is undefined.
            raw_components = torch.expm1(pred[:, :3]).clamp(min=0)
            component_sum = torch.log1p(raw_components.sum(dim=1))
        else:
            component_sum = pred[:, 0] + pred[:, 1] + pred[:, 2]
        return self.weight * F.mse_loss(component_sum, total)
|
|
|
|
class CombinedLoss(nn.Module):
    """Sum of weighted SmoothL1, optional weighted MSE and optional consistency terms.

    Args:
        smoothl1_weight: multiplier on the SmoothL1 term (always present).
        mse_weight: multiplier on the MSE term; the term is only built when
            this is greater than zero.
        consistency_weight: weight given to ConsistencyLoss; the term is only
            built when this is greater than zero.
        target_weights: per-target weights shared by the SmoothL1 and MSE terms.
    """

    def __init__(self, smoothl1_weight=1.0, mse_weight=0.0, consistency_weight=0.1,
                 target_weights=None):
        super().__init__()
        use_mse = mse_weight > 0
        use_consistency = consistency_weight > 0

        self.smoothl1 = WeightedSmoothL1Loss(target_weights)
        self.mse = WeightedMSELoss(target_weights) if use_mse else None
        self.consistency = ConsistencyLoss(consistency_weight) if use_consistency else None
        self.smoothl1_weight = smoothl1_weight
        self.mse_weight = mse_weight

    def forward(self, pred, target):
        """Return the combined scalar loss for one batch."""
        total = self.smoothl1_weight * self.smoothl1(pred, target)
        if self.mse is not None:
            total = total + self.mse_weight * self.mse(pred, target)
        if self.consistency is not None:
            # The consistency term already carries its own weight.
            total = total + self.consistency(pred)
        return total
|
|
|
|
| |
| |
| |
def get_lds_weights(labels: np.ndarray, bins: int = 100, kernel_size: int = 5, sigma: float = 2.0):
    """
    Compute Label Distribution Smoothing (LDS) weights.
    From "Delving into Deep Imbalanced Regression" (ICML 2021).

    Args:
        labels: 1-D label array, or a 2-D array whose last column is used.
        bins: number of histogram bins.
        kernel_size: number of taps in the Gaussian smoothing kernel.
        sigma: standard deviation of the Gaussian.

    Returns:
        One weight per label, inversely proportional to the smoothed label
        density and normalised to a mean of 1.
    """
    from scipy.ndimage import convolve1d

    values = labels[:, -1] if labels.ndim > 1 else labels

    counts, edges = np.histogram(values, bins=bins)

    # Gaussian taps sampled on [-3, 3] and normalised to sum to one.
    taps = np.linspace(-3, 3, kernel_size)
    kernel = np.exp(-taps**2 / (2 * sigma**2))
    kernel = kernel / kernel.sum()

    density = convolve1d(counts.astype(float), kernel, mode='reflect')

    # Read the smoothed density at each label by interpolating between bin centres.
    centers = (edges[:-1] + edges[1:]) / 2
    per_label_density = np.interp(values, centers, density)

    inverse = 1.0 / (per_label_density + 1e-8)
    return inverse / inverse.mean()
|
|
|
|
| |
| |
| |
def compute_weighted_r2(preds: np.ndarray, targets: np.ndarray,
                        target_weights: Optional[List[float]] = None) -> float:
    """
    Compute the globally weighted R² (competition metric).

    All (sample, target) cells are pooled into one weighted regression
    problem: each cell carries the weight of its target column, and the
    baseline is the single weighted mean of every target value.

    Args:
        preds: [N, T] predictions
        targets: [N, T] ground truth
        target_weights: per-target weights (default: competition weights);
            only the first T entries are used.

    Returns:
        Weighted R² score, ``1 - SS_res / (SS_tot + 1e-8)``.

    Raises:
        ValueError: if ``preds`` and ``targets`` differ in shape or are not 2-D.
    """
    if target_weights is None:
        target_weights = TARGET_WEIGHTS

    # Work in float64 so half/single-precision inputs do not degrade the sums.
    preds = np.asarray(preds, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    if preds.ndim != 2 or preds.shape != targets.shape:
        raise ValueError(
            f"preds and targets must be 2-D with equal shapes, got {preds.shape} and {targets.shape}"
        )

    n_targets = preds.shape[1]
    column_weights = np.asarray(target_weights, dtype=np.float64)[:n_targets]
    # One weight per cell, obtained by broadcasting the column weights over rows.
    cell_weights = np.broadcast_to(column_weights, targets.shape)

    weighted_mean = np.sum(cell_weights * targets) / np.sum(cell_weights)

    ss_res = np.sum(cell_weights * (targets - preds) ** 2)
    ss_tot = np.sum(cell_weights * (targets - weighted_mean) ** 2)

    # The epsilon keeps the ratio finite when every target value is identical.
    return float(1.0 - ss_res / (ss_tot + 1e-8))
|
|
|
|
def compute_per_target_r2(preds: np.ndarray, targets: np.ndarray) -> Dict[str, float]:
    """Return the unweighted R² of each target column, keyed by its TARGET_COLS name.

    Column ``i`` of both arrays is matched to ``TARGET_COLS[i]``. A small
    epsilon in the denominator keeps the score finite for constant targets.
    """
    scores = {}
    for col, name in enumerate(TARGET_COLS):
        truth = targets[:, col]
        residual_ss = np.sum((truth - preds[:, col]) ** 2)
        total_ss = np.sum((truth - truth.mean()) ** 2)
        scores[name] = 1.0 - residual_ss / (total_ss + 1e-8)
    return scores
|
|
|
|
| |
| |
| |
class Trainer:
    """Training engine with mixed precision, gradient accumulation, and k-fold.

    ``args`` is the parsed command-line namespace; every hyper-parameter is
    read from it. On CUDA the forward pass runs under fp16 autocast with a
    ``GradScaler``; on CPU everything runs in full precision.
    """

    def __init__(self, args):
        self.args = args
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # A scaler of None is the switch that disables the AMP code paths.
        self.scaler = GradScaler() if self.device.type == 'cuda' else None

        logger.info(f"Device: {self.device}")
        if self.device.type == 'cuda':
            logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
            # The device-properties attribute is `total_memory` (bytes).
            total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
            logger.info(f"GPU Memory: {total_gb:.1f} GB")

    # ------------------------------------------------------------------ builders

    def build_model(self):
        """Build model from args and move it to the training device."""
        backbone_cfg = BACKBONE_CONFIGS[self.args.backbone]
        img_size = self.args.img_size or backbone_cfg['default_size']

        model = BiomassModel(
            backbone_name=backbone_cfg['name'],
            num_targets=5,
            hidden_dim=self.args.hidden_dim,
            dropout=self.args.dropout,
            pretrained=True,
            img_size=img_size,
            use_ndvi=self.args.use_ndvi,
            separate_heads=self.args.separate_heads,
            grad_checkpointing=self.args.grad_checkpointing,
        )
        return model.to(self.device)

    def build_optimizer(self, model):
        """Build optimizer with differential learning rates.

        Raises:
            ValueError: if ``args.optimizer`` is neither 'adamw' nor 'sgd'.
        """
        param_groups = model.get_param_groups(
            backbone_lr=self.args.backbone_lr,
            head_lr=self.args.head_lr,
        )

        if self.args.optimizer == 'adamw':
            optimizer = torch.optim.AdamW(param_groups, weight_decay=self.args.weight_decay)
        elif self.args.optimizer == 'sgd':
            optimizer = torch.optim.SGD(param_groups, momentum=0.9, weight_decay=self.args.weight_decay)
        else:
            raise ValueError(f"Unknown optimizer: {self.args.optimizer}")

        return optimizer

    def build_scheduler(self, optimizer, num_training_steps):
        """Build learning rate scheduler.

        'cosine' gives a linear warm-up followed by cosine annealing, stepped
        once per optimizer update; 'plateau' gives ReduceLROnPlateau driven by
        the validation R²; anything else returns None.

        Args:
            optimizer: optimizer whose learning rates are scheduled.
            num_training_steps: total number of optimizer updates planned.
        """
        warmup_steps = int(num_training_steps * self.args.warmup_ratio)

        if self.args.scheduler == 'cosine':
            from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

            # Clamp so a run with a single planned step cannot produce T_max=0.
            cosine_steps = max(1, num_training_steps - warmup_steps)
            if warmup_steps <= 0:
                # Without this guard the warm-up scheduler would still start
                # training at 1% of the configured learning rate.
                return CosineAnnealingLR(optimizer, T_max=cosine_steps, eta_min=self.args.min_lr)

            warmup = LinearLR(optimizer, start_factor=0.01, total_iters=warmup_steps)
            cosine = CosineAnnealingLR(optimizer, T_max=cosine_steps, eta_min=self.args.min_lr)
            return SequentialLR(optimizer, [warmup, cosine], milestones=[warmup_steps])

        if self.args.scheduler == 'plateau':
            # `verbose` is deprecated in PyTorch; reductions are logged by
            # train_fold instead.
            return torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode='max', factor=0.5, patience=5)

        return None

    # ------------------------------------------------------------------ helpers

    def _inputs_to_device(self, batch):
        """Return (images, ndvi-or-None) from a batch dict, moved to the device."""
        images = batch['image'].to(self.device)
        ndvi = batch.get('ndvi', None)
        if ndvi is not None:
            ndvi = ndvi.to(self.device)
        return images, ndvi

    def _forward(self, model, images, ndvi, targets=None, loss_fn=None):
        """Run the model, under fp16 autocast when AMP is active.

        Returns ``(preds, loss)``; ``loss`` is None when no ``loss_fn`` is given.
        """
        def run():
            preds = model(images, ndvi)
            loss = loss_fn(preds, targets) if loss_fn is not None else None
            return preds, loss

        if self.scaler is None:
            return run()
        with autocast(dtype=torch.float16):
            return run()

    # ------------------------------------------------------------------ loops

    def train_one_epoch(self, model, loader, optimizer, scheduler, loss_fn, epoch):
        """Train for one epoch.

        Gradients are accumulated over ``args.grad_accum_steps`` batches per
        optimizer update. When the number of batches is not a multiple of that
        value, the trailing batches form a final, shorter window that is also
        stepped, so no gradient is dropped or carried into the next epoch.

        Returns:
            The sample-weighted mean training loss over the epoch.
        """
        model.train()
        accum = self.args.grad_accum_steps
        n_batches = len(loader)
        tail = n_batches % accum
        per_update_scheduler = (
            scheduler is not None
            and not isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau)
        )

        running_loss = 0.0
        num_samples = 0
        optimizer.zero_grad()

        for batch_idx, batch in enumerate(loader):
            images, ndvi = self._inputs_to_device(batch)
            targets = batch['targets'].to(self.device)

            # Batches in the trailing partial window are averaged over the
            # window's true length rather than over `accum`.
            in_tail = tail > 0 and batch_idx >= n_batches - tail
            window = tail if in_tail else accum

            _, loss = self._forward(model, images, ndvi, targets, loss_fn)
            scaled_loss = loss / window
            if self.scaler is not None:
                self.scaler.scale(scaled_loss).backward()
            else:
                scaled_loss.backward()

            is_last_batch = batch_idx + 1 == n_batches
            if (batch_idx + 1) % accum == 0 or is_last_batch:
                if self.scaler is not None:
                    # Gradients must be unscaled before clipping by norm.
                    self.scaler.unscale_(optimizer)
                torch.nn.utils.clip_grad_norm_(model.parameters(), self.args.max_grad_norm)
                if self.scaler is not None:
                    self.scaler.step(optimizer)
                    self.scaler.update()
                else:
                    optimizer.step()
                optimizer.zero_grad()

                if per_update_scheduler:
                    scheduler.step()

            running_loss += loss.item() * images.size(0)
            num_samples += images.size(0)

            if (batch_idx + 1) % self.args.log_interval == 0:
                avg_loss = running_loss / num_samples
                lr = optimizer.param_groups[0]['lr']
                logger.info(f"Epoch {epoch} [{batch_idx+1}/{len(loader)}] loss={avg_loss:.4f} lr={lr:.2e}")

        return running_loss / num_samples

    @torch.no_grad()
    def validate(self, model, loader, loss_fn, log_transform=True):
        """Validate and compute metrics.

        Returns a dict with the mean loss, the weighted R², the per-target R²
        and the predictions/targets mapped back to the original scale
        (``expm1`` when ``log_transform`` is set, predictions clipped at 0).
        """
        model.eval()
        all_preds = []
        all_targets = []
        running_loss = 0.0
        num_samples = 0

        for batch in loader:
            images, ndvi = self._inputs_to_device(batch)
            targets = batch['targets'].to(self.device)

            preds, loss = self._forward(model, images, ndvi, targets, loss_fn)

            running_loss += loss.item() * images.size(0)
            num_samples += images.size(0)

            # Autocast yields fp16 outputs; promote before leaving torch so
            # expm1 and the metrics are not computed in half precision.
            all_preds.append(preds.float().cpu().numpy())
            all_targets.append(targets.float().cpu().numpy())

        all_preds = np.concatenate(all_preds, axis=0)
        all_targets = np.concatenate(all_targets, axis=0)

        if log_transform:
            all_preds_orig = np.expm1(all_preds)
            all_targets_orig = np.expm1(all_targets)
        else:
            all_preds_orig = all_preds
            all_targets_orig = all_targets

        all_preds_orig = np.clip(all_preds_orig, 0, None)

        weighted_r2 = compute_weighted_r2(all_preds_orig, all_targets_orig)
        per_target_r2 = compute_per_target_r2(all_preds_orig, all_targets_orig)

        avg_loss = running_loss / num_samples

        return {
            'loss': avg_loss,
            'weighted_r2': weighted_r2,
            'per_target_r2': per_target_r2,
            'preds': all_preds_orig,
            'targets': all_targets_orig,
        }

    @torch.no_grad()
    def predict(self, model, loader, log_transform=True, tta_transforms=None):
        """Generate predictions (inference).

        Args:
            model: trained model.
            loader: loader yielding 'image', 'image_id' and optionally 'ndvi'.
            log_transform: undo the log1p target transform with ``expm1``.
            tta_transforms: accepted for interface compatibility; not used by
                this method.

        Returns:
            ``(preds, ids)``: an [N, T] array clipped at 0 and the matching
            list of image ids.
        """
        model.eval()
        all_preds = []
        all_ids = []

        for batch in loader:
            images, ndvi = self._inputs_to_device(batch)
            preds, _ = self._forward(model, images, ndvi)

            # Promote fp16 autocast outputs before the NumPy post-processing.
            all_preds.append(preds.float().cpu().numpy())
            all_ids.extend(batch['image_id'])

        all_preds = np.concatenate(all_preds, axis=0)

        if log_transform:
            all_preds = np.expm1(all_preds)

        all_preds = np.clip(all_preds, 0, None)

        return all_preds, all_ids

    # ------------------------------------------------------------------ folds

    def train_fold(self, fold: int, train_df: pd.DataFrame, val_df: pd.DataFrame,
                   train_targets: pd.DataFrame, val_targets: pd.DataFrame,
                   image_dir: str):
        """Train a single fold.

        Trains with early stopping on the validation weighted R², saves the
        best checkpoint under ``<output_dir>/fold_<fold>/best_model.pth``,
        reloads it and returns ``(model, val_metrics)`` for that checkpoint.
        """
        backbone_cfg = BACKBONE_CONFIGS[self.args.backbone]
        img_size = self.args.img_size or backbone_cfg['default_size']

        train_dataset = BiomassDataset(
            image_dir=image_dir,
            df=train_df,
            targets=train_targets,
            transform=get_train_transforms(img_size, self.args.aug_strength),
            img_size=img_size,
            use_ndvi=self.args.use_ndvi,
            log_transform=self.args.log_transform,
        )
        val_dataset = BiomassDataset(
            image_dir=image_dir,
            df=val_df,
            targets=val_targets,
            transform=get_val_transforms(img_size),
            img_size=img_size,
            use_ndvi=self.args.use_ndvi,
            log_transform=self.args.log_transform,
        )

        if self.args.use_lds:
            # Oversample rare label values instead of shuffling uniformly.
            sample_weights = get_lds_weights(
                train_targets[TARGET_COLS].values,
                bins=self.args.lds_bins,
                kernel_size=self.args.lds_kernel_size,
                sigma=self.args.lds_sigma,
            )
            sampler = WeightedRandomSampler(
                weights=sample_weights,
                num_samples=len(train_dataset),
                replacement=True,
            )
            train_loader = DataLoader(
                train_dataset, batch_size=self.args.batch_size,
                sampler=sampler, num_workers=self.args.num_workers,
                pin_memory=True, drop_last=True,
            )
        else:
            train_loader = DataLoader(
                train_dataset, batch_size=self.args.batch_size,
                shuffle=True, num_workers=self.args.num_workers,
                pin_memory=True, drop_last=True,
            )

        val_loader = DataLoader(
            val_dataset, batch_size=self.args.batch_size * 2,
            shuffle=False, num_workers=self.args.num_workers,
            pin_memory=True,
        )

        model = self.build_model()
        optimizer = self.build_optimizer(model)

        # One optimizer update per accumulation window, counting the trailing
        # partial window that train_one_epoch also steps.
        accum = self.args.grad_accum_steps
        updates_per_epoch = (len(train_loader) + accum - 1) // accum
        num_training_steps = updates_per_epoch * self.args.epochs
        scheduler = self.build_scheduler(optimizer, num_training_steps)

        loss_fn = CombinedLoss(
            smoothl1_weight=1.0,
            mse_weight=self.args.mse_weight,
            consistency_weight=self.args.consistency_weight,
            target_weights=TARGET_WEIGHTS,
        ).to(self.device)

        best_r2 = -float('inf')
        best_epoch = 0
        patience_counter = 0
        save_dir = Path(self.args.output_dir) / f"fold_{fold}"
        save_dir.mkdir(parents=True, exist_ok=True)
        best_path = save_dir / 'best_model.pth'

        logger.info(f"\n{'='*60}")
        logger.info(f"FOLD {fold}")
        logger.info(f"Train: {len(train_dataset)}, Val: {len(val_dataset)}")
        logger.info(f"Backbone: {backbone_cfg['name']}, img_size: {img_size}")
        logger.info(f"{'='*60}")

        for epoch in range(1, self.args.epochs + 1):
            t0 = time.time()

            train_loss = self.train_one_epoch(model, train_loader, optimizer, scheduler, loss_fn, epoch)

            val_metrics = self.validate(model, val_loader, loss_fn, self.args.log_transform)

            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                lrs_before = [group['lr'] for group in optimizer.param_groups]
                scheduler.step(val_metrics['weighted_r2'])
                lrs_after = [group['lr'] for group in optimizer.param_groups]
                if lrs_after != lrs_before:
                    logger.info(f"ReduceLROnPlateau changed learning rates: {lrs_before} -> {lrs_after}")

            elapsed = time.time() - t0

            logger.info(
                f"Epoch {epoch}/{self.args.epochs} | "
                f"train_loss={train_loss:.4f} | "
                f"val_loss={val_metrics['loss']:.4f} | "
                f"val_R²={val_metrics['weighted_r2']:.4f} | "
                f"time={elapsed:.1f}s"
            )
            for name, r2 in val_metrics['per_target_r2'].items():
                logger.info(f" {name}: R²={r2:.4f}")

            if val_metrics['weighted_r2'] > best_r2:
                best_r2 = val_metrics['weighted_r2']
                best_epoch = epoch
                patience_counter = 0

                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'weighted_r2': best_r2,
                    'per_target_r2': val_metrics['per_target_r2'],
                    'args': vars(self.args),
                }, best_path)

                logger.info(f" *** New best R²={best_r2:.4f} (epoch {epoch}) ***")
            else:
                patience_counter += 1

            if patience_counter >= self.args.patience:
                logger.info(f"Early stopping at epoch {epoch}. Best R²={best_r2:.4f} (epoch {best_epoch})")
                break

        if best_epoch > 0:
            # The checkpoint was written by this run, so full unpickling
            # (weights_only=False) is acceptable; do not reuse this call for
            # files of unknown origin.
            checkpoint = torch.load(best_path, map_location=self.device, weights_only=False)
            model.load_state_dict(checkpoint['model_state_dict'])
        else:
            # Happens when the metric never compared greater than -inf
            # (e.g. it was NaN every epoch); no checkpoint exists to reload.
            logger.warning(f"Fold {fold}: no epoch improved the validation R²; keeping final weights")

        val_metrics = self.validate(model, val_loader, loss_fn, self.args.log_transform)

        logger.info(f"\nFold {fold} Final: R²={val_metrics['weighted_r2']:.4f}")

        return model, val_metrics

    def train_kfold(self, df: pd.DataFrame, targets: pd.DataFrame, image_dir: str):
        """Train with K-Fold cross-validation.

        Folds are stratified on quantile bins of 'Dry_Total_g'. Out-of-fold
        predictions are written to ``<output_dir>/oof_predictions.csv``.

        Returns:
            ``(overall_r2, fold_scores)``: the weighted R² over all
            out-of-fold predictions and the list of per-fold scores.
        """
        n_folds = self.args.n_folds

        bins = pd.qcut(targets['Dry_Total_g'], q=min(10, n_folds), labels=False, duplicates='drop')

        kf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=self.args.seed)

        oof_preds = np.zeros((len(df), 5))
        fold_scores = []

        for fold, (train_idx, val_idx) in enumerate(kf.split(df, bins)):
            model, val_metrics = self.train_fold(
                fold,
                df.iloc[train_idx],
                df.iloc[val_idx],
                targets.iloc[train_idx],
                targets.iloc[val_idx],
                image_dir,
            )

            oof_preds[val_idx] = val_metrics['preds']
            fold_scores.append(val_metrics['weighted_r2'])

            logger.info(f"Fold {fold} R²: {val_metrics['weighted_r2']:.4f}")

            # The best weights are on disk; drop the in-memory copy so it does
            # not occupy device memory while the next fold trains.
            del model
            if self.device.type == 'cuda':
                torch.cuda.empty_cache()

        targets_arr = targets[TARGET_COLS].values
        overall_r2 = compute_weighted_r2(oof_preds, targets_arr)

        logger.info(f"\n{'='*60}")
        logger.info(f"Overall OOF R²: {overall_r2:.4f}")
        logger.info(f"Per-fold R²: {[f'{s:.4f}' for s in fold_scores]}")
        logger.info(f"Mean fold R²: {np.mean(fold_scores):.4f} ± {np.std(fold_scores):.4f}")
        logger.info(f"{'='*60}")

        # Same id fallback as BiomassDataset: use the index when the frame
        # has no 'image_id' column.
        if 'image_id' in df.columns:
            oof_df = df[['image_id']].copy()
        else:
            oof_df = pd.DataFrame({'image_id': df.index.values}, index=df.index)
        for i, col in enumerate(TARGET_COLS):
            oof_df[col] = oof_preds[:, i]

        output_dir = Path(self.args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
        oof_df.to_csv(output_dir / 'oof_predictions.csv', index=False)

        return overall_r2, fold_scores
|
|
|
|
| |
| |
| |
def load_competition_data(data_dir: str):
    """
    Load competition data. Expected structure:
        data_dir/
            train.csv
            test.csv
            train_images/   (falls back to train/)
            test_images/    (falls back to test/)
            sample_submission.csv   (optional)

    Returns:
        ``(train_df, test_df, sample_sub, train_img_dir, test_img_dir)``;
        ``sample_sub`` is None when the file is absent and the two image
        directories are returned as strings.
    """
    root = Path(data_dir)

    train_df = pd.read_csv(root / 'train.csv')
    test_df = pd.read_csv(root / 'test.csv')

    sample_path = root / 'sample_submission.csv'
    sample_sub = pd.read_csv(sample_path) if sample_path.exists() else None

    def image_dir(preferred, fallback):
        # Use the preferred folder name when it exists, else the short one.
        candidate = root / preferred
        return candidate if candidate.exists() else root / fallback

    train_img_dir = image_dir('train_images', 'train')
    test_img_dir = image_dir('test_images', 'test')

    logger.info(f"Train samples: {len(train_df)}")
    logger.info(f"Test samples: {len(test_df)}")
    logger.info(f"Train columns: {list(train_df.columns)}")
    logger.info(f"Test columns: {list(test_df.columns)}")

    available_targets = [col for col in TARGET_COLS if col in train_df.columns]
    logger.info(f"Available targets: {available_targets}")

    if available_targets:
        logger.info("\nTarget statistics:")
        for col in available_targets:
            series = train_df[col]
            logger.info(f" {col}: mean={series.mean():.2f}, "
                        f"median={series.median():.2f}, "
                        f"std={series.std():.2f}, "
                        f"min={series.min():.2f}, "
                        f"max={series.max():.2f}")

    return train_df, test_df, sample_sub, str(train_img_dir), str(test_img_dir)
|
|
|
|
def create_submission(preds: np.ndarray, image_ids: List[str], output_path: str):
    """
    Write the submission CSV in long format and return it as a DataFrame.

    One row is emitted per (image, target) pair, with ``sample_id`` set to
    ``<image_id>__<target_name>`` and ``target`` set to the prediction
    floored at zero.

    Args:
        preds: [N, 5] predictions
        image_ids: list of image IDs
        output_path: path to save CSV
    """
    rows = [
        {
            'sample_id': f"{img_id}__{target_name}",
            'target': max(0, preds[row_idx, col_idx]),
        }
        for row_idx, img_id in enumerate(image_ids)
        for col_idx, target_name in enumerate(TARGET_COLS)
    ]

    sub_df = pd.DataFrame(rows)
    sub_df.to_csv(output_path, index=False)
    logger.info(f"Submission saved to {output_path} ({len(sub_df)} rows)")
    return sub_df
|
|
|
|
| |
| |
| |
def set_seed(seed: int):
    """
    Seed the Python, NumPy and PyTorch random number generators.

    When CUDA is available, all CUDA devices are seeded as well and cuDNN is
    switched to deterministic mode with benchmarking disabled.

    Args:
        seed: seed value applied to every generator
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
|
|
|
|
| |
| |
| |
def get_args(argv: Optional[List[str]] = None):
    """
    Parse command-line arguments for the training pipeline.

    Args:
        argv: argument list to parse; None (the default) makes argparse read
            sys.argv[1:], which keeps existing `get_args()` callers unchanged.

    Returns:
        argparse.Namespace with all options. `log_transform` is forced to
        False when `--no_log_transform` is given, and `mixed_precision` is
        False when `--no_mixed_precision` is given.

    Exits via `parser.error` (SystemExit) on invalid arguments, including a
    `--fold` outside [0, n_folds).
    """
    parser = argparse.ArgumentParser(description='CSIRO Image2Biomass Training')

    # Data
    parser.add_argument('--data_dir', type=str, required=True, help='Competition data directory')
    parser.add_argument('--output_dir', type=str, default='./output', help='Output directory')

    # Model
    parser.add_argument('--backbone', type=str, default='dinov2_base',
                        choices=list(BACKBONE_CONFIGS.keys()), help='Backbone architecture')
    parser.add_argument('--img_size', type=int, default=None, help='Image size (default: backbone native)')
    parser.add_argument('--hidden_dim', type=int, default=512, help='Hidden dim in MLP head')
    parser.add_argument('--dropout', type=float, default=0.3, help='Dropout rate')
    parser.add_argument('--separate_heads', action='store_true', help='Use separate heads per target')
    parser.add_argument('--grad_checkpointing', action='store_true', help='Enable gradient checkpointing')
    parser.add_argument('--use_ndvi', action='store_true', help='Use NDVI features')

    # Training
    parser.add_argument('--epochs', type=int, default=50, help='Max epochs')
    parser.add_argument('--batch_size', type=int, default=32, help='Batch size')
    parser.add_argument('--backbone_lr', type=float, default=5e-5, help='Backbone learning rate')
    parser.add_argument('--head_lr', type=float, default=1e-3, help='Head learning rate')
    parser.add_argument('--min_lr', type=float, default=1e-7, help='Min learning rate')
    parser.add_argument('--weight_decay', type=float, default=1e-2, help='Weight decay')
    parser.add_argument('--optimizer', type=str, default='adamw', choices=['adamw', 'sgd'])
    parser.add_argument('--scheduler', type=str, default='cosine', choices=['cosine', 'plateau', 'none'])
    parser.add_argument('--warmup_ratio', type=float, default=0.05, help='Warmup ratio')
    parser.add_argument('--max_grad_norm', type=float, default=1.0, help='Max gradient norm')
    parser.add_argument('--grad_accum_steps', type=int, default=1, help='Gradient accumulation steps')
    parser.add_argument('--patience', type=int, default=10, help='Early stopping patience')
    parser.add_argument('--log_interval', type=int, default=10, help='Log every N batches')

    # Augmentation / target transform
    parser.add_argument('--aug_strength', type=str, default='medium', choices=['light', 'medium', 'heavy'])
    # --log_transform is already on by default; the flag is kept so existing
    # command lines keep working. Use --no_log_transform to turn it off.
    parser.add_argument('--log_transform', action='store_true', default=True, help='Log-transform targets')
    parser.add_argument('--no_log_transform', action='store_true', help='Disable log-transform')

    # Label distribution smoothing
    parser.add_argument('--use_lds', action='store_true', help='Use Label Distribution Smoothing')
    parser.add_argument('--lds_bins', type=int, default=100)
    parser.add_argument('--lds_kernel_size', type=int, default=5)
    parser.add_argument('--lds_sigma', type=float, default=2.0)

    # Loss
    parser.add_argument('--mse_weight', type=float, default=0.0, help='MSE loss weight')
    parser.add_argument('--consistency_weight', type=float, default=0.1, help='Consistency loss weight')

    # Cross-validation
    parser.add_argument('--n_folds', type=int, default=5, help='Number of CV folds')
    parser.add_argument('--fold', type=int, default=None, help='Train single fold (None=all)')

    # Misc
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--num_workers', type=int, default=4)
    # store_true with default=True can never yield False, so the positive flag
    # alone gave no way to disable mixed precision. It is kept for
    # compatibility and paired with an explicit off-switch on the same dest.
    parser.add_argument('--mixed_precision', action='store_true', default=True,
                        help='Use mixed precision (enabled by default)')
    parser.add_argument('--no_mixed_precision', dest='mixed_precision', action='store_false',
                        help='Disable mixed precision')

    args = parser.parse_args(argv)

    if args.no_log_transform:
        args.log_transform = False

    # An out-of-range fold index would otherwise match no split at all.
    if args.fold is not None and not 0 <= args.fold < args.n_folds:
        parser.error(f"--fold must be in [0, {args.n_folds - 1}], got {args.fold}")

    return args
|
|
|
|
def main():
    """
    Entry point: parse arguments, load data, and train.

    Trains a single stratified fold when `--fold` is given, otherwise runs
    the full k-fold procedure via `Trainer.train_kfold`. The parsed
    arguments are written to `<output_dir>/args.json` before training.

    Raises:
        ValueError: if `--fold` is outside [0, n_folds). Previously such a
            value matched no split, so nothing was trained and the script
            still reported success.
    """
    args = get_args()

    # Fail fast, before any data is loaded or output is written.
    if args.fold is not None and not 0 <= args.fold < args.n_folds:
        raise ValueError(
            f"--fold must be in [0, {args.n_folds - 1}], got {args.fold}"
        )

    set_seed(args.seed)

    # Only the training frame and training image directory are used here.
    train_df, _, _, train_img_dir, _ = load_competition_data(args.data_dir)

    targets = train_df[TARGET_COLS].copy()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Persist the run configuration next to the training outputs.
    with open(output_dir / 'args.json', 'w', encoding='utf-8') as f:
        json.dump(vars(args), f, indent=2)

    trainer = Trainer(args)

    if args.fold is not None:
        # Stratify on quantile bins of the total-biomass target.
        # NOTE(review): presumably mirrors the binning used inside
        # Trainer.train_kfold so single-fold splits line up - confirm.
        bins = pd.qcut(targets['Dry_Total_g'], q=min(10, args.n_folds),
                       labels=False, duplicates='drop')
        kf = StratifiedKFold(n_splits=args.n_folds, shuffle=True, random_state=args.seed)

        # The fold index was validated above, so direct indexing is safe.
        train_idx, val_idx = list(kf.split(train_df, bins))[args.fold]

        trainer.train_fold(
            args.fold, train_df.iloc[train_idx], train_df.iloc[val_idx],
            targets.iloc[train_idx], targets.iloc[val_idx], train_img_dir
        )
    else:
        trainer.train_kfold(train_df, targets, train_img_dir)

    logger.info("Training complete!")
|
|
|
|
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|