"""
CSIRO Image2Biomass Prediction - Kaggle Training Notebook
==========================================================
This is a self-contained training notebook for Kaggle.
Run it on a Kaggle GPU to train the models, then use the inference notebook to submit.

Key features:
- DINOv2-Base + ConvNeXt-Large ensemble
- 5-fold stratified CV
- Log-transformed targets
- Label Distribution Smoothing
- Weighted SmoothL1 loss + consistency regularization
- Heavy D4 augmentations for pasture images
- Gradient checkpointing for memory efficiency
"""
|
|
| |
| |
| |
import json
import math
import os
import random
import sys
import time
import warnings
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.model_selection import StratifiedKFold
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

warnings.filterwarnings('ignore')

# Install timm and albumentations before importing them.
os.system('pip install -q timm albumentations')
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
|
|
| |
| |
| |
class CFG:
    """Static configuration shared by every stage of the training script."""

    # --- Paths ---
    COMPETITION = 'csiro-biomass'
    DATA_DIR = Path('/kaggle/input') / COMPETITION
    OUTPUT_DIR = Path('/kaggle/working')

    # --- Model ---
    BACKBONE = 'vit_base_patch14_dinov2.lvd142m'
    IMG_SIZE = 224
    HIDDEN_DIM = 512
    DROPOUT = 0.3
    SEPARATE_HEADS = False
    GRAD_CHECKPOINTING = True

    # --- Optimisation ---
    EPOCHS = 30
    BATCH_SIZE = 16
    BACKBONE_LR = 3e-5
    HEAD_LR = 1e-3
    MIN_LR = 1e-7
    WEIGHT_DECAY = 1e-2
    WARMUP_RATIO = 0.05
    MAX_GRAD_NORM = 1.0
    GRAD_ACCUM = 2
    PATIENCE = 8

    # --- Data handling ---
    AUG_STRENGTH = 'medium'
    LOG_TRANSFORM = True

    # --- Label Distribution Smoothing ---
    USE_LDS = True
    LDS_BINS = 100
    LDS_KERNEL_SIZE = 5
    LDS_SIGMA = 2.0

    # --- Loss mixing ---
    MSE_WEIGHT = 0.0
    CONSISTENCY_WEIGHT = 0.1

    # --- Cross-validation ---
    N_FOLDS = 5
    TRAIN_FOLDS = [0, 1, 2, 3, 4]

    # --- Runtime ---
    SEED = 42
    NUM_WORKERS = 2
    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
| |
# Regression target columns, in the order used for model outputs.
TARGET_COLS = [
    'Dry_Green_g',
    'Dry_Dead_g',
    'Dry_Clover_g',
    'GDM_g',
    'Dry_Total_g',
]
# Per-target weights, aligned index-for-index with TARGET_COLS.
TARGET_WEIGHTS = [0.1, 0.1, 0.1, 0.2, 0.5]
# Channel statistics passed to the Normalize transform.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
|
|
|
|
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Args:
        seed: Integer seed applied to every generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)
|
|
# Seed all RNGs once at import time, then report which device will be used.
set_seed(CFG.SEED)
print(f"Device: {CFG.DEVICE}")
if CFG.DEVICE == 'cuda':
    # Only query the GPU name when CUDA was actually selected.
    print(f"GPU: {torch.cuda.get_device_name(0)}")
|
|
|
|
| |
| |
| |
| |
# If the configured dataset path is absent, switch to the first alternative that exists.
if not CFG.DATA_DIR.exists():
    _fallback_dirs = (
        '/kaggle/input/csiro-image2biomass-prediction',
        '/kaggle/input/csiro-biomass',
        '/kaggle/input/csiro-image2biomass',
        './data',
    )
    _found_dir = next((Path(p) for p in _fallback_dirs if Path(p).exists()), None)
    if _found_dir is not None:
        CFG.DATA_DIR = _found_dir

print(f"Data directory: {CFG.DATA_DIR}")
_listing = os.listdir(CFG.DATA_DIR) if CFG.DATA_DIR.exists() else 'NOT FOUND'
print(f"Files: {_listing}")
|
|
| |
# Locate the train/test CSVs, accepting either lower-case or capitalised file names.
train_csv = next(
    (CFG.DATA_DIR / name for name in ('train.csv', 'Train.csv')
     if (CFG.DATA_DIR / name).exists()),
    None,
)
test_csv = next(
    (CFG.DATA_DIR / name for name in ('test.csv', 'Test.csv')
     if (CFG.DATA_DIR / name).exists()),
    None,
)

# A missing CSV leaves the corresponding frame as None so later stages can skip it.
train_df = None if train_csv is None else pd.read_csv(train_csv)
test_df = None if test_csv is None else pd.read_csv(test_csv)
|
|
| |
# Pick the first existing image folder for each split; None when no candidate exists.
train_img_dir = next(
    (CFG.DATA_DIR / sub for sub in ('train_images', 'train', 'images/train')
     if (CFG.DATA_DIR / sub).exists()),
    None,
)
test_img_dir = next(
    (CFG.DATA_DIR / sub for sub in ('test_images', 'test', 'images/test')
     if (CFG.DATA_DIR / sub).exists()),
    None,
)
|
|
# Print a quick overview of whichever frames were loaded.
if train_df is not None:
    print(f"\nTrain shape: {train_df.shape}")
    print(f"Train columns: {list(train_df.columns)}")
    print(f"\nTarget statistics:")
    # Only report targets that are actually present as columns.
    for col in (c for c in TARGET_COLS if c in train_df.columns):
        series = train_df[col]
        zero_pct = 100 * (series == 0).mean()
        print(f" {col}: mean={series.mean():.2f}, median={series.median():.2f}, "
              f"std={series.std():.2f}, min={series.min():.2f}, max={series.max():.2f}, "
              f"zeros={zero_pct:.1f}%")


if test_df is not None:
    print(f"\nTest shape: {test_df.shape}")
    print(f"Test columns: {list(test_df.columns)}")
|
|
|
|
| |
| |
| |
class BiomassDataset(Dataset):
    """Image dataset yielding tensors plus optional NDVI and target values.

    Args:
        image_dir: Directory containing the image files.
        df: Frame with one row per sample. The image id is read from the
            ``image_id`` column when present, otherwise from the row label
            after the index has been reset.
        targets: Optional frame holding the ``TARGET_COLS`` columns, aligned
            positionally with ``df``. When None no ``targets`` entry is returned.
        transform: Optional callable invoked as ``transform(image=array)`` whose
            result exposes the tensor under the ``'image'`` key.
        log_transform: Apply ``np.log1p`` to the target values when True.
        use_ndvi: Add an ``ndvi`` entry when ``df`` has an ``NDVI`` column.
    """

    # Extensions probed first, in priority order, before falling back to globbing.
    _EXTENSIONS = ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG')

    def __init__(self, image_dir, df, targets=None, transform=None,
                 log_transform=True, use_ndvi=False):
        self.image_dir = Path(image_dir)
        self.df = df.reset_index(drop=True)
        self.targets = targets
        self.transform = transform
        self.log_transform = log_transform
        self.use_ndvi = use_ndvi

    def __len__(self):
        return len(self.df)

    def _resolve_image_path(self, img_id):
        """Return the on-disk path for ``img_id``.

        Known extensions are tried first. Otherwise files whose stem is exactly
        ``img_id`` are preferred over plain prefix matches, so that ``img_1``
        never resolves to ``img_10.*``; candidates are sorted to keep the
        choice deterministic. When nothing matches, ``<img_id>.jpg`` is
        returned so that opening it raises the usual ``FileNotFoundError``.
        """
        for ext in self._EXTENSIONS:
            candidate = self.image_dir / f"{img_id}{ext}"
            if candidate.exists():
                return candidate
        matches = (sorted(self.image_dir.glob(f"{img_id}.*"))
                   or sorted(self.image_dir.glob(f"{img_id}*")))
        return matches[0] if matches else self.image_dir / f"{img_id}.jpg"

    def __getitem__(self, idx):
        """Return a dict with ``image``, ``image_id`` and optional ``ndvi``/``targets``."""
        row = self.df.iloc[idx]
        img_id = row['image_id'] if 'image_id' in row.index else row.name

        img_path = self._resolve_image_path(img_id)
        # The context manager releases the file handle as soon as pixels are decoded.
        with Image.open(img_path) as handle:
            img = np.array(handle.convert('RGB'))

        if self.transform:
            img_tensor = self.transform(image=img)['image']
        else:
            # No transform: HWC uint8 -> CHW float in [0, 1].
            img_tensor = torch.tensor(img.transpose(2, 0, 1), dtype=torch.float32) / 255.0

        result = {'image': img_tensor, 'image_id': str(img_id)}

        if self.use_ndvi and 'NDVI' in self.df.columns:
            result['ndvi'] = torch.tensor(float(row['NDVI']), dtype=torch.float32)

        if self.targets is not None:
            # Positional lookup: targets must be in the same row order as df.
            target_values = self.targets.iloc[idx][TARGET_COLS].values.astype(np.float32)
            if self.log_transform:
                target_values = np.log1p(target_values)
            result['targets'] = torch.tensor(target_values, dtype=torch.float32)

        return result
|
|
|
|
def get_transforms(img_size, is_train=True, aug_strength='medium'):
    """Build the albumentations pipeline for training or evaluation.

    Args:
        img_size: Side length of the square output image.
        is_train: When True return a random augmentation pipeline, otherwise
            the deterministic resize + center-crop pipeline.
        aug_strength: ``'light'`` or ``'medium'``; any other value selects the
            strongest pipeline. Ignored when ``is_train`` is False.

    Returns:
        An ``A.Compose`` that ends with normalization and ``ToTensorV2``.
    """
    def finish(steps):
        # Every pipeline ends with the same normalization + tensor conversion.
        return A.Compose(steps + [
            A.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
            ToTensorV2(),
        ])

    if not is_train:
        return finish([
            A.Resize(height=int(img_size * 1.14), width=int(img_size * 1.14)),
            A.CenterCrop(height=img_size, width=img_size),
        ])

    def random_crop(min_scale):
        return A.RandomResizedCrop(size=(img_size, img_size), scale=(min_scale, 1.0))

    def flips():
        return [A.HorizontalFlip(p=0.5), A.VerticalFlip(p=0.5)]

    def rotations():
        return [A.RandomRotate90(p=0.5), A.Transpose(p=0.5)]

    def dropout(max_holes, max_frac, p):
        # Hole sides range from 5% of the image up to max_frac of it.
        side_range = (int(img_size * 0.05), int(img_size * max_frac))
        return A.CoarseDropout(num_holes_range=(1, max_holes),
                               hole_height_range=side_range,
                               hole_width_range=side_range, p=p)

    if aug_strength == 'light':
        return finish([random_crop(0.7)] + flips())

    if aug_strength == 'medium':
        return finish(
            [random_crop(0.5)] + flips() + rotations() + [
                A.RandomBrightnessContrast(0.2, 0.2, p=0.5),
                A.HueSaturationValue(15, 25, 15, p=0.4),
                A.OneOf([A.GaussianBlur(blur_limit=(3, 5)), A.MotionBlur(blur_limit=5)], p=0.15),
                dropout(4, 0.15, p=0.2),
            ]
        )

    # Any other strength value falls through to the strongest pipeline.
    return finish(
        [random_crop(0.4)] + flips() + rotations() + [
            A.RandomBrightnessContrast(0.3, 0.3, p=0.7),
            A.HueSaturationValue(20, 30, 20, p=0.5),
            A.RandomGamma((80, 120), p=0.3),
            A.OneOf([A.GaussianBlur((3, 7)), A.MotionBlur(blur_limit=7)], p=0.2),
            dropout(8, 0.2, p=0.3),
        ]
    )
|
|
|
|
| |
| |
| |
class BiomassModel(nn.Module):
    """timm backbone followed by an MLP regression head (shared or per-target).

    Args:
        backbone_name: Model name passed to ``timm.create_model``.
        num_targets: Number of regression outputs.
        hidden_dim: Width of the first hidden layer of the head(s).
        dropout: Base dropout rate; later head layers use scaled-down rates.
        pretrained: Forwarded to ``timm.create_model``.
        img_size: Forwarded to timm only when the backbone name contains
            ``'vit'`` or ``'dinov2'``.
        use_ndvi: Concatenate a 64-dim embedding of a scalar NDVI value to the
            backbone features.
        separate_heads: Use one small head per target instead of a shared head.
        grad_checkpointing: Enable gradient checkpointing when the backbone
            exposes ``set_grad_checkpointing``.
    """

    def __init__(self, backbone_name, num_targets=5, hidden_dim=512,
                 dropout=0.3, pretrained=True, img_size=224,
                 use_ndvi=False, separate_heads=False, grad_checkpointing=False):
        super().__init__()
        self.use_ndvi = use_ndvi
        self.separate_heads = separate_heads

        # num_classes=0 makes timm return pooled features instead of logits.
        kwargs = {'pretrained': pretrained, 'num_classes': 0}
        if 'vit' in backbone_name or 'dinov2' in backbone_name:
            kwargs['img_size'] = img_size

        self.backbone = timm.create_model(backbone_name, **kwargs)
        feat_dim = self.backbone.num_features

        if grad_checkpointing and hasattr(self.backbone, 'set_grad_checkpointing'):
            self.backbone.set_grad_checkpointing(True)

        if use_ndvi:
            self.ndvi_embed = nn.Sequential(nn.Linear(1, 32), nn.GELU(), nn.Linear(32, 64))
            feat_dim += 64

        if separate_heads:
            self.heads = nn.ModuleList([
                nn.Sequential(
                    nn.LayerNorm(feat_dim), nn.Dropout(dropout),
                    nn.Linear(feat_dim, hidden_dim), nn.GELU(),
                    nn.Dropout(dropout * 0.5), nn.Linear(hidden_dim, 1),
                ) for _ in range(num_targets)
            ])
        else:
            self.head = nn.Sequential(
                nn.LayerNorm(feat_dim), nn.Dropout(dropout),
                nn.Linear(feat_dim, hidden_dim), nn.GELU(),
                nn.Dropout(dropout * 0.5),
                nn.Linear(hidden_dim, hidden_dim // 2), nn.GELU(),
                nn.Dropout(dropout * 0.3),
                nn.Linear(hidden_dim // 2, num_targets),
            )

    def forward(self, x, ndvi=None):
        """Return predictions with one column per target.

        Args:
            x: Image batch passed to the backbone.
            ndvi: Per-sample NDVI values; required when ``use_ndvi`` is True
                and ignored otherwise.

        Raises:
            ValueError: If the model was built with ``use_ndvi=True`` but no
                ``ndvi`` tensor is supplied.
        """
        features = self.backbone(x)
        if self.use_ndvi:
            if ndvi is None:
                # The heads were sized for features + NDVI embedding, so going on
                # without it could only fail later with an opaque shape error.
                raise ValueError("Model was built with use_ndvi=True but forward() received ndvi=None")
            features = torch.cat([features, self.ndvi_embed(ndvi.unsqueeze(-1))], dim=-1)
        if self.separate_heads:
            return torch.cat([h(features) for h in self.heads], dim=-1)
        return self.head(features)

    def get_param_groups(self, backbone_lr, head_lr):
        """Return optimizer parameter groups: backbone first, everything else second."""
        backbone_params = list(self.backbone.parameters())
        other_params = [p for n, p in self.named_parameters() if not n.startswith('backbone.')]
        return [
            {'params': backbone_params, 'lr': backbone_lr},
            {'params': other_params, 'lr': head_lr},
        ]
|
|
|
|
| |
| |
| |
class WeightedSmoothL1Loss(nn.Module):
    """Smooth-L1 loss with a fixed weight per target column.

    Args:
        target_weights: Sequence, array or tensor with one weight per target.
            ``None`` or an empty sequence selects ``TARGET_WEIGHTS``.
        beta: Transition point passed to ``F.smooth_l1_loss``.
    """

    def __init__(self, target_weights=None, beta=1.0):
        super().__init__()
        self.beta = beta
        # Explicit None/empty check: truth-testing an array or tensor (the former
        # ``target_weights or TARGET_WEIGHTS``) raises an ambiguity error.
        if target_weights is None or len(target_weights) == 0:
            target_weights = TARGET_WEIGHTS
        weights = torch.as_tensor(target_weights, dtype=torch.float32).detach().clone()
        # Registered as a buffer so it follows the module across devices.
        self.register_buffer('weights', weights)

    def forward(self, pred, target):
        """Return the mean over all elements of the per-target weighted loss."""
        loss = F.smooth_l1_loss(pred, target, beta=self.beta, reduction='none')
        return (loss * self.weights.unsqueeze(0)).mean()
|
|
|
|
class CombinedLoss(nn.Module):
    """Weighted Smooth-L1 plus optional weighted MSE and a sum-consistency term.

    The consistency term penalises disagreement between the first three
    predicted columns added together and the fifth predicted column.

    Args:
        mse_weight: Multiplier for the weighted MSE term (skipped when <= 0).
        consistency_weight: Multiplier for the consistency term (skipped when <= 0).
        log_transform: Whether predictions live in ``log1p`` space. ``None``
            (default) follows ``CFG.LOG_TRANSFORM``.
    """

    def __init__(self, mse_weight=0.0, consistency_weight=0.1, log_transform=None):
        super().__init__()
        self.smoothl1 = WeightedSmoothL1Loss()
        self.mse_weight = mse_weight
        self.consistency_weight = consistency_weight
        self.log_transform = CFG.LOG_TRANSFORM if log_transform is None else bool(log_transform)

    def forward(self, pred, target):
        """Return the combined scalar loss for a batch of predictions."""
        loss = self.smoothl1(pred, target)
        if self.mse_weight > 0:
            # Reuse the registered weight buffer instead of rebuilding a tensor per call.
            weights = self.smoothl1.weights.to(pred.device).unsqueeze(0)
            mse = ((pred - target) ** 2 * weights).mean()
            loss = loss + self.mse_weight * mse
        if self.consistency_weight > 0:
            # float32 keeps expm1/log1p well away from half-precision overflow.
            components = pred[:, :3].float()
            total = pred[:, 4].float()
            if self.log_transform:
                # Components only add up in original units: undo log1p, add,
                # then return to log space so the term stays on the scale of
                # the other losses. Negative values are clamped so log1p is defined.
                comp_sum = torch.log1p(torch.expm1(components).clamp_min(0).sum(dim=1))
            else:
                comp_sum = components.sum(dim=1)
            loss = loss + self.consistency_weight * F.mse_loss(comp_sum, total)
        return loss
|
|
|
|
| |
| |
| |
def get_lds_weights(labels, bins=100, kernel_size=5, sigma=2.0):
    """Compute inverse-density sample weights from a smoothed label histogram.

    Args:
        labels: 1-D array of labels, or a 2-D array whose last column is used.
        bins: Number of histogram bins.
        kernel_size: Number of taps in the Gaussian smoothing kernel.
        sigma: Standard deviation used to build the kernel.

    Returns:
        Array with one weight per label, normalised to a mean of 1.
    """
    from scipy.ndimage import convolve1d

    values = labels[:, -1] if labels.ndim > 1 else labels

    counts, bin_edges = np.histogram(values, bins=bins)

    # Gaussian taps sampled on [-3, 3], normalised to sum to one.
    taps = np.linspace(-3, 3, kernel_size)
    kernel = np.exp(-taps ** 2 / (2 * sigma ** 2))
    kernel = kernel / kernel.sum()

    density = convolve1d(counts.astype(float), kernel, mode='reflect')
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    # Interpolate the smoothed density at each label and invert it.
    inverse_density = 1.0 / (np.interp(values, bin_centers, density) + 1e-8)
    return inverse_density / inverse_density.mean()
|
|
|
|
| |
| |
| |
def compute_weighted_r2(preds, targets, weights=None):
    """Compute a single weighted R² over all targets pooled together.

    Every (sample, target) pair is one observation, weighted by its target's
    weight; the baseline is the weighted mean of all observations.

    Args:
        preds: 2-D array of predictions, shape (n_samples, n_targets).
        targets: 2-D array of ground truth with the same shape.
        weights: One weight per target column; defaults to ``TARGET_WEIGHTS``.

    Returns:
        ``1 - SS_res / (SS_tot + 1e-8)`` as a float.
    """
    if weights is None:
        weights = TARGET_WEIGHTS
    # float64 avoids precision loss when inputs arrive as float16/float32.
    preds = np.asarray(preds, dtype=np.float64)
    targets = np.asarray(targets, dtype=np.float64)
    n_samples, n_targets = preds.shape

    # Column-major flattening: all samples of target 0, then target 1, ...
    flat_p = preds.T.reshape(-1)
    flat_t = targets.T.reshape(-1)
    flat_w = np.repeat(np.asarray(weights, dtype=np.float64)[:n_targets], n_samples)

    wmean = np.sum(flat_w * flat_t) / np.sum(flat_w)
    ss_res = np.sum(flat_w * (flat_t - flat_p) ** 2)
    ss_tot = np.sum(flat_w * (flat_t - wmean) ** 2)
    return 1.0 - ss_res / (ss_tot + 1e-8)
|
|
|
|
def compute_per_target_r2(preds, targets):
    """Return the unweighted R² of each target column, keyed by its name.

    Args:
        preds: 2-D array of predictions with one column per ``TARGET_COLS`` entry.
        targets: 2-D array of ground truth with the same layout.

    Returns:
        Dict mapping each name in ``TARGET_COLS`` to its R² score.
    """
    def column_r2(col):
        truth = targets[:, col]
        residual = np.sum((truth - preds[:, col]) ** 2)
        spread = np.sum((truth - truth.mean()) ** 2)
        # The epsilon keeps the ratio finite for a constant target column.
        return 1.0 - residual / (spread + 1e-8)

    return {name: column_r2(col) for col, name in enumerate(TARGET_COLS)}
|
|
|
|
| |
| |
| |
def _unpack_batch(batch, device):
    """Move the image, target and optional NDVI tensors of a batch to ``device``."""
    images = batch['image'].to(device)
    targets = batch['targets'].to(device)
    ndvi = batch.get('ndvi', None)
    if ndvi is not None:
        ndvi = ndvi.to(device)
    return images, targets, ndvi


def _run_train_epoch(model, loader, loss_fn, optimizer, scheduler, scaler, device, use_amp):
    """Train for one epoch with gradient accumulation; return the mean per-sample loss."""
    model.train()
    n_batches = len(loader)
    running_loss = 0.0
    n_samples = 0

    for batch_idx, batch in enumerate(loader):
        images, targets, ndvi = _unpack_batch(batch, device)

        with autocast(enabled=use_amp, dtype=torch.float16):
            preds = model(images, ndvi)
            loss = loss_fn(preds, targets) / CFG.GRAD_ACCUM
        scaler.scale(loss).backward()

        # Step on every full accumulation window and also on the last batch of
        # the epoch, so a partial window is applied rather than carried over
        # into the next epoch's first step.
        window_full = (batch_idx + 1) % CFG.GRAD_ACCUM == 0
        if window_full or (batch_idx + 1) == n_batches:
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(model.parameters(), CFG.MAX_GRAD_NORM)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()

        # Undo the accumulation scaling so the logged loss is per sample.
        running_loss += loss.item() * CFG.GRAD_ACCUM * images.size(0)
        n_samples += images.size(0)

    return running_loss / n_samples


def _run_validation(model, loader, loss_fn, device, use_amp):
    """Evaluate on ``loader``; return (mean loss, predictions, targets) as float32 arrays."""
    model.eval()
    pred_chunks, target_chunks = [], []
    loss_sum = 0.0
    n_samples = 0

    with torch.no_grad():
        for batch in loader:
            images, targets, ndvi = _unpack_batch(batch, device)

            with autocast(enabled=use_amp, dtype=torch.float16):
                preds = model(images, ndvi)
                loss = loss_fn(preds, targets)

            loss_sum += loss.item() * images.size(0)
            n_samples += images.size(0)
            # Under autocast the outputs are float16; upcast before leaving torch
            # so expm1 and the R² metrics are not computed in half precision.
            pred_chunks.append(preds.float().cpu().numpy())
            target_chunks.append(targets.float().cpu().numpy())

    return loss_sum / n_samples, np.concatenate(pred_chunks), np.concatenate(target_chunks)


def train_one_fold(fold, train_df, val_df, train_targets, val_targets, train_img_dir):
    """Train one cross-validation fold and checkpoint the best epoch.

    Args:
        fold: Fold number, used for logging and for the checkpoint folder name.
        train_df: Rows used for training.
        val_df: Rows used for validation.
        train_targets: Target frame aligned positionally with ``train_df``.
        val_targets: Target frame aligned positionally with ``val_df``.
        train_img_dir: Directory holding the images of both splits.

    Returns:
        The best validation weighted R² reached during training.

    Raises:
        ValueError: If the training split yields no full batch.
    """
    print(f"\n{'='*60}")
    print(f"FOLD {fold}")
    print(f"Train: {len(train_df)}, Val: {len(val_df)}")
    print(f"{'='*60}")

    device = torch.device(CFG.DEVICE)
    # Mixed precision is only used on CUDA; elsewhere autocast/GradScaler are no-ops.
    use_amp = device.type == 'cuda'

    train_ds = BiomassDataset(
        train_img_dir, train_df, train_targets,
        transform=get_transforms(CFG.IMG_SIZE, True, CFG.AUG_STRENGTH),
        log_transform=CFG.LOG_TRANSFORM,
    )
    val_ds = BiomassDataset(
        train_img_dir, val_df, val_targets,
        transform=get_transforms(CFG.IMG_SIZE, False),
        log_transform=CFG.LOG_TRANSFORM,
    )

    loader_kwargs = {'num_workers': CFG.NUM_WORKERS, 'pin_memory': True}
    if CFG.USE_LDS:
        sample_weights = get_lds_weights(
            train_targets[TARGET_COLS].values, CFG.LDS_BINS, CFG.LDS_KERNEL_SIZE, CFG.LDS_SIGMA)
        sampler = WeightedRandomSampler(sample_weights, len(train_ds), replacement=True)
        train_loader = DataLoader(train_ds, batch_size=CFG.BATCH_SIZE, sampler=sampler,
                                  drop_last=True, **loader_kwargs)
    else:
        train_loader = DataLoader(train_ds, batch_size=CFG.BATCH_SIZE, shuffle=True,
                                  drop_last=True, **loader_kwargs)

    val_loader = DataLoader(val_ds, batch_size=CFG.BATCH_SIZE * 2, shuffle=False, **loader_kwargs)

    if len(train_loader) == 0:
        raise ValueError(
            f"Fold {fold}: {len(train_ds)} training samples do not fill one batch of "
            f"{CFG.BATCH_SIZE} (drop_last=True)")

    model = BiomassModel(
        CFG.BACKBONE, 5, CFG.HIDDEN_DIM, CFG.DROPOUT, True, CFG.IMG_SIZE,
        separate_heads=CFG.SEPARATE_HEADS, grad_checkpointing=CFG.GRAD_CHECKPOINTING,
    ).to(device)

    print(f"Parameters: {sum(p.numel() for p in model.parameters()):,}")

    optimizer = torch.optim.AdamW(
        model.get_param_groups(CFG.BACKBONE_LR, CFG.HEAD_LR),
        weight_decay=CFG.WEIGHT_DECAY,
    )

    # One optimizer step per accumulation window, counting the partial window
    # at the end of each epoch, so the schedule length matches the real steps.
    steps_per_epoch = math.ceil(len(train_loader) / CFG.GRAD_ACCUM)
    n_steps = steps_per_epoch * CFG.EPOCHS
    warmup_steps = max(1, int(n_steps * CFG.WARMUP_RATIO))
    cosine_steps = max(1, n_steps - warmup_steps)

    warmup = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.01, total_iters=warmup_steps)
    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=cosine_steps, eta_min=CFG.MIN_LR)
    scheduler = torch.optim.lr_scheduler.SequentialLR(optimizer, [warmup, cosine], milestones=[warmup_steps])

    loss_fn = CombinedLoss(CFG.MSE_WEIGHT, CFG.CONSISTENCY_WEIGHT).to(device)
    scaler = GradScaler(enabled=use_amp)

    best_r2 = -float('inf')
    patience = 0
    save_path = CFG.OUTPUT_DIR / f'fold_{fold}'
    save_path.mkdir(parents=True, exist_ok=True)

    for epoch in range(1, CFG.EPOCHS + 1):
        t0 = time.time()

        train_loss = _run_train_epoch(
            model, train_loader, loss_fn, optimizer, scheduler, scaler, device, use_amp)
        val_loss, val_preds, val_targets_arr = _run_validation(
            model, val_loader, loss_fn, device, use_amp)

        # Metrics are reported in original units, so undo the log1p transform.
        if CFG.LOG_TRANSFORM:
            val_preds_orig = np.expm1(val_preds)
            val_targets_orig = np.expm1(val_targets_arr)
        else:
            val_preds_orig = val_preds
            val_targets_orig = val_targets_arr

        val_preds_orig = np.clip(val_preds_orig, 0, None)

        r2 = compute_weighted_r2(val_preds_orig, val_targets_orig)
        per_r2 = compute_per_target_r2(val_preds_orig, val_targets_orig)

        elapsed = time.time() - t0
        lr = optimizer.param_groups[0]['lr']

        print(f"Epoch {epoch:02d}/{CFG.EPOCHS} | "
              f"train_loss={train_loss:.4f} | val_loss={val_loss:.4f} | "
              f"R²={r2:.4f} | lr={lr:.2e} | {elapsed:.0f}s")
        for name, val in per_r2.items():
            print(f" {name}: {val:.4f}")

        if r2 > best_r2:
            best_r2 = r2
            patience = 0
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'weighted_r2': best_r2,
                'per_target_r2': per_r2,
                'args': {
                    'backbone': CFG.BACKBONE,
                    'img_size': CFG.IMG_SIZE,
                    'hidden_dim': CFG.HIDDEN_DIM,
                    'dropout': CFG.DROPOUT,
                    'separate_heads': CFG.SEPARATE_HEADS,
                    'log_transform': CFG.LOG_TRANSFORM,
                    'use_ndvi': False,
                },
            }, save_path / 'best_model.pth')
            print(f" *** New best R²={best_r2:.4f} ***")
        else:
            patience += 1
            if patience >= CFG.PATIENCE:
                print(f"Early stopping at epoch {epoch}")
                break

    print(f"\nFold {fold} best R²: {best_r2:.4f}")
    return best_r2
|
|
|
|
| |
| |
| |
# Run stratified K-fold training over the folds listed in CFG.TRAIN_FOLDS.
if train_df is None or train_img_dir is None:
    print("Training data not found! Check DATA_DIR setting.")
else:
    targets = train_df[TARGET_COLS].copy()
    # Quantile bins of the total biomass serve as the stratification labels.
    bins = pd.qcut(targets['Dry_Total_g'], q=min(10, CFG.N_FOLDS), labels=False, duplicates='drop')

    kf = StratifiedKFold(n_splits=CFG.N_FOLDS, shuffle=True, random_state=CFG.SEED)
    fold_scores = []

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(train_df, bins)):
        if fold_idx in CFG.TRAIN_FOLDS:
            fold_train_df, fold_val_df = train_df.iloc[train_idx], train_df.iloc[val_idx]
            fold_train_targets, fold_val_targets = targets.iloc[train_idx], targets.iloc[val_idx]

            score = train_one_fold(
                fold_idx, fold_train_df, fold_val_df,
                fold_train_targets, fold_val_targets, str(train_img_dir),
            )
            fold_scores.append(score)

    print(f"\n{'='*60}")
    print(f"All folds: {[f'{s:.4f}' for s in fold_scores]}")
    print(f"Mean R²: {np.mean(fold_scores):.4f} ± {np.std(fold_scores):.4f}")
    print(f"{'='*60}")
|
|
|
|
| |
| |
| |
# Summarise the run. fold_scores only exists when the training branch ran,
# so fall back to an empty list; scores are stored as plain floats for JSON.
_fold_scores = [float(s) for s in globals().get('fold_scores', [])]
training_info = {
    'backbone': CFG.BACKBONE,
    'img_size': CFG.IMG_SIZE,
    'n_folds': CFG.N_FOLDS,
    'fold_scores': _fold_scores,
    'mean_r2': float(np.mean(_fold_scores)) if _fold_scores else 0,
}
# The output directory may not exist yet when no fold created it.
CFG.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
with open(CFG.OUTPUT_DIR / 'training_info.json', 'w') as f:
    json.dump(training_info, f, indent=2)


if _fold_scores:
    print("\nTraining complete! Models saved to:", CFG.OUTPUT_DIR)
    print("Next step: Use inference.py or the inference notebook to generate submission.csv")
else:
    # Do not claim success when no fold was trained.
    print("\nNo folds were trained; wrote an empty summary to:", CFG.OUTPUT_DIR / 'training_info.json')
|
|