chenchangliu committed on
Commit
0f3a64e
·
verified ·
1 Parent(s): a38d768

Delete code_EffNet_train_best.py

Browse files
Files changed (1) hide show
  1. code_EffNet_train_best.py +0 -423
code_EffNet_train_best.py DELETED
@@ -1,423 +0,0 @@
1
- #!/usr/bin/env python3
2
- from __future__ import annotations
3
-
4
- # Two-head EfficientNet classifier (multi-task) with:
5
- # - Dataset: reads image crops from DATA_ROOT/{train,val,test} and labels from sidecar .txt files
6
- # (each .txt contains: "species_id state_id")
7
- # - Augmentation (train only): resize to IMG_SIZE, random horizontal flip, random 0/90/180/270 rotation,
8
- # mild ColorJitter (lighting/camera variation), and small translate/scale jitter via RandomAffine
9
- # - Transfer learning: EfficientNet-B0 pretrained backbone shared by two classification heads
10
- # (species head: NUM_SPECIES classes, state head: NUM_STATES classes)
11
- # - Optimization: AdamW with separate learning rates for backbone vs heads (LR_BACKBONE, LR_HEADS)
12
- # - Warm-up: freeze backbone for the first FREEZE_EPOCHS epochs, then unfreeze and fine-tune end-to-end
13
- # - LR schedule: CosineAnnealingLR applied only after unfreezing (T_max = EPOCHS - FREEZE_EPOCHS)
14
- # - Logging: W&B (self-hosted) logs per-head losses/accuracies, combined accuracy, and current LR values
15
- # - Checkpointing: saves best.pt (by combined val accuracy = mean of two head accuracies) and last.pt
16
-
17
- ### To prevent overfitting after 15/20 epochs:
18
- # - Added: label smoothing to prevent overfitting: ce = nn.CrossEntropyLoss(label_smoothing=0.05)
19
- # - RandomErasing applied AFTER normalization, because it expects a tensor
20
- # - Increased dropout
21
- # - Increased FREEZE_EPOCHS
22
- # - Reduced color augmentation, use very small numbers
23
- # - Reduced LR_HEADS
24
- # - Try freezing batch norm
25
-
26
from pathlib import Path
import os

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.io import read_image, ImageReadMode
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

import wandb
37
-
38
-
39
# ------------------ CONFIG ------------------
DATA_ROOT = Path("LTN_crop_twohead")  # dataset root holding the train/val/test splits
NUM_SPECIES = 12   # number of classes for the species head
NUM_STATES = 4     # number of classes for the state head

EPOCHS = 150       # total epochs, including the frozen-backbone warm-up
BATCH = 32         # mini-batch size for both train and val loaders

LR_BACKBONE = 3e-5  # fine-tuning LR for the pretrained feature extractor
LR_HEADS = 2e-4  # slightly reduced to reduce overfitting
WEIGHT_DECAY = 1e-2  # AdamW decoupled weight decay (all param groups)

FREEZE_EPOCHS = 15  # freeze backbone initially

IMG_SIZE = 224     # EfficientNet-B0 native input resolution
WORKERS = 1        # DataLoader worker processes

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# --------------------------------------------
58
-
59
- ### Freeze batchNorm
60
def set_bn_eval(m):
    """Switch a BatchNorm2d module to eval mode, freezing its running statistics.

    Intended for use with ``model.apply(set_bn_eval)``; non-BN modules are untouched.
    """
    if isinstance(m, nn.BatchNorm2d):
        m.eval()
63
- ###-----
64
-
65
class RandomRotate90:
    """Rotate a CHW image tensor by a uniformly random multiple of 90 degrees (0/90/180/270)."""

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        quarter_turns = int(torch.randint(0, 4, (1,)))
        # rot90 over dims (1, 2) spins the spatial H/W plane of a CHW tensor.
        return torch.rot90(x, quarter_turns, dims=(1, 2))
70
-
71
-
72
class TwoHeadCrops(Dataset):
    """
    Image-crop dataset with two labels per image (species + state).

    Expects layout:
        LTN_crop_twohead/{train,val,test}/.../*.jpg
        LTN_crop_twohead/{train,val,test}/.../*.txt   (contains: "species_id state_id")

    The folder name is ignored. Labels come from the .txt next to each image.
    The training split is augmented; val/test only get resize + normalize.

    Raises:
        RuntimeError: if no labelled images are found under the split.
        ValueError: if a sidecar label file does not contain two fields.
    """

    def __init__(self, root: Path, split: str):
        img_exts = {".jpg", ".jpeg", ".png"}
        paths = []

        for p in (root / split).rglob("*"):
            if p.is_dir():
                continue
            if p.suffix.lower() not in img_exts:
                continue
            # Skip anything inside hidden files/directories (e.g. .DS_Store, .cache)
            if any(part.startswith(".") for part in p.parts):
                continue
            # Keep only images that have a sidecar label file next to them
            if not p.with_suffix(".txt").exists():
                continue
            paths.append(p)

        self.img_paths = sorted(paths)
        if not self.img_paths:
            raise RuntimeError(f"No images found under: {root / split}")

        # Augmentation only for the training split
        if split == "train":
            self.tfm = transforms.Compose([
                transforms.Resize((IMG_SIZE, IMG_SIZE), antialias=True),
                transforms.RandomHorizontalFlip(p=0.5),
                RandomRotate90(),  # random 0/90/180/270 rotation
                transforms.RandomApply([
                    transforms.RandomAffine(
                        degrees=0,               # no arbitrary angle
                        translate=(0.02, 0.02),  # small shift
                        scale=(0.95, 1.05),      # small zoom
                        shear=None,
                    )
                ], p=0.5),

                # Mild photometric jitter (lighting / camera variation)
                transforms.ColorJitter(
                    brightness=0.05,
                    contrast=0.02,
                    saturation=0.02,
                    hue=0.01,
                ),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
                # RandomErasing runs AFTER Normalize because it expects a tensor;
                # 20% of images affected, erased area 1-5% of the image.
                transforms.RandomErasing(
                    p=0.20,
                    scale=(0.01, 0.05),
                    ratio=(0.5, 2.0),
                    value=0,
                ),
            ])
        else:
            self.tfm = transforms.Compose([
                transforms.Resize((IMG_SIZE, IMG_SIZE), antialias=True),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])

    def __len__(self) -> int:
        return len(self.img_paths)

    def __getitem__(self, i: int):
        p = self.img_paths[i]
        # Force 3-channel RGB decoding: grayscale / RGBA / palette PNGs would
        # otherwise break the 3-channel Normalize above.
        x = read_image(str(p), mode=ImageReadMode.RGB).float() / 255.0  # CHW in [0..1]
        x = self.tfm(x)

        lab = p.with_suffix(".txt").read_text(encoding="utf-8").strip().split()
        if len(lab) < 2:
            raise ValueError(
                f"Malformed label file (expected 'species_id state_id'): {p.with_suffix('.txt')}"
            )
        species_id = int(lab[0])
        state_id = int(lab[1])

        return x, torch.tensor(species_id, dtype=torch.long), torch.tensor(state_id, dtype=torch.long)
149
-
150
-
151
class EffNetTwoHead(nn.Module):
    """Shared EfficientNet-B0 feature extractor feeding two linear classifier heads."""

    def __init__(self, num_species: int, num_states: int, pretrained: bool = True):
        super().__init__()
        weights = EfficientNet_B0_Weights.DEFAULT if pretrained else None
        backbone = efficientnet_b0(weights=weights)

        # Reuse the backbone's conv stack and pooling; its original classifier
        # is replaced by two task-specific heads behind a shared dropout.
        self.features = backbone.features
        self.pool = backbone.avgpool
        self.drop = nn.Dropout(0.3)

        feat_dim = backbone.classifier[1].in_features
        self.head_species = nn.Linear(feat_dim, num_species)
        self.head_state = nn.Linear(feat_dim, num_states)

    def forward(self, x: torch.Tensor):
        """Return (species_logits, state_logits) for a batch of images."""
        pooled = self.pool(self.features(x))
        shared = self.drop(torch.flatten(pooled, 1))
        return self.head_species(shared), self.head_state(shared)
172
-
173
-
174
@torch.no_grad()
def eval_one_epoch(model: nn.Module, loader: DataLoader, ce, device=None):
    """
    Run one evaluation pass over `loader` and report per-sample means.

    Args:
        model: two-head model returning (species_logits, state_logits).
        loader: yields (image_batch, species_targets, state_targets).
        ce: CrossEntropy-style loss taking (logits, targets).
        device: optional device override; defaults to the module-level DEVICE
            (the default keeps prior call sites unchanged).

    Returns:
        (total_loss, species_loss, state_loss, species_acc, state_acc),
        each averaged per sample over the whole loader.
    """
    if device is None:
        device = DEVICE
    model.eval()

    loss_sum_total = 0.0
    loss_sum_sp = 0.0
    loss_sum_st = 0.0
    n = 0

    correct_sp = 0
    correct_st = 0

    for x, ysp, yst in loader:
        x = x.to(device, non_blocking=True)
        ysp = ysp.to(device, non_blocking=True)
        yst = yst.to(device, non_blocking=True)

        lsp, lst = model(x)
        loss_sp = ce(lsp, ysp)
        loss_st = ce(lst, yst)
        loss = loss_sp + loss_st

        # Weight batch statistics by batch size so the final averages are exact
        # per-sample means even when the last batch is smaller.
        bs = x.size(0)
        loss_sum_total += float(loss.item()) * bs
        loss_sum_sp += float(loss_sp.item()) * bs
        loss_sum_st += float(loss_st.item()) * bs
        n += bs

        correct_sp += int((lsp.argmax(1) == ysp).sum().item())
        correct_st += int((lst.argmax(1) == yst).sum().item())

    # max(1, n) guards against an empty loader (avoids ZeroDivisionError)
    val_loss = loss_sum_total / max(1, n)
    val_loss_sp = loss_sum_sp / max(1, n)
    val_loss_st = loss_sum_st / max(1, n)
    val_acc_sp = correct_sp / max(1, n)
    val_acc_st = correct_st / max(1, n)

    return val_loss, val_loss_sp, val_loss_st, val_acc_sp, val_acc_st
217
-
218
-
219
def main():
    """Train the two-head EfficientNet: frozen-backbone warm-up, then end-to-end fine-tuning.

    Logs per-head metrics to W&B, saves best.pt (by combined val accuracy) and last.pt.
    """
    # W&B setup (self-hosted server); env var is only set if not already present
    os.environ.setdefault("WANDB_BASE_URL", "http://k8s.tu-ilmenau.de:31020")

    run = wandb.init(
        project="EffNetCls",
        entity="mase-students",
        config={
            "epochs": EPOCHS,
            "batch": BATCH,
            "lr_backbone": LR_BACKBONE,
            "lr_heads": LR_HEADS,
            "weight_decay": WEIGHT_DECAY,
            "freeze_epochs": FREEZE_EPOCHS,
            "img_size": IMG_SIZE,
        },
    )

    # Data
    train_ds = TwoHeadCrops(DATA_ROOT, "train")
    val_ds = TwoHeadCrops(DATA_ROOT, "val")

    train_loader = DataLoader(
        train_ds,
        batch_size=BATCH,
        shuffle=True,
        num_workers=WORKERS,
        pin_memory=True,
    )
    val_loader = DataLoader(
        val_ds,
        batch_size=BATCH,
        shuffle=False,
        num_workers=WORKERS,
        pin_memory=True,
    )

    # Model
    model = EffNetTwoHead(NUM_SPECIES, NUM_STATES, pretrained=True).to(DEVICE)

    # Freeze backbone initially (heads learn first, then fine-tune backbone)
    for p in model.features.parameters():
        p.requires_grad = False

    # Optimizer with separate LR groups. NOTE(review): the pool and drop groups
    # contain no trainable parameters (AvgPool/Dropout), so they are harmless no-ops.
    opt = torch.optim.AdamW(
        [
            {"params": model.features.parameters(), "lr": LR_BACKBONE},
            {"params": model.pool.parameters(), "lr": LR_BACKBONE},
            {"params": model.drop.parameters(), "lr": LR_BACKBONE},
            {"params": model.head_species.parameters(), "lr": LR_HEADS},
            {"params": model.head_state.parameters(), "lr": LR_HEADS},
        ],
        weight_decay=WEIGHT_DECAY,
    )

    # Cosine decay over the post-warm-up phase only: the scheduler is stepped
    # exclusively once the backbone is unfrozen (see the epoch loop), so T_max
    # covers the remaining EPOCHS - FREEZE_EPOCHS steps down to eta_min.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        opt,
        T_max=EPOCHS - FREEZE_EPOCHS,
        eta_min=1e-6,
    )

    ce_train = nn.CrossEntropyLoss(label_smoothing=0.05)  # apply label smoothing only for training
    ce_val = nn.CrossEntropyLoss()

    # Save best by combined accuracy; always save last at end
    best_acc = -1.0

    for epoch in range(1, EPOCHS + 1):

        # Unfreeze backbone after warm-up
        if epoch == FREEZE_EPOCHS + 1:
            for p in model.features.parameters():
                p.requires_grad = True
            print(f"[epoch {epoch:03d}] Backbone unfrozen")

        # ---- Train one epoch ----
        model.train()
        # Freeze BatchNorm running statistics after unfreezing the backbone.
        # model.train() above re-enables BN updates, so this must be re-applied
        # every epoch of the fine-tuning phase.
        if epoch > FREEZE_EPOCHS:
            model.apply(set_bn_eval)

        # Per-sample-weighted running sums (exact means even with a short last batch)
        loss_sum_total = 0.0
        loss_sum_sp = 0.0
        loss_sum_st = 0.0
        n = 0

        correct_sp = 0
        correct_st = 0

        for x, ysp, yst in train_loader:
            x = x.to(DEVICE, non_blocking=True)
            ysp = ysp.to(DEVICE, non_blocking=True)
            yst = yst.to(DEVICE, non_blocking=True)

            opt.zero_grad(set_to_none=True)
            lsp, lst = model(x)

            # per-head losses, summed with equal weight for the joint update
            loss_sp = ce_train(lsp, ysp)
            loss_st = ce_train(lst, yst)

            loss = loss_sp + loss_st

            loss.backward()
            opt.step()

            bs = x.size(0)
            loss_sum_total += float(loss.item()) * bs
            loss_sum_sp += float(loss_sp.item()) * bs
            loss_sum_st += float(loss_st.item()) * bs
            n += bs

            correct_sp += int((lsp.argmax(1) == ysp).sum().item())
            correct_st += int((lst.argmax(1) == yst).sum().item())

        train_loss = loss_sum_total / max(1, n)
        train_loss_sp = loss_sum_sp / max(1, n)
        train_loss_st = loss_sum_st / max(1, n)
        train_acc_sp = correct_sp / max(1, n)
        train_acc_st = correct_st / max(1, n)

        # ---- Validate ----
        val_loss, val_loss_sp, val_loss_st, val_acc_sp, val_acc_st = eval_one_epoch(model, val_loader, ce_val)

        # Step the cosine schedule only after unfreeze; avoids "wasting" the
        # decay while the backbone is still frozen.
        if epoch > FREEZE_EPOCHS:
            scheduler.step()

        # Read current LRs after scheduler.step()
        # (group 0 = backbone, last group = state head; both head groups share LR_HEADS)
        lr_backbone = opt.param_groups[0]["lr"]
        lr_heads = opt.param_groups[-1]["lr"]

        # Checkpoint-selection metric: mean of the two head accuracies
        combined_acc = 0.5 * (val_acc_sp + val_acc_st)

        # ---- Print per-epoch summary ----
        print(
            f"epoch {epoch:03d} | "
            f"train_loss={train_loss:.4f} (sp={train_loss_sp:.4f}, st={train_loss_st:.4f}) | "
            f"train_acc_sp={train_acc_sp:.3f} | train_acc_st={train_acc_st:.3f} | "
            f"val_loss={val_loss:.4f} (sp={val_loss_sp:.4f}, st={val_loss_st:.4f}) | "
            f"val_acc_sp={val_acc_sp:.3f} | val_acc_st={val_acc_st:.3f} | "
            f"val_acc_combined={combined_acc:.3f} | "
            f"lr_backbone={lr_backbone:.6f} | lr_heads={lr_heads:.6f}"
        )

        # ---- W&B logging ----
        wandb.log({
            "epoch": epoch,

            "train/loss_total": train_loss,
            "train/loss_species": train_loss_sp,
            "train/loss_state": train_loss_st,
            "train/acc_species": train_acc_sp,
            "train/acc_state": train_acc_st,

            "val/loss_total": val_loss,
            "val/loss_species": val_loss_sp,
            "val/loss_state": val_loss_st,
            "val/acc_species": val_acc_sp,
            "val/acc_state": val_acc_st,
            "val/acc_combined": combined_acc,

            "lr/backbone": lr_backbone,
            "lr/heads": lr_heads,
        })

        # ---- Save best checkpoint by combined accuracy ----
        if combined_acc > best_acc:
            best_acc = combined_acc
            torch.save(
                {
                    "model": model.state_dict(),
                    "epoch": epoch,
                    "best_acc": best_acc,
                    "val_acc_species": val_acc_sp,
                    "val_acc_state": val_acc_st,
                    "val_acc_combined": combined_acc,
                    "num_species": NUM_SPECIES,
                    "num_states": NUM_STATES,
                    "img_size": IMG_SIZE,
                },
                "best.pt",
            )

    # Always save last checkpoint
    torch.save(
        {
            "model": model.state_dict(),
            "epoch": EPOCHS,
            "num_species": NUM_SPECIES,
            "num_states": NUM_STATES,
            "img_size": IMG_SIZE,
        },
        "last.pt",
    )

    run.finish()
    print("Done. Saved best.pt and last.pt")
420
-
421
-
422
# Script entry point: run training only when executed directly, not on import.
if __name__ == "__main__":
    main()