Upload Adacrop MobileNetV3 distilled version

Browse files

Files changed (4) hide show

common.py +493 -0
student_best.pth +3 -0
student_last.pth +3 -0
train_mobilenet_distill.py +532 -0

common.py ADDED Viewed

	@@ -0,0 +1,493 @@

+import json
+import math
+import pathlib
+import random
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Sequence, Tuple
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torchvision.transforms as T
+from PIL import Image
+from torch.utils.data import Dataset
+from torchvision import models
+ACTIONS = ["left", "right", "up", "down", "zoom_in", "zoom_out", "stop"]
+def find_adacrop_root() -> Path:
+    return Path(__file__).resolve().parents[1]
+def _strip_adacrop_prefix(path_text: str) -> str:
+    path_text = path_text.replace("\\", "/")
+    if path_text.startswith("./"):
+        path_text = path_text[2:]
+    if path_text.startswith("Adacrop/"):
+        path_text = path_text[len("Adacrop/") :]
+    return path_text
+def resolve_image_path(raw_path: str, adacrop_root: Path, source_file: Optional[Path] = None) -> Path:
+    """Resolve mixed project paths, including JSONL paths like ./outpainted/a.png."""
+    raw = str(raw_path).replace("\\", "/")
+    candidates: List[Path] = []
+    p = Path(raw)
+    if p.is_absolute():
+        candidates.append(p)
+    if source_file is not None:
+        candidates.append(source_file.parent / raw)
+        if raw.startswith("./"):
+            candidates.append(source_file.parent / raw[2:])
+    stripped = _strip_adacrop_prefix(raw)
+    candidates.append(adacrop_root / stripped)
+    candidates.append(adacrop_root.parent / raw)
+    # Old merged JSONs may contain Adacrop/data/outpainted/foo.png, while this
+    # workspace stores those files under data/outpainted_dataset/outpainted.
+    if stripped.startswith("data/outpainted/"):
+        suffix = stripped[len("data/outpainted/") :]
+        candidates.append(adacrop_root / "data" / "outpainted_dataset" / "outpainted" / suffix)
+    # The outpainted JSONL stores paths as ./outpainted/foo.png relative to the
+    # JSONL file: data/outpainted_dataset/training_pairs.jsonl.
+    if stripped.startswith("outpainted/"):
+        candidates.append(adacrop_root / "data" / "outpainted_dataset" / stripped)
+    for cand in candidates:
+        if cand.exists():
+            return cand.resolve()
+    return candidates[0].resolve()
+def normalize_boxes(value) -> List[List[float]]:
+    if value is None:
+        return []
+    if isinstance(value, dict):
+        if all(k in value for k in ("x1", "y1", "x2", "y2")):
+            return [[float(value["x1"]), float(value["y1"]), float(value["x2"]), float(value["y2"])]]
+        if all(k in value for k in ("x", "y", "w", "h")):
+            x, y, w, h = float(value["x"]), float(value["y"]), float(value["w"]), float(value["h"])
+            return [[x, y, x + w, y + h]]
+        return []
+    if isinstance(value, (list, tuple)):
+        if len(value) == 4 and all(isinstance(v, (int, float)) for v in value):
+            return [[float(v) for v in value]]
+        boxes: List[List[float]] = []
+        for item in value:
+            boxes.extend(normalize_boxes(item))
+        return boxes
+    return []
+def canonical_box_xyxy(box: Sequence[float], width: int, height: int, img_path: Optional[str] = None) -> List[float]:
+    """Return a pixel-space [x1,y1,x2,y2] box.
+    The outpainted JSONL is xyxy, while the CUHK split files in this workspace
+    use yxyx-like coordinates. Use the image path when it is unambiguous, then
+    fall back to bounds checks.
+    """
+    a, b, c, d = [float(v) for v in box]
+    path_text = (img_path or "").replace("\\", "/").lower()
+    if "cuhk_images" in path_text:
+        x1, y1, x2, y2 = b, a, d, c
+    elif "outpainted" in path_text or "gaic_dataset" in path_text:
+        x1, y1, x2, y2 = a, b, c, d
+    else:
+        xyxy_valid = 0 <= a < c <= width and 0 <= b < d <= height
+        yxyx_valid = 0 <= b < d <= width and 0 <= a < c <= height
+        if yxyx_valid and not xyxy_valid:
+            x1, y1, x2, y2 = b, a, d, c
+        else:
+            x1, y1, x2, y2 = a, b, c, d
+    x1, x2 = sorted([x1, x2])
+    y1, y2 = sorted([y1, y2])
+    x1 = min(max(0.0, x1), float(width))
+    x2 = min(max(0.0, x2), float(width))
+    y1 = min(max(0.0, y1), float(height))
+    y2 = min(max(0.0, y2), float(height))
+    if x2 <= x1:
+        x2 = min(float(width), x1 + 1.0)
+    if y2 <= y1:
+        y2 = min(float(height), y1 + 1.0)
+    return [x1, y1, x2, y2]
+def load_records(path: Path, adacrop_root: Path, require_images: bool = True) -> List[Dict]:
+    path = Path(path)
+    rows: List[Dict] = []
+    if path.suffix.lower() == ".jsonl":
+        with path.open("r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if line:
+                    rows.append(json.loads(line))
+    else:
+        with path.open("r", encoding="utf-8") as f:
+            rows = json.load(f)
+    records: List[Dict] = []
+    for row in rows:
+        raw_img = row.get("img") or row.get("file")
+        if not raw_img:
+            continue
+        img_path = resolve_image_path(raw_img, adacrop_root, source_file=path)
+        if require_images and not img_path.exists():
+            continue
+        boxes = normalize_boxes(row.get("box") or row.get("boxes") or row.get("orig_bbox"))
+        records.append({"img": str(img_path), "boxes": boxes, "raw": row})
+    return records
+def resnet50_no_weights():
+    try:
+        return models.resnet50(weights=None)
+    except TypeError:
+        return models.resnet50(pretrained=False)
+def mobilenet_v3_no_weights(arch: str):
+    if arch == "mobilenet_v3_large":
+        try:
+            return models.mobilenet_v3_large(weights=None)
+        except TypeError:
+            return models.mobilenet_v3_large(pretrained=False)
+    if arch == "mobilenet_v3_small":
+        try:
+            return models.mobilenet_v3_small(weights=None)
+        except TypeError:
+            return models.mobilenet_v3_small(pretrained=False)
+    raise ValueError(f"Unsupported student arch: {arch}")
+class TeacherActorCritic(nn.Module):
+    """ResNet-50 actor-critic teacher with an auxiliary bbox regression head.
+
+    The actor and critic heads consume backbone features concatenated with a
+    4-value crop state; the bbox head consumes backbone features alone.
+    """
+    def __init__(self, n_actions: int = len(ACTIONS)):
+        super().__init__()
+        self.backbone = resnet50_no_weights()
+        # Replace the final fc layer with a pass-through so forward() returns
+        # the features that would have fed it.
+        self.backbone.fc = nn.Identity()
+        # Hard-coded width of those features (presumably ResNet-50's fc input).
+        feat_dim = 2048
+        # Policy head: features + 4 state values -> action logits.
+        self.actor = nn.Sequential(
+            nn.Linear(feat_dim + 4, 1024),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(512, n_actions),
+        )
+        # Value head: same input as the actor, single scalar output.
+        self.critic = nn.Sequential(
+            nn.Linear(feat_dim + 4, 1024),
+            nn.ReLU(),
+            nn.Dropout(0.3),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(512, 1),
+        )
+        # Box head: image features only -> 4 raw (unactivated) outputs.
+        self.bbox_head = nn.Sequential(nn.Linear(feat_dim, 512), nn.ReLU(), nn.Linear(512, 4))
+    def forward(self, img_tensor: torch.Tensor, state: torch.Tensor):
+        """Return ``(action_probs, value)`` for a batch of images and states.
+
+        ``state`` is concatenated to the features along dim 1, so it is
+        expected to be (batch, 4) to match the heads' ``feat_dim + 4`` input.
+        """
+        feats = self.backbone(img_tensor)
+        x = torch.cat([feats, state], dim=1)
+        logits = self.actor(x)
+        return F.softmax(logits, dim=1), self.critic(x)
+    def backbone_forward(self, img_tensor: torch.Tensor):
+        """Return the raw 4-value bbox head output for a batch of images."""
+        feats = self.backbone(img_tensor)
+        return self.bbox_head(feats)
+class MobileNetPolicy(nn.Module):
+    """MobileNetV3 student with an action (actor) head and a bbox head.
+
+    There is no critic; ``forward`` returns both probabilities and logits.
+    """
+    def __init__(self, arch: str = "mobilenet_v3_small", n_actions: int = len(ACTIONS)):
+        super().__init__()
+        base = mobilenet_v3_no_weights(arch)
+        self.arch = arch
+        # Reuse only the convolutional trunk and pooling of the base network.
+        self.features = base.features
+        self.avgpool = base.avgpool
+        # Feature width is read from the base classifier's first layer, so it
+        # follows whichever arch was selected.
+        feat_dim = base.classifier[0].in_features
+        # Policy head: features + 4 state values -> action logits.
+        self.actor = nn.Sequential(
+            nn.Linear(feat_dim + 4, 512),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(512, 256),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(256, n_actions),
+        )
+        # Box head: image features only -> 4 outputs (sigmoid applied later).
+        self.bbox_head = nn.Sequential(
+            nn.Linear(feat_dim, 256),
+            nn.ReLU(),
+            nn.Dropout(0.1),
+            nn.Linear(256, 4),
+        )
+    def extract_feats(self, img_tensor: torch.Tensor):
+        """Run trunk + pooling and flatten everything after the batch dim."""
+        feats = self.features(img_tensor)
+        feats = self.avgpool(feats)
+        return torch.flatten(feats, 1)
+    def forward(self, img_tensor: torch.Tensor, state: torch.Tensor):
+        """Return ``(action_probs, action_logits)``.
+
+        ``state`` is concatenated to the features along dim 1, so it is
+        expected to be (batch, 4) to match the actor's ``feat_dim + 4`` input.
+        """
+        feats = self.extract_feats(img_tensor)
+        logits = self.actor(torch.cat([feats, state], dim=1))
+        return F.softmax(logits, dim=1), logits
+    def backbone_forward(self, img_tensor: torch.Tensor):
+        """Return the bbox head output squashed into (0, 1) by a sigmoid."""
+        feats = self.extract_feats(img_tensor)
+        return torch.sigmoid(self.bbox_head(feats))
+def load_teacher(ckpt_path: Path, device: torch.device) -> TeacherActorCritic:
+    ckpt = torch_load_portable(ckpt_path)
+    state_dict = ckpt.get("model_state_dict", ckpt) if isinstance(ckpt, dict) else ckpt
+    model = TeacherActorCritic(n_actions=len(ACTIONS))
+    missing, unexpected = model.load_state_dict(state_dict, strict=False)
+    if unexpected:
+        print(f"[teacher] unexpected keys: {unexpected[:8]}")
+    missing_required = [k for k in missing if not k.startswith("critic.") and not k.startswith("bbox_head.")]
+    if missing_required:
+        raise RuntimeError(f"Teacher checkpoint missing required keys: {missing_required[:8]}")
+    return model.to(device).eval()
+def load_student(ckpt_path: Path, device: torch.device, arch: Optional[str] = None) -> MobileNetPolicy:
+    ckpt = torch_load_portable(ckpt_path)
+    ckpt_arch = ckpt.get("arch", arch or "mobilenet_v3_small")
+    model = MobileNetPolicy(arch=ckpt_arch, n_actions=len(ACTIONS))
+    state_dict = ckpt.get("model_state_dict", ckpt)
+    model.load_state_dict(state_dict)
+    return model.to(device).eval()
+def torch_load_portable(ckpt_path: Path):
+    try:
+        return torch.load(ckpt_path, map_location="cpu", weights_only=False)
+    except NotImplementedError as exc:
+        if "WindowsPath" not in str(exc):
+            raise
+        # Checkpoints saved on Windows may pickle pathlib.WindowsPath inside
+        # metadata such as args. On POSIX, remap it before loading.
+        pathlib.WindowsPath = pathlib.PosixPath
+        return torch.load(ckpt_path, map_location="cpu", weights_only=False)
+def xyxy_to_xywh(box: Sequence[float]) -> List[float]:
+    x1, y1, x2, y2 = [float(v) for v in box]
+    x1, x2 = sorted([x1, x2])
+    y1, y2 = sorted([y1, y2])
+    return [x1, y1, max(1.0, x2 - x1), max(1.0, y2 - y1)]
+def xywh_to_xyxy(box: Sequence[float]) -> List[float]:
+    x, y, w, h = [float(v) for v in box]
+    return [x, y, x + w, y + h]
+def box_iou_xyxy(a: Sequence[float], b: Sequence[float]) -> float:
+    ax1, ay1, ax2, ay2 = [float(v) for v in a]
+    bx1, by1, bx2, by2 = [float(v) for v in b]
+    ix1, iy1 = max(ax1, bx1), max(ay1, by1)
+    ix2, iy2 = min(ax2, bx2), min(ay2, by2)
+    iw, ih = max(0.0, ix2 - ix1), max(0.0, iy2 - iy1)
+    inter = iw * ih
+    area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
+    area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
+    union = area_a + area_b - inter
+    return 0.0 if union <= 1e-8 else inter / union
+def clamp_xywh(box: Sequence[float], width: int, height: int, delta: float = 0.05) -> List[float]:
+    x, y, w, h = [float(v) for v in box]
+    min_size = max(10.0, min(width, height) * 0.05)
+    w = max(min_size, min(w, float(width)))
+    h = max(min_size, min(h, float(height)))
+    x = min(max(0.0, x), float(width) - w)
+    y = min(max(0.0, y), float(height) - h)
+    w = max(min_size, min(float(width) - x, max(w, delta * width)))
+    h = max(min_size, min(float(height) - y, max(h, delta * height)))
+    return [x, y, w, h]
+def random_box(width: int, height: int) -> List[float]:
+    ratio = width / max(1, height)
+    scale = random.uniform(0.3, 0.8)
+    if ratio >= 1:
+        w = max(10.0, width * scale)
+        h = max(10.0, w / ratio)
+    else:
+        h = max(10.0, height * scale)
+        w = max(10.0, h * ratio)
+    x = random.uniform(0.0, max(1.0, width - w))
+    y = random.uniform(0.0, max(1.0, height - h))
+    return clamp_xywh([x, y, w, h], width, height)
+def jitter_box(box_xywh: Sequence[float], width: int, height: int, jitter: float = 0.12) -> List[float]:
+    x, y, w, h = [float(v) for v in box_xywh]
+    x += random.uniform(-jitter, jitter) * width
+    y += random.uniform(-jitter, jitter) * height
+    w *= random.uniform(1.0 - jitter, 1.0 + jitter)
+    h *= random.uniform(1.0 - jitter, 1.0 + jitter)
+    return clamp_xywh([x, y, w, h], width, height)
+def box_state(box_xywh: Sequence[float], width: int, height: int) -> torch.Tensor:
+    x, y, w, h = [float(v) for v in box_xywh]
+    state = [
+        (x + 0.5 * w) / max(1.0, width),
+        (y + 0.5 * h) / max(1.0, height),
+        w / max(1.0, width),
+        h / max(1.0, height),
+    ]
+    if not all(math.isfinite(v) for v in state):
+        state = [0.5, 0.5, 0.6, 0.6]
+    return torch.tensor(state, dtype=torch.float32)
+def render_crop(img: Image.Image, box_xywh: Sequence[float], img_size: int) -> torch.Tensor:
+    x, y, w, h = [float(v) for v in box_xywh]
+    crop = img.crop((x, y, x + w, y + h)).resize((img_size, img_size))
+    return T.ToTensor()(crop)
+def render_full_image(img: Image.Image, img_size: int) -> torch.Tensor:
+    return T.ToTensor()(img.resize((img_size, img_size)))
+def bbox_target_from_xyxy(box_xyxy: Sequence[float], width: int, height: int) -> torch.Tensor:
+    x1, y1, x2, y2 = [float(v) for v in box_xyxy]
+    x1, x2 = sorted([x1, x2])
+    y1, y2 = sorted([y1, y2])
+    target = [
+        ((x1 + x2) * 0.5) / max(1.0, width),
+        ((y1 + y2) * 0.5) / max(1.0, height),
+        max(1.0, x2 - x1) / max(1.0, width),
+        max(1.0, y2 - y1) / max(1.0, height),
+    ]
+    return torch.tensor([min(1.0, max(0.0, v)) for v in target], dtype=torch.float32)
+def bbox_cxcywh_to_xyxy(box_cxcywh: Sequence[float], width: int, height: int) -> List[float]:
+    cx, cy, w, h = [float(v) for v in box_cxcywh]
+    bw = w * width
+    bh = h * height
+    x1 = cx * width - 0.5 * bw
+    y1 = cy * height - 0.5 * bh
+    x2 = x1 + bw
+    y2 = y1 + bh
+    return [
+        min(max(0.0, x1), float(width)),
+        min(max(0.0, y1), float(height)),
+        min(max(0.0, x2), float(width)),
+        min(max(0.0, y2), float(height)),
+    ]
+def step_box(box_xywh: Sequence[float], action_idx: int, width: int, height: int, delta: float = 0.05) -> List[float]:
+    act = ACTIONS[int(action_idx)]
+    x, y, w, h = [float(v) for v in box_xywh]
+    dx, dy = delta * w, delta * h
+    cx, cy = x + 0.5 * w, y + 0.5 * h
+    if act == "left":
+        x = max(0.0, x - dx)
+    elif act == "right":
+        x = min(width - w, x + dx)
+    elif act == "up":
+        y = max(0.0, y - dy)
+    elif act == "down":
+        y = min(height - h, y + dy)
+    elif act == "zoom_in":
+        w *= 1.0 - delta
+        h *= 1.0 - delta
+        x = cx - 0.5 * w
+        y = cy - 0.5 * h
+    elif act == "zoom_out":
+        w *= 1.0 + delta
+        h *= 1.0 + delta
+        x = cx - 0.5 * w
+        y = cy - 0.5 * h
+    return clamp_xywh([x, y, w, h], width, height, delta=delta)
+class PolicyStateDataset(Dataset):
+    def __init__(
+        self,
+        records: Sequence[Dict],
+        img_size: int = 224,
+        samples_per_image: int = 1,
+        random_box_prob: float = 0.65,
+        jitter: float = 0.12,
+    ):
+        self.records = list(records)
+        self.img_size = int(img_size)
+        self.samples_per_image = max(1, int(samples_per_image))
+        self.random_box_prob = float(random_box_prob)
+        self.jitter = float(jitter)
+    def __len__(self) -> int:
+        return len(self.records) * self.samples_per_image
+    def __getitem__(self, idx: int):
+        rec = self.records[idx % len(self.records)]
+        img = Image.open(rec["img"]).convert("RGB")
+        width, height = img.size
+        boxes = rec.get("boxes") or []
+        if boxes and random.random() > self.random_box_prob:
+            gt_box = canonical_box_xyxy(random.choice(boxes), width, height, img_path=rec["img"])
+            box = jitter_box(xyxy_to_xywh(gt_box), width, height, jitter=self.jitter)
+        else:
+            box = random_box(width, height)
+        return render_crop(img, box, self.img_size), box_state(box, width, height)
+class BBoxDataset(Dataset):
+    def __init__(self, records: Sequence[Dict], img_size: int = 224, samples_per_image: int = 1):
+        self.records = [r for r in records if r.get("boxes")]
+        self.img_size = int(img_size)
+        self.samples_per_image = max(1, int(samples_per_image))
+    def __len__(self) -> int:
+        return len(self.records) * self.samples_per_image
+    def __getitem__(self, idx: int):
+        rec = self.records[idx % len(self.records)]
+        img = Image.open(rec["img"]).convert("RGB")
+        width, height = img.size
+        box = canonical_box_xyxy(random.choice(rec["boxes"]), width, height, img_path=rec["img"])
+        return render_full_image(img, self.img_size), bbox_target_from_xyxy(box, width, height)
+class BBoxEvalDataset(Dataset):
+    def __init__(self, records: Sequence[Dict], img_size: int = 224):
+        self.records = [r for r in records if r.get("boxes")]
+        self.img_size = int(img_size)
+    def __len__(self) -> int:
+        return len(self.records)
+    def __getitem__(self, idx: int):
+        rec = self.records[idx]
+        img = Image.open(rec["img"]).convert("RGB")
+        width, height = img.size
+        targets = torch.stack(
+            [
+                bbox_target_from_xyxy(canonical_box_xyxy(box, width, height, img_path=rec["img"]), width, height)
+                for box in rec["boxes"]
+            ]
+        )
+        return render_full_image(img, self.img_size), targets
+def soften_probs(probs: torch.Tensor, temperature: float) -> torch.Tensor:
+    if temperature <= 1.0:
+        return probs
+    softened = probs.clamp_min(1e-8).pow(1.0 / temperature)
+    return softened / softened.sum(dim=1, keepdim=True)

student_best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8754f42dba8ec738701aaca6893803bd8ebb6ce212f75e42da8e6186c54ebb1
+size 18336390

student_last.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14da7c12373975c86deb5d99cecedb17a9e2c98a5868a38e5f78e53394203225
+size 18336390

train_mobilenet_distill.py ADDED Viewed

	@@ -0,0 +1,532 @@

+import argparse
+import csv
+import time
+from itertools import cycle
+from pathlib import Path
+import torch
+import torch.nn.functional as F
+from torch.utils.data import DataLoader
+from common import (
+    ACTIONS,
+    BBoxDataset,
+    BBoxEvalDataset,
+    MobileNetPolicy,
+    PolicyStateDataset,
+    bbox_cxcywh_to_xyxy,
+    box_iou_xyxy,
+    find_adacrop_root,
+    load_records,
+    load_teacher,
+    soften_probs,
+)
+def parse_args():
+    """Parse command-line options for the two-stage distillation run.
+
+    Default paths are derived from ``find_adacrop_root()``. Returns the
+    ``argparse.Namespace`` produced by the parser.
+    """
+    root = find_adacrop_root()
+    parser = argparse.ArgumentParser(description="Two-stage distillation: BBox head + PPO actor policy.")
+    # --- Inputs / outputs ---
+    parser.add_argument("--teacher-ckpt", type=Path, default=root.parent / "ppo_best_val_final_score.pth")
+    parser.add_argument("--train-jsonl", type=Path, default=root / "data" / "outpainted_dataset" / "training_pairs.jsonl")
+    parser.add_argument("--val-json", type=Path, default=root / "data" / "splits" / "val_mixed.json")
+    parser.add_argument("--output-dir", type=Path, default=root / "distillation" / "runs")
+    # --- Model and stage selection ---
+    parser.add_argument("--arch", choices=["mobilenet_v3_small", "mobilenet_v3_large"], default="mobilenet_v3_small")
+    parser.add_argument("--resume-student", type=Path, default=None, help="Load an existing student checkpoint before training.")
+    parser.add_argument("--skip-bbox-stage", action="store_true", help="Skip Stage 1 and go directly to Stage 2 policy distillation.")
+    # --- Schedule and optimisation ---
+    parser.add_argument("--bbox-epochs", type=int, default=5, help="Stage 1 epochs for bbox head distillation/supervision.")
+    parser.add_argument("--epochs", type=int, default=10, help="Stage 2 epochs for actor policy distillation.")
+    parser.add_argument("--batch-size", type=int, default=64)
+    parser.add_argument("--bbox-batch-size", type=int, default=0, help="Stage 2 bbox regularization batch size; 0 uses --batch-size.")
+    parser.add_argument("--lr", type=float, default=1e-4)
+    parser.add_argument("--bbox-lr", type=float, default=1e-4)
+    parser.add_argument("--weight-decay", type=float, default=1e-4)
+    # --- Data loading and sampling ---
+    parser.add_argument("--num-workers", type=int, default=4)
+    parser.add_argument("--pin-memory", action="store_true", help="Enable DataLoader pinned memory. Off by default to reduce Windows CUDA OOM risk.")
+    parser.add_argument("--samples-per-image", type=int, default=1)
+    parser.add_argument("--max-train-images", type=int, default=0)
+    parser.add_argument("--max-val-images", type=int, default=512)
+    parser.add_argument("--img-size", type=int, default=224)
+    parser.add_argument("--random-box-prob", type=float, default=0.65)
+    parser.add_argument("--jitter", type=float, default=0.12)
+    # --- Loss weights ---
+    parser.add_argument("--temperature", type=float, default=2.0)
+    parser.add_argument("--ce-weight", type=float, default=0.25)
+    parser.add_argument("--bbox-gt-weight", type=float, default=1.0)
+    parser.add_argument("--bbox-teacher-weight", type=float, default=0.25)
+    parser.add_argument("--stage2-bbox-weight", type=float, default=0.10)
+    # --- Checkpointing, early stopping, reproducibility ---
+    parser.add_argument("--save-every", type=int, default=5)
+    parser.add_argument("--patience", type=int, default=8, help="Stage 2 early-stop patience in epochs; <=0 disables.")
+    parser.add_argument("--min-delta", type=float, default=1e-4)
+    parser.add_argument("--seed", type=int, default=42)
+    # Defaults to CUDA when available at parse time.
+    parser.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
+    return parser.parse_args()
+def make_loader(dataset, batch_size, shuffle, num_workers, pin_memory=False, drop_last=False):
+    return DataLoader(
+        dataset,
+        batch_size=batch_size,
+        shuffle=shuffle,
+        num_workers=num_workers,
+        pin_memory=bool(pin_memory),
+        drop_last=drop_last,
+    )
+def iou_from_cxcywh_batch(preds, targets):
+    preds = preds.detach().cpu().clamp(0.0, 1.0)
+    targets = targets.detach().cpu().clamp(0.0, 1.0)
+    ious = []
+    for pred, target in zip(preds, targets):
+        ious.append(box_iou_xyxy(bbox_cxcywh_to_xyxy(pred.tolist(), 1, 1), bbox_cxcywh_to_xyxy(target.tolist(), 1, 1)))
+    return sum(ious) / max(1, len(ious))
+def best_iou_against_targets(pred_box, target_boxes):
+    pred_xyxy = bbox_cxcywh_to_xyxy(pred_box.tolist(), 1, 1)
+    return max(box_iou_xyxy(pred_xyxy, bbox_cxcywh_to_xyxy(t.tolist(), 1, 1)) for t in target_boxes)
+@torch.no_grad()
+def validate_bbox(student, teacher, loader, device, bbox_gt_weight, bbox_teacher_weight):
+    """Evaluate the student's bbox head against ground truth and the teacher.
+
+    Both models are put in eval mode. Per-batch mean losses are weighted by
+    batch size and averaged over all samples. Returns a dict with the
+    combined loss, the two component losses, the student's and the teacher's
+    IoU against ground truth, and the sample count.
+    """
+    student.eval()
+    teacher.eval()
+    total = 0
+    total_loss = 0.0
+    gt_loss_sum = 0.0
+    teacher_loss_sum = 0.0
+    gt_iou_sum = 0.0
+    teacher_iou_sum = 0.0
+    for imgs, targets in loader:
+        imgs = imgs.to(device, non_blocking=True)
+        targets = targets.to(device, non_blocking=True)
+        preds = student.backbone_forward(imgs)
+        # Teacher outputs are clamped into the same [0, 1] range as targets.
+        teacher_preds = teacher.backbone_forward(imgs).clamp(0.0, 1.0)
+        if targets.ndim == 3:
+            # Evaluation records can have multiple acceptable GT boxes. Use the
+            # closest GT for loss, and best IoU for reporting.
+            per_box_l1 = torch.abs(preds.unsqueeze(1) - targets).mean(dim=2)
+            best_idx = per_box_l1.argmin(dim=1)
+            chosen_targets = targets[torch.arange(targets.size(0), device=targets.device), best_idx]
+        else:
+            chosen_targets = targets
+        gt_loss = F.smooth_l1_loss(preds, chosen_targets)
+        teacher_loss = F.smooth_l1_loss(preds, teacher_preds)
+        loss = bbox_gt_weight * gt_loss + bbox_teacher_weight * teacher_loss
+        bs = imgs.size(0)
+        total += bs
+        # Losses are batch means, so re-weight by batch size before summing.
+        total_loss += loss.item() * bs
+        gt_loss_sum += gt_loss.item() * bs
+        teacher_loss_sum += teacher_loss.item() * bs
+        if targets.ndim == 3:
+            preds_cpu = preds.detach().cpu().clamp(0.0, 1.0)
+            teacher_cpu = teacher_preds.detach().cpu().clamp(0.0, 1.0)
+            targets_cpu = targets.detach().cpu().clamp(0.0, 1.0)
+            # Per-sample best IoU is summed directly (already one per sample).
+            gt_iou_sum += sum(best_iou_against_targets(p, ts) for p, ts in zip(preds_cpu, targets_cpu))
+            teacher_iou_sum += sum(best_iou_against_targets(p, ts) for p, ts in zip(teacher_cpu, targets_cpu))
+        else:
+            # The helper returns a batch mean, hence the ``* bs``.
+            gt_iou_sum += iou_from_cxcywh_batch(preds, chosen_targets) * bs
+            teacher_iou_sum += iou_from_cxcywh_batch(teacher_preds, chosen_targets) * bs
+    # max(1, total) keeps an empty loader from dividing by zero.
+    return {
+        "bbox_loss": total_loss / max(1, total),
+        "bbox_gt_loss": gt_loss_sum / max(1, total),
+        "bbox_teacher_loss": teacher_loss_sum / max(1, total),
+        "bbox_gt_iou": gt_iou_sum / max(1, total),
+        "bbox_teacher_iou": teacher_iou_sum / max(1, total),
+        "bbox_samples": total,
+    }
+@torch.no_grad()
+def validate_policy(student, teacher, loader, device, temperature):
+    student.eval()
+    teacher.eval()
+    total = 0
+    total_kl = 0.0
+    total_ce = 0.0
+    total_agree = 0.0
+    for imgs, states in loader:
+        imgs = imgs.to(device, non_blocking=True)
+        states = states.to(device, non_blocking=True)
+        teacher_probs, _ = teacher(imgs, states)
+        student_probs, student_logits = student(imgs, states)
+        target_probs = soften_probs(teacher_probs, temperature)
+        kl = F.kl_div(F.log_softmax(student_logits / temperature, dim=1), target_probs, reduction="batchmean")
+        kl = kl * (temperature * temperature)
+        ce = F.cross_entropy(student_logits, teacher_probs.argmax(dim=1))
+        agree = (student_probs.argmax(dim=1) == teacher_probs.argmax(dim=1)).float().mean()
+        bs = imgs.size(0)
+        total += bs
+        total_kl += kl.item() * bs
+        total_ce += ce.item() * bs
+        total_agree += agree.item() * bs
+    return {
+        "policy_kl": total_kl / max(1, total),
+        "policy_ce": total_ce / max(1, total),
+        "policy_top1_agreement": total_agree / max(1, total),
+        "policy_samples": total,
+    }
+def save_ckpt(path, student, optimizer, args, epoch, stage, metrics):
+    torch.save(
+        {
+            "arch": args.arch,
+            "epoch": epoch,
+            "stage": stage,
+            "model_state_dict": student.state_dict(),
+            "optimizer_state_dict": optimizer.state_dict() if optimizer is not None else None,
+            "args": vars(args),
+            "metrics": metrics,
+        },
+        path,
+    )
+def load_student_checkpoint(student, ckpt_path: Path, device: torch.device):
+    ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)
+    state_dict = ckpt.get("model_state_dict", ckpt)
+    missing, unexpected = student.load_state_dict(state_dict, strict=False)
+    if missing:
+        print(f"[resume] missing keys: {missing[:8]}")
+    if unexpected:
+        print(f"[resume] unexpected keys: {unexpected[:8]}")
+    print(
+        f"[resume] loaded student checkpoint: {ckpt_path} "
+        f"(stage={ckpt.get('stage', 'unknown')}, epoch={ckpt.get('epoch', 'unknown')})"
+    )
+    return student.to(device)
+def train_bbox_stage(args, student, teacher, train_loader, val_loader, device, run_dir, writer, csv_file):
+    """Stage 1: train the student's bbox head on ground truth and teacher output.
+
+    Runs ``args.bbox_epochs`` epochs. After each epoch it validates, writes
+    one metrics row through ``writer`` (flushing ``csv_file``), saves the
+    "last" checkpoint, and saves the "best" checkpoint whenever validation
+    IoU against ground truth improves by more than ``args.min_delta``.
+    """
+    print(f"[stage1] bbox distillation/supervision for {args.bbox_epochs} epoch(s)")
+    # All student parameters are optimised, not only the bbox head.
+    optimizer = torch.optim.AdamW(student.parameters(), lr=args.bbox_lr, weight_decay=args.weight_decay)
+    # Loss scaling and autocast are active only when running on CUDA.
+    scaler = torch.amp.GradScaler("cuda", enabled=device.type == "cuda")
+    best_iou = -1.0
+    for epoch in range(1, args.bbox_epochs + 1):
+        student.train()
+        total = 0
+        loss_sum = 0.0
+        gt_loss_sum = 0.0
+        teacher_loss_sum = 0.0
+        for imgs, targets in train_loader:
+            imgs = imgs.to(device, non_blocking=True)
+            targets = targets.to(device, non_blocking=True)
+            # Teacher predictions are fixed targets: no gradients, clamped to [0, 1].
+            with torch.no_grad():
+                teacher_targets = teacher.backbone_forward(imgs).clamp(0.0, 1.0)
+            optimizer.zero_grad(set_to_none=True)
+            with torch.amp.autocast("cuda", enabled=device.type == "cuda"):
+                preds = student.backbone_forward(imgs)
+                gt_loss = F.smooth_l1_loss(preds, targets)
+                teacher_loss = F.smooth_l1_loss(preds, teacher_targets)
+                loss = args.bbox_gt_weight * gt_loss + args.bbox_teacher_weight * teacher_loss
+            scaler.scale(loss).backward()
+            scaler.step(optimizer)
+            scaler.update()
+            bs = imgs.size(0)
+            total += bs
+            # Losses are batch means, so re-weight by batch size before summing.
+            loss_sum += loss.item() * bs
+            gt_loss_sum += gt_loss.item() * bs
+            teacher_loss_sum += teacher_loss.item() * bs
+        val_bbox = validate_bbox(student, teacher, val_loader, device, args.bbox_gt_weight, args.bbox_teacher_weight)
+        row = {
+            "stage": "bbox",
+            "epoch": epoch,
+            "train_loss": loss_sum / max(1, total),
+            "train_bbox_gt_loss": gt_loss_sum / max(1, total),
+            "train_bbox_teacher_loss": teacher_loss_sum / max(1, total),
+            "val_bbox_loss": val_bbox["bbox_loss"],
+            "val_bbox_gt_loss": val_bbox["bbox_gt_loss"],
+            "val_bbox_teacher_loss": val_bbox["bbox_teacher_loss"],
+            "val_bbox_gt_iou": val_bbox["bbox_gt_iou"],
+            "val_bbox_teacher_iou": val_bbox["bbox_teacher_iou"],
+            "val_bbox_samples": val_bbox["bbox_samples"],
+        }
+        writer.writerow(row)
+        # Flush every epoch so the log survives an interrupted run.
+        csv_file.flush()
+        save_ckpt(run_dir / "student_bbox_stage1_last.pth", student, optimizer, args, epoch, "bbox", row)
+        if val_bbox["bbox_gt_iou"] > best_iou + args.min_delta:
+            best_iou = val_bbox["bbox_gt_iou"]
+            save_ckpt(run_dir / "student_bbox_stage1_best.pth", student, optimizer, args, epoch, "bbox", row)
+            print(f"[stage1][save] best bbox: {run_dir / 'student_bbox_stage1_best.pth'}")
+        print(
+            f"[stage1][epoch {epoch}] loss={row['train_loss']:.4f} "
+            f"val_bbox_iou={row['val_bbox_gt_iou']:.3f} "
+            f"val_teacher_iou={row['val_bbox_teacher_iou']:.3f}"
+        )
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+def train_policy_stage(args, student, teacher, policy_loader, bbox_loader, val_policy_loader, val_bbox_loader, device, run_dir, writer, csv_file):
+    print(f"[stage2] actor policy distillation for {args.epochs} epoch(s)")
+    optimizer = torch.optim.AdamW(student.parameters(), lr=args.lr, weight_decay=args.weight_decay)
+    scaler = torch.amp.GradScaler("cuda", enabled=device.type == "cuda")
+    bbox_iter = cycle(bbox_loader) if args.stage2_bbox_weight > 0 and len(bbox_loader) > 0 else None
+    best_agreement = -1.0
+    epochs_without_improvement = 0
+    for epoch in range(1, args.epochs + 1):
+        student.train()
+        total = 0
+        loss_sum = 0.0
+        kl_sum = 0.0
+        ce_sum = 0.0
+        bbox_sum = 0.0
+        agree_sum = 0.0
+        for step, (imgs, states) in enumerate(policy_loader, start=1):
+            imgs = imgs.to(device, non_blocking=True)
+            states = states.to(device, non_blocking=True)
+            with torch.no_grad():
+                teacher_probs, _ = teacher(imgs, states)
+                target_probs = soften_probs(teacher_probs, args.temperature)
+                hard_targets = teacher_probs.argmax(dim=1)
+            bbox_loss = torch.zeros((), device=device)
+            bbox_bs = imgs.size(0)
+            if bbox_iter is not None:
+                bbox_imgs, bbox_targets = next(bbox_iter)
+                bbox_imgs = bbox_imgs.to(device, non_blocking=True)
+                bbox_targets = bbox_targets.to(device, non_blocking=True)
+                bbox_bs = bbox_imgs.size(0)
+            optimizer.zero_grad(set_to_none=True)
+            with torch.amp.autocast("cuda", enabled=device.type == "cuda"):
+                student_probs, student_logits = student(imgs, states)
+                kl = F.kl_div(F.log_softmax(student_logits / args.temperature, dim=1), target_probs, reduction="batchmean")
+                kl = kl * (args.temperature * args.temperature)
+                ce = F.cross_entropy(student_logits, hard_targets)
+                policy_loss = kl + args.ce_weight * ce
+                if bbox_iter is not None:
+                    bbox_preds = student.backbone_forward(bbox_imgs)
+                    bbox_loss = F.smooth_l1_loss(bbox_preds, bbox_targets)
+                loss = policy_loss + args.stage2_bbox_weight * bbox_loss
+            scaler.scale(loss).backward()
+            scaler.step(optimizer)
+            scaler.update()
+            bs = imgs.size(0)
+            total += bs
+            loss_sum += loss.item() * bs
+            kl_sum += kl.item() * bs
+            ce_sum += ce.item() * bs
+            bbox_sum += bbox_loss.item() * bbox_bs
+            agree_sum += (student_probs.argmax(dim=1) == hard_targets).float().mean().item() * bs
+            if step % 50 == 0:
+                print(
+                    f"[stage2][epoch {epoch}] step {step}/{len(policy_loader)} "
+                    f"loss={loss_sum / total:.4f} kl={kl_sum / total:.4f} "
+                    f"agree={agree_sum / total:.3f}"
+                )
+        val_policy = validate_policy(student, teacher, val_policy_loader, device, args.temperature)
+        val_bbox = validate_bbox(student, teacher, val_bbox_loader, device, args.bbox_gt_weight, args.bbox_teacher_weight)
+        row = {
+            "stage": "policy",
+            "epoch": epoch,
+            "train_loss": loss_sum / max(1, total),
+            "train_policy_kl": kl_sum / max(1, total),
+            "train_policy_ce": ce_sum / max(1, total),
+            "train_policy_top1_agreement": agree_sum / max(1, total),
+            "train_stage2_bbox_loss": bbox_sum / max(1, total),
+            "val_policy_kl": val_policy["policy_kl"],
+            "val_policy_ce": val_policy["policy_ce"],
+            "val_policy_top1_agreement": val_policy["policy_top1_agreement"],
+            "val_policy_samples": val_policy["policy_samples"],
+            "val_bbox_loss": val_bbox["bbox_loss"],
+            "val_bbox_gt_iou": val_bbox["bbox_gt_iou"],
+            "val_bbox_teacher_iou": val_bbox["bbox_teacher_iou"],
+        }
+        improved = row["val_policy_top1_agreement"] > best_agreement + args.min_delta
+        if improved:
+            best_agreement = row["val_policy_top1_agreement"]
+            epochs_without_improvement = 0
+        else:
+            epochs_without_improvement += 1
+        should_stop = args.patience > 0 and epochs_without_improvement >= args.patience
+        row["best_val_policy_top1_agreement"] = best_agreement
+        row["epochs_without_improvement"] = epochs_without_improvement
+        row["early_stop"] = bool(should_stop)
+        save_ckpt(run_dir / "student_last.pth", student, optimizer, args, epoch, "policy", row)
+        if improved:
+            save_ckpt(run_dir / "student_best.pth", student, optimizer, args, epoch, "policy", row)
+            print(f"[stage2][save] best policy: {run_dir / 'student_best.pth'}")
+        if args.save_every > 0 and epoch % args.save_every == 0:
+            path = run_dir / f"student_epoch_{epoch:03d}.pth"
+            save_ckpt(path, student, optimizer, args, epoch, "policy", row)
+            print(f"[stage2][save] periodic checkpoint: {path}")
+        writer.writerow(row)
+        csv_file.flush()
+        print(
+            f"[stage2][epoch {epoch}] loss={row['train_loss']:.4f} "
+            f"val_agree={row['val_policy_top1_agreement']:.3f} "
+            f"val_bbox_iou={row['val_bbox_gt_iou']:.3f} "
+            f"best={best_agreement:.3f} stale={epochs_without_improvement}/{args.patience if args.patience > 0 else 'off'}"
+        )
+        if should_stop:
+            print(f"[early-stop] no policy agreement improvement for {args.patience} epoch(s).")
+            break
+        if device.type == "cuda":
+            torch.cuda.empty_cache()
+def main():
+    args = parse_args()
+    torch.manual_seed(args.seed)
+    device = torch.device(args.device)
+    root = find_adacrop_root()
+    run_dir = args.output_dir / f"{args.arch}_twostage_{time.strftime('%Y%m%d_%H%M%S')}"
+    run_dir.mkdir(parents=True, exist_ok=True)
+    train_records = load_records(args.train_jsonl, root, require_images=True)
+    val_records = load_records(args.val_json, root, require_images=True) if args.val_json.exists() else []
+    if args.max_train_images > 0:
+        train_records = train_records[: args.max_train_images]
+    if args.max_val_images > 0:
+        val_records = val_records[: args.max_val_images]
+    if not train_records:
+        raise RuntimeError("No training images were resolved. Check --train-jsonl and path handling.")
+    print(f"[data] train images: {len(train_records)}")
+    print(f"[data] val images: {len(val_records)}")
+    print(f"[data] first train image: {train_records[0]['img']}")
+    bbox_train_ds = BBoxDataset(train_records, img_size=args.img_size, samples_per_image=args.samples_per_image)
+    bbox_val_ds = BBoxEvalDataset(val_records or train_records[: min(256, len(train_records))], img_size=args.img_size)
+    policy_train_ds = PolicyStateDataset(
+        train_records,
+        img_size=args.img_size,
+        samples_per_image=args.samples_per_image,
+        random_box_prob=args.random_box_prob,
+        jitter=args.jitter,
+    )
+    policy_val_ds = PolicyStateDataset(
+        val_records or train_records[: min(256, len(train_records))],
+        img_size=args.img_size,
+        samples_per_image=1,
+        random_box_prob=args.random_box_prob,
+        jitter=args.jitter,
+    )
+    if len(bbox_train_ds) == 0:
+        raise RuntimeError("No bbox labels found for Stage 1. Check box/orig_bbox fields.")
+    bbox_batch_size = args.bbox_batch_size if args.bbox_batch_size > 0 else args.batch_size
+    bbox_train_loader = make_loader(
+        bbox_train_ds,
+        bbox_batch_size,
+        True,
+        args.num_workers,
+        pin_memory=args.pin_memory,
+        drop_last=True,
+    )
+    bbox_val_loader = make_loader(
+        bbox_val_ds,
+        bbox_batch_size,
+        False,
+        max(0, min(args.num_workers, 4)),
+        pin_memory=args.pin_memory,
+    )
+    policy_train_loader = make_loader(
+        policy_train_ds,
+        args.batch_size,
+        True,
+        args.num_workers,
+        pin_memory=args.pin_memory,
+        drop_last=True,
+    )
+    policy_val_loader = make_loader(
+        policy_val_ds,
+        args.batch_size,
+        False,
+        max(0, min(args.num_workers, 4)),
+        pin_memory=args.pin_memory,
+    )
+    teacher = load_teacher(args.teacher_ckpt, device)
+    student = MobileNetPolicy(arch=args.arch, n_actions=len(ACTIONS)).to(device)
+    if args.resume_student is not None:
+        student = load_student_checkpoint(student, args.resume_student, device)
+    metrics_path = run_dir / "metrics.csv"
+    fieldnames = [
+        "stage",
+        "epoch",
+        "train_loss",
+        "train_bbox_gt_loss",
+        "train_bbox_teacher_loss",
+        "train_policy_kl",
+        "train_policy_ce",
+        "train_policy_top1_agreement",
+        "train_stage2_bbox_loss",
+        "val_bbox_loss",
+        "val_bbox_gt_loss",
+        "val_bbox_teacher_loss",
+        "val_bbox_gt_iou",
+        "val_bbox_teacher_iou",
+        "val_bbox_samples",
+        "val_policy_kl",
+        "val_policy_ce",
+        "val_policy_top1_agreement",
+        "val_policy_samples",
+        "best_val_policy_top1_agreement",
+        "epochs_without_improvement",
+        "early_stop",
+    ]
+    with metrics_path.open("w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
+        writer.writeheader()
+        if args.skip_bbox_stage:
+            print("[stage1] skipped by --skip-bbox-stage")
+        elif args.bbox_epochs > 0:
+            train_bbox_stage(args, student, teacher, bbox_train_loader, bbox_val_loader, device, run_dir, writer, f)
+        if args.epochs > 0:
+            train_policy_stage(
+                args,
+                student,
+                teacher,
+                policy_train_loader,
+                bbox_train_loader,
+                policy_val_loader,
+                bbox_val_loader,
+                device,
+                run_dir,
+                writer,
+                f,
+            )
+    print(f"[done] run dir: {run_dir}")
+# Script entry point: run the training pipeline only when this file is executed directly, not on import.
+if __name__ == "__main__":
+    main()