dhvazquez committed on May 22

Commit

1ff644f

verified ·

1 Parent(s): f6ea357

Upload Train + exports

Browse files

Files changed (26) hide show

.gitattributes +3 -0
exports/mtg_4kp_s.onnx +3 -0
exports/mtg_4kp_s.onnx.data +3 -0
exports/mtg_4kp_s_fp16.onnx +3 -0
exports/mtg_4kp_s_int8_conv_int4.onnx +3 -0
exports/mtg_4kp_s_int8_conv_int4g32.onnx +3 -0
exports/mtg_4kp_s_int8_conv_wo.onnx +3 -0
exports/mtg_4kp_s_int8_conv_wo_fp16res.onnx +3 -0
exports/mtg_4kp_s_int8_conv_wo_opt.onnx +3 -0
exports/mtg_4kp_s_int8_dynamic.onnx +3 -0
exports/mtg_4kp_s_nosim.onnx +3 -0
exports/mtg_4kp_s_nosim.onnx.data +3 -0
exports/mtg_4kp_s_r4b.onnx +3 -0
exports/mtg_4kp_s_r4b.onnx.data +3 -0
exports/mtg_4kp_s_r4b_fp16.onnx +3 -0
run_20260519_012809/checkpoint.pth +3 -0
run_20260519_012809/checkpoint0000.pth +3 -0
run_20260519_012809/checkpoint0001.pth +3 -0
run_20260519_012809/checkpoint_best_regular.pth +3 -0
run_20260519_012809/effective_config.py +363 -0
run_20260519_012809/eval/000.pth +3 -0
run_20260519_012809/eval/latest.pth +3 -0
run_20260519_012809/log.txt +2 -0
run_20260519_012809/summary/events.out.tfevents.1779154095.palpatine.458155.0 +3 -0
run_20260519_012809/summary/events.out.tfevents.1779154095.palpatine.458156.0 +3 -0
run_20260519_012809/train.log +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+exports/mtg_4kp_s_nosim.onnx.data filter=lfs diff=lfs merge=lfs -text
+exports/mtg_4kp_s_r4b.onnx.data filter=lfs diff=lfs merge=lfs -text
+exports/mtg_4kp_s.onnx.data filter=lfs diff=lfs merge=lfs -text

exports/mtg_4kp_s.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c72f71bf017880ebbcde10f2ef630fa3e2fa3167124e7668434f6f726136e70
+size 47111454

exports/mtg_4kp_s.onnx.data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17cb72be4056e6d6a55210a448cc3bbf23610e416b355d99513a73ae73860c96
+size 46792704

exports/mtg_4kp_s_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d4ed2defe0403ef700cb606982ee8e2c5e190c0fb315d4106c4f9ebb404f491b
+size 23666438

exports/mtg_4kp_s_int8_conv_int4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba99236110bbad78a1ab75091a573f4136d33581cd75485b440a25c7f516ebf0
+size 10561260

exports/mtg_4kp_s_int8_conv_int4g32.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:232b8daa8d1fe47934541839c01984e6c40ea78dfecc992279fe0001c4d951cd
+size 11262351

exports/mtg_4kp_s_int8_conv_wo.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39e6493a2519b30ae120f2e173319564af31fb4cac12d439ed8cf3b934ae742e
+size 13068978

exports/mtg_4kp_s_int8_conv_wo_fp16res.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60608af82ba1f9192782fef79719ef6b7793b23b54ae6bdab7a5dc4b4d398d26
+size 12458839

exports/mtg_4kp_s_int8_conv_wo_opt.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:240b38b6365dfb94ca61f4c13c51892f8c78e95de062a52d3e63dad5ece83ce4
+size 13045930

exports/mtg_4kp_s_int8_dynamic.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a567c68b46b8f8f9f147827f4ce0301e4c4a59a6d783c82ab7a4b8d9ea5eeb7
+size 28120147

exports/mtg_4kp_s_nosim.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3f2c47a664f550710588efa67ec607de3d8669902b5e08e34a68e4bdaa711ed
+size 3061750

exports/mtg_4kp_s_nosim.onnx.data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5f8e83484053d6cc2152f5a8f1d6218e0ba19e40cacf3cbea790ae6c451081c
+size 46792704

exports/mtg_4kp_s_r4b.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d33788e992e4882e37b1d2307994942552413092fdebaf998728cdbda4b5a99
+size 14850609

exports/mtg_4kp_s_r4b.onnx.data ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8977c772c498a74eeed131322efe13d9ce9bfde4a3bfa5242ba583615f2a3a22
+size 14745600

exports/mtg_4kp_s_r4b_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e203415be3c526d486082f5a9b2de15bc03d6b443c5dc2a7f8bf5f3031a48471
+size 7495617

run_20260519_012809/checkpoint.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a66ae6431459f3775eed0d847b8fa3f697d4d7ee5b3ffc2e72be993bc079b0d
+size 59580875

run_20260519_012809/checkpoint0000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8910ba9beb071960bd6ac07e1cdf95324b03be4840a88e4769e868da281c4a0f
+size 59590695

run_20260519_012809/checkpoint0001.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a939b95e08fdc272e44fcf15e02130d9ffb8d3520d463bfdd08025174a5bf268
+size 59590695

run_20260519_012809/checkpoint_best_regular.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18b495cf63a29735287d2ac97299f87cb206c220b0fb0e28242feb9ff9a4b7a6
+size 59612854

run_20260519_012809/effective_config.py ADDED Viewed

	@@ -0,0 +1,363 @@

+"""MTG card 4-keypoint training config for DETRPose-S.
+This config is a LazyCall override of the upstream DETRPose-S (HGNetV2-B0) config.
+It inherits the full upstream architecture and only overrides what differs for
+single-class MTG card corner detection with 4 keypoints.
+Placement: detrpose/configs/mtg_card_4kp.py
+Load with:
+    python train.py --config_file configs/mtg_card_4kp.py --device cuda --amp
+NOTE (T14): Three upstream files hardcode OKS sigma dispatch tables that crash
+for num_body_points not in {3, 14, 17}:
+  - src/models/detrpose/matcher.py:29  (HungarianMatcher.__init__)
+  - src/misc/keypoint_loss.py:49       (OKSLoss.__init__)
+  - src/models/detrpose/dn_component.py:19 (get_sigmas)
+Task 14 must add a num_keypoints==4 branch to each using:
+  oks_sigmas = [0.025, 0.025, 0.025, 0.025]
+Until then, training will crash at model init with NotImplementedError/ValueError.
+"""
+# ---------------------------------------------------------------------------
+# Base: import from the DETRPose-S include hierarchy (relative to this file's
+# location, which is detrpose/configs/; the include files live at
+# detrpose/configs/detrpose/include/).
+# ---------------------------------------------------------------------------
+from .detrpose.detrpose_hgnetv2_s import (
+    model,
+    criterion,
+    training_params,
+    postprocessor,
+    ema,
+    optimizer,
+    lr_scheduler,
+)
+from .detrpose.include.dataset import dataset_train, dataset_val, evaluator
+from src.core import LazyCall as L
+from src.data.coco_eval import CocoEvaluator
+import src.data.transforms as T
+# ---------------------------------------------------------------------------
+# Spec §: Task-level constants (consumed by T14 patches and future reference)
+# ---------------------------------------------------------------------------
+# Number of card corners; drives model/criterion/postprocessor.
+# NOTE(T14): oks sigma dispatch in matcher.py / keypoint_loss.py / dn_component.py
+# must add a branch for this value using oks_sigmas below.
+num_body_points = 4
+# OKS sigmas — uniform for all 4 corners (no anatomical significance).
+# Normalized to ~1/40 of image fraction following COCO convention (σ=0.025 ≈ 1/(2*20)).
+# TODO(T14): wire these into the 3 sigma dispatch tables instead of hardcoding.
+oks_sigmas = [0.025, 0.025, 0.025, 0.025]
+# Single class: "mtg_card" (background is implicit, class 0 is the card)
+num_classes = 1
+# ---------------------------------------------------------------------------
+# Spec §: Model overrides — num_body_points and num_classes in 3 places
+# ---------------------------------------------------------------------------
+# (1) Transformer: drives keypoint head output dimension
+model.transformer.num_body_points = num_body_points
+model.transformer.num_classes = num_classes
+# (2) Criterion: drives loss computation over keypoints
+criterion.num_classes = num_classes
+criterion.num_body_points = num_body_points
+# Loss weights — mapped from spec to upstream key names:
+#   spec "cls"           → upstream "loss_vfl"  (varifocal classification loss)
+#   spec "keypoints_l1"  → upstream "loss_keypoints" (L1 keypoint regression)
+#   spec "keypoints_oks" → upstream "loss_oks"       (OKS keypoint loss)
+# Note: upstream DETRPose does NOT have separate bbox_l1 / bbox_giou loss keys;
+# bounding boxes are recovered from keypoint predictions, not via a dedicated
+# bbox branch. The spec's bbox_l1=5.0 / bbox_giou=2.0 weights have no upstream
+# equivalent and are omitted here.
+criterion.weight_dict = {
+    'loss_vfl': 2.0,          # spec: cls=2.0
+    'loss_keypoints': 10.0,   # spec: keypoints_l1=10.0
+    'loss_oks': 4.0,          # spec: keypoints_oks=4.0
+}
+# Matcher costs — mirror weight_dict ratios so Hungarian assignment is consistent
+criterion.matcher.cost_class = 2.0
+criterion.matcher.cost_keypoints = 10.0
+criterion.matcher.cost_oks = 4.0
+criterion.matcher.num_body_points = num_body_points
+# (3) PostProcessor: drives output decoding
+postprocessor.num_body_points = num_body_points
+# ---------------------------------------------------------------------------
+# Round 4a — Model surgery for inference latency (2026-05-18)
+# ---------------------------------------------------------------------------
+# DETRPose-S was tuned for COCO multi-person pose (60 queries to find ≤ K
+# people, 6 decoder layers to refine 17-keypoint anatomy). For MTG card
+# corner detection we have 1 object per image with 4 deterministic corners,
+# so the upstream defaults are wildly over-provisioned. Three config-only
+# overrides cut a meaningful chunk of the decoder cost without invasive
+# code changes:
+#
+#   - num_queries 60 → 10: cross-attention scales linearly with queries
+#     and we only ever consume the top-1. 10 still gives the Hungarian
+#     matcher slack (≥ 4 ensures every keypoint slot has a query) without
+#     paying for the 50 unused ones.
+#   - num_decoder_layers 6 → 3: halves decoder compute. DETR keypoint
+#     refinement converges fast on a single-object task; 3 layers is
+#     plenty.
+#   - dec_n_points 4 → 2: each decoder query sampled 4 reference points
+#     per feature level via deformable attention, which is what the 10
+#     WebGPU `GridSample` ops implement. Cutting to 2 halves those
+#     dispatches — the WebGPU EP's biggest pain point on Mali / GCN.
+#
+# `postprocessor.num_select` must match `num_queries` (it does top-k over
+# all available queries; if num_select > num_queries the index math
+# silently wraps).
+#
+# Expected impact: -20 to -30 % inference on top of the FP16 model
+# (Ampere 191 → ~140 ms, GCN-5 342 → ~250 ms, Apple Tahoe 218 → ~170 ms).
+#
+# Costs:
+#   - Existing checkpoints are NOT compatible — different head sizes,
+#     different decoder depth. Training restarts from scratch.
+#   - Smoke run before committing GPU: `python scripts/_make_small_dataset.py`
+#     + `python scripts/train.py --config detrpose/configs/mtg_card_4kp_smoke.py --single-gpu`
+#     verifies convergence in ~15 min before the 3-8 h full run.
+#
+# Round 4b (2026-05-19): R4a converged to AP=0.997 in a SINGLE epoch (run
+# runs/run_20260518_210733/, checkpoint backed up as r4a_best_epoch0_AP997.pth).
+# That's a saturated benchmark with the AP near 1.0 in epoch 0 — the model
+# still has way too much capacity for a 4-corner-of-1-rigid-card task with
+# synthetic data. R4b shrinks the transformer 3-4× further on top of R4a:
+#
+#   - hidden_dim 256 → 128: quarters the attention compute (O(d²) per token).
+#     Touches BOTH encoder and transformer; both must be set or the model
+#     init fails on dim mismatch at the encoder→transformer boundary.
+#   - dim_feedforward 1024 → 512: halves FFN compute.
+#   - nhead 8 → 4: halves attention head projections.
+#   - num_decoder_layers 3 → 1: single decoder pass. With AP saturating in
+#     1 epoch already, one decoder layer is plenty for this task.
+#   - num_queries 10 → 4: minimum that still gives Hungarian a slot per
+#     keypoint. We never use more than top-1 anyway.
+#
+# Param count goes 11.35 M → ~5-6 M by estimate (this run's log.txt reports
+# n_parameters = 3,601,989, i.e. ~3.6 M); the HGNetv2-B0 backbone dominates
+# and can't shrink without a bigger surgery. FLOPs roughly halve again.
+#
+# Expected inference latency on FP16+WebGPU: Ampere 191 → ~90 ms,
+# GCN-5 342 → ~200 ms. macOS Tahoe → ~120 ms.
+model.transformer.hidden_dim = 128
+model.transformer.dim_feedforward = 512
+model.transformer.nhead = 4
+model.transformer.num_decoder_layers = 1
+model.transformer.num_queries = 4
+model.transformer.dec_n_points = 2
+model.encoder.hidden_dim = 128
+model.encoder.dim_feedforward = 512
+model.encoder.nhead = 4
+postprocessor.num_select = 4
+# ---------------------------------------------------------------------------
+# Spec §: Training hyperparameters
+# ---------------------------------------------------------------------------
+# Upstream default is 100 epochs; spec says 150 — override.
+training_params.epochs = 150
+# Skip the full val eval that upstream fires before the first training
+# epoch on --resume. It's 30+ min of compute on 307k imgs, and leaves
+# evaluator state accumulated in RAM which feeds the gradual OOM growth we
+# observed in epoch 1. The end-of-epoch eval (run every epoch anyway)
+# provides the same signal.
+training_params.skip_initial_eval = True
+# Cap val eval at a deterministic subset of N images.
+#
+# Without this, every epoch's end-of-epoch eval runs over all 307k val imgs
+# (9623 batches @ bs=32). pycocotools' CocoEvaluator.update() appends an
+# [C, A, B] numpy array of per-image match dicts per batch — confirmed by
+# scripts/_repro_eval_leak.py to grow at ~0.4 MB/batch (≈4 GB over the full
+# val per rank). With 2-rank DDP on a 60 GB host, the val dataloader was
+# OOM-killed at ~25-32 % of eval (runs/run_20260413_234839 iter 2360 and
+# run_20260414_102022 iter 3010). Subsampling caps that accumulator without
+# touching the source dataset on disk.
+#
+# N=10_000 keeps eval at ~1 min/epoch, accumulator ≤ 130 MB / rank, and gives
+# a stable per-epoch AP signal. Run a full-set eval offline at the end of
+# training for the headline benchmark. Set to 0/None to disable the cap.
+training_params.max_eval_samples = 10000
+# torch.compile is honored by our trainer.py patch but disabled here.
+# Tried with mode="default", fullgraph=False — crashed in backward with:
+#   "RuntimeError: one of the variables needed for gradient computation has
+#    been modified by an inplace operation: [HalfTensor [32, 256, 80, 80]]"
+# Root cause: inductor reordering + AMP + DDP gradient buckets clashes with
+# the inplace activations in HGNetv2 backbone. Not worth the debugging rabbit
+# hole; bigger speedups come from reducing epochs or dataset size.
+training_params.compile_model = False
+training_params.compile_mode = "default"
+# Gradient clipping (spec: grad_clip=0.1; upstream default is also 0.1 —
+# keeping explicit for clarity)
+training_params.clip_max_norm = 0.1
+# Batch size: bumped 16 → 64 (32/GPU) to saturate 2× RTX 3090 — at 16 the
+# GPUs idled at ~50% with only 6–7 GB of 24 GB VRAM used (data-loading bound).
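+# Requires LR scaling in the optimizer section below: square-root rule,
+# √(64/16) = 2× (0.0001 → 0.0002). Linear 4× scaling (0.0004) was tried and
+# diverged to NaN.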
+dataset_train.total_batch_size = 64
+# Data-loading: 6 workers per rank (12 total). Reduced from 8 because we hit
+# OOM-kill at iter 11000/11226 of epoch 1 on a 60 GB host — during epoch 1
+# training, RAM grew from ~31 GB (epoch start) to OOM (~60+ GB) over 11k
+# iters, a gradual leak of ~2-3 MB/iter we couldn't fully attribute in-run.
+# Hypothesis: fragmentation + residual state from the on-resume val eval +
+# pin_memory accumulation. Cutting workers 8→6 reclaims ~4-5 GB of headroom
+# and doesn't affect throughput (data: 0.0002 — we're compute-bound, not
+# loader-bound).
+#
+# persistent_workers=False, prefetch_factor=2 — same rationale as before,
+# see #97432 (prefetch + pin_memory leak) and #62066 (persistent workers
+# accumulating CoW pages).
+dataset_train.num_workers = 6
+dataset_train.persistent_workers = False
+dataset_train.prefetch_factor = 2
+# Val loader: keep the loader footprint small without increasing main-process
+# pressure. The upstream default (bs=32, num_workers=4) OOM-killed a worker
+# at ~32% of end-of-epoch val on 307k imgs.
+#
+# Two independent RAM pressures in val eval:
+#   (a) worker side: num_workers × (forked parent RSS + prefetch buffers).
+#       Cutting num_workers from 4 to 1 cuts this dominant term ~4×.
+#   (b) main-process side: CocoEvaluator.update() calls `COCO.loadRes(coco_gt,
+#       results)` once per batch — creating a fresh pycocotools index per
+#       call. Halving batch_size would DOUBLE the number of calls and the
+#       main-process allocator pressure, so we keep batch_size at 32.
+#
+# pin_memory=False because eval is a no_grad forward — there's no backward
+# to benefit from pinned host buffers, and pinning ~1-2 GB of non-swappable
+# RAM is pure overhead. PyTorch docs confirm pin_memory is page-locked and
+# counts against the OOM killer's notion of used memory.
+#
+# Rationale cross-checked against:
+#   - pytorch/pytorch#8976 (SIGKILL workers = OOM)
+#   - pytorch/pytorch#13246 (num_workers × RSS replication)
+#   - Yuxin Wu "Demystify RAM Usage in Multiprocess DataLoader"
+#   - facebookresearch/detr#423, #602 (DETR-family val OOM)
+dataset_val.num_workers = 1
+dataset_val.pin_memory = False
+# Image size 640×640 — upstream already uses (640,640); explicit for clarity.
+# eval_spatial_size lives in include/detrpose_hgnetv2.py and is referenced by
+# the encoder/transformer. We do NOT re-import and override it here because
+# changing it would require re-instantiating encoder/transformer embed sizes.
+# Training at 640×640 (the upstream default for -S) already satisfies spec.
+# Optimizer: spec lr=0.0001 @ bs=16, lr_backbone=0.00001, weight_decay=0.0001.
+# Batch size was bumped 16 → 64, so LR is scaled by √(64/16)=2× using the
+# square-root rule instead of linear. Rationale: linear scaling (×4) is the
+# optimum for ResNet-style conv nets, but DETR-family transformers with many
+# aux heads are well known to go numerically unstable under aggressive LR
+# scaling. We tried linear (lr=4e-4): training was converging (loss 192→15
+# in 4k iters) then a single bad batch produced NaN in fp16 aux-head logits,
+# the Hungarian matcher in scipy raised "matrix contains invalid numeric
+# entries", and DDP timed out after 10 min waiting on the dead rank.
+# Square-root scaling (lr=2e-4) gives back headroom against fp16 overflow
+# while still benefiting from the larger batch. Note that Goyal et al. 2017
+# §2.1 is the source of the *linear* rule; square-root scaling is the common
+# heuristic for adaptive optimizers (AdamW) in large-batch studies.
+#     lr_head     : 0.0001  → 0.0002
+#     lr_backbone : 0.00001 → 0.00002
+optimizer.lr = 0.0002
+optimizer.weight_decay = 0.0001
+optimizer.params.cfg = [
+    {
+        'params': '^(?=.*backbone).*$',
+        'lr': 0.00002,   # √(bs 16→64)=2× of spec lr_backbone=1e-5
+    },
+]
+# LR scheduler: keep upstream MultiStepLR with no decay during training window
+# (milestones=[1000] effectively means no step during 150 epochs).
+# ---------------------------------------------------------------------------
+# Spec §: Stop-epoch policy for augmentation ops — scaled to 150 epochs
+# Upstream -S uses [5, 53, 96]; we scale proportionally to 150 epochs:
+#   start_mosaic=5, stop_zoomout=round(53*150/100)=80, stop_mosaic=round(96*150/100)=144
+# ---------------------------------------------------------------------------
+dataset_train.dataset.transforms.policy = {
+    'name': 'stop_epoch',
+    'ops': ['Mosaic', 'RandomCrop', 'RandomZoomOut'],
+    'epoch': [5, 80, 144],   # scaled from [5, 53, 96] @ 100ep → 150ep
+}
+dataset_train.collate_fn.base_size_repeat = 20
+dataset_train.collate_fn.stop_epoch = 144
+# ---------------------------------------------------------------------------
+# Spec §: Augmentation — add HSVJitter and set flip_pairs=[[0,1],[2,3]]
+# (0-indexed; the spec writes the same pairs 1-indexed as [[1,2],[3,4]]),
+# which swaps TL↔TR and BR↔BL on horizontal flip. HSVJitter is inserted as
+# transforms3 (after Mosaic and RandomZoomOut in transforms1-2, before the
+# horizontal flip and resize).
+#
+# Upstream Compose receives transforms as **kwargs keyed transforms1..transforms7.
+# The Compose.__init__ iterates dict values in insertion order (Python 3.7+),
+# so we can rename slots to insert HSVJitter. New pipeline:
+#   transforms1 = Mosaic
+#   transforms2 = RandomZoomOut
+#   transforms3 = HSVJitter          ← NEW: color jitter before geometric ops
+#   transforms4 = RandomHorizontalFlip(flip_pairs=[[0,1],[2,3]])  ← 0-indexed corner swap
+#   transforms5 = ColorJitter        (kept; can coexist with HSVJitter)
+#   transforms6 = RandomResize
+#   transforms7 = ToTensor
+#   transforms8 = Normalize
+#
+# Note: upstream dataset.py uses a fixed set of numbered kwargs; we override
+# the entire transforms object on dataset_train.dataset.transforms to inject
+# the new pipeline cleanly.
+# ---------------------------------------------------------------------------
+from .detrpose.include.detrpose_hgnetv2 import eval_spatial_size
+from src.data.container import Compose
+_scales = [(640, 640)]
+_max_size = 1333
+dataset_train.dataset.transforms = L(Compose)(
+    policy={
+        'name': 'stop_epoch',
+        'ops': ['Mosaic', 'RandomCrop', 'RandomZoomOut'],
+        'epoch': [5, 80, 144],
+    },
+    mosaic_prob=0.5,
+    transforms1=L(T.Mosaic)(output_size=320, probability=1.0),
+    transforms2=L(T.RandomZoomOut)(p=0.5),
+    transforms3=L(T.HSVJitter)(h=0.015, s=0.7, v=0.4),   # spec: HSVJitter early
+    transforms4=L(T.RandomHorizontalFlip)(flip_pairs=[[0, 1], [2, 3]]),  # 0-indexed pairs: TL↔TR (0↔1), BR↔BL (2↔3) — preserves corner semantics after h-flip
+    transforms5=L(T.ColorJitter)(),                        # upstream default; coexists with HSVJitter
+    transforms6=L(T.RandomResize)(sizes=_scales, max_size=_max_size),
+    transforms7=L(T.ToTensor)(),
+    transforms8=L(T.Normalize)(mean=[0, 0, 0], std=[1, 1, 1]),
+)
+# ---------------------------------------------------------------------------
+# Spec §: Dataset paths — relative to cwd when training runs (train-pose-estimation-v2/)
+# ---------------------------------------------------------------------------
+dataset_train.dataset.img_folder = "coco_dataset/train"
+dataset_train.dataset.ann_file = "coco_dataset/annotations/instances_train.json"
+dataset_val.dataset.img_folder = "coco_dataset/val"
+dataset_val.dataset.ann_file = "coco_dataset/annotations/instances_val.json"
+# Evaluator: point to our val annotations.
+# CocoEvaluator uses keypoints iou_type for pose eval.
+evaluator.ann_file = "coco_dataset/annotations/instances_val.json"
+evaluator.iou_types = ['keypoints']
+evaluator.useCats = True
+# Pass our 4-corner sigmas so pycocotools.COCOeval uses the right OKS scale
+# instead of the 17-keypoint person defaults. Without this, the eval
+# truncates to the first 4 person sigmas (nose/l_eye/r_eye/l_ear) and all AP
+# collapses to 0.
+# The CocoEvaluator constructor was patched (mtg-fork) to accept this kwarg.
+evaluator.kpt_oks_sigmas = oks_sigmas
+# Output directory for checkpoints
+training_params.output_dir = "output/mtg_card_4kp"

run_20260519_012809/eval/000.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:755cca2d9cb0112e1682b5eb60f5007abf42892e58f3d4f73bdaee2529d8c734
+size 574357

run_20260519_012809/eval/latest.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d9d28b086f8dd9cf17520fa5547cdce4218eb5fee0ac86f735c2e0c023f0fe1
+size 573799

run_20260519_012809/log.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"train_lr": 1.999999999999807e-05, "train_loss": 12.543476746572056, "train_loss_keypoints": 1.2924200675378752, "train_loss_keypoints_dn_0": 0.9205444374136824, "train_loss_keypoints_dn_pre": 0.9917099125578445, "train_loss_keypoints_enc_0": 2.873148705925127, "train_loss_keypoints_pre": 1.3559586186052888, "train_loss_oks": 0.3853887344912788, "train_loss_oks_dn_0": 0.5664547389831461, "train_loss_oks_dn_pre": 0.5764505134504863, "train_loss_oks_enc_0": 1.0937668736151316, "train_loss_oks_pre": 0.38551030588972934, "train_loss_vfl": 0.17695714877120275, "train_loss_vfl_dn_0": 0.533061496633356, "train_loss_vfl_dn_pre": 0.546968495083348, "train_loss_vfl_enc_0": 0.66637879066898, "train_loss_vfl_pre": 0.17875792909365126, "test_coco_eval_keypoints": [0.9900901282433162, 0.9900930550152051, 0.9900930550152051, -1.0, 0.9900901282433162, 0.9998699999999999, 0.9999, 0.9999, -1.0, 0.9998699999999999], "test_mtg_kp_l2_norm_per_corner": [0.0007670049089938402, 0.0008375109755434096, 0.0008375166798941791, 0.0008211143431253731], "test_mtg_kp_l2_norm_mean": 0.0008157867268892005, "epoch": 0, "n_parameters": 3601989, "now_time": "2026-05-19 03:46:01.686364", "epoch_time": "2:17:35"}
2	+ {"train_lr": 1.999999999999807e-05, "train_loss": 2.994915947076494, "train_loss_keypoints": 0.1010790098453219, "train_loss_keypoints_dn_0": 0.284792542535896, "train_loss_keypoints_dn_pre": 0.28494799813946237, "train_loss_keypoints_enc_0": 0.46997490368083944, "train_loss_keypoints_pre": 0.10092079869171004, "train_loss_oks": 0.02156980774322143, "train_loss_oks_dn_0": 0.14225913356885422, "train_loss_oks_dn_pre": 0.1424708259666102, "train_loss_oks_enc_0": 0.26801609149232236, "train_loss_oks_pre": 0.021543673624277265, "train_loss_vfl": 0.05088163607264508, "train_loss_vfl_dn_0": 0.3026131311032981, "train_loss_vfl_dn_pre": 0.3101332919516496, "train_loss_vfl_enc_0": 0.4422853824269553, "train_loss_vfl_pre": 0.05142771953485578, "test_coco_eval_keypoints": [0.9990016902607651, 0.9999910971215015, 0.9999910971215015, -1.0, 0.9990016902607651, 0.99999, 1.0, 1.0, -1.0, 0.99999], "test_mtg_kp_l2_norm_per_corner": [0.0006501294556073844, 0.0006766291335225105, 0.0006809880142100155, 0.0006852635415270925], "test_mtg_kp_l2_norm_mean": 0.0006732525362167507, "epoch": 1, "n_parameters": 3601989, "now_time": "2026-05-19 06:03:47.075803", "epoch_time": "2:17:45"}

run_20260519_012809/summary/events.out.tfevents.1779154095.palpatine.458155.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0360420cd4a5c28fe09343bd2fef9a5cd22e37a625824ff30c8b953f2542426
+size 5307086

run_20260519_012809/summary/events.out.tfevents.1779154095.palpatine.458156.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be9655c314b9859070066a62af066d51db7028b63ade3b74c0d36cc2fbedbff8
+size 88

run_20260519_012809/train.log ADDED Viewed

The diff for this file is too large to render. See raw diff