covtoken / data /masks.py
Chucks90's picture
covtoken: label-free lesion-subspace token economy (reframed) + gated eval + paper draft
3510f1d verified
Raw
History Blame Contribute Delete
1.92 kB
"""EVAL-ONLY lesion label/mask loading for LIDC-IDRI.
Every read here calls `assert_label_free(...)`, which raises if the caller is inside
`subspace_construction_guard()`. This makes it impossible for a lesion label/mask to
reach subspace construction without failing loudly (IMPLEMENTATION_SPEC §0.6, §5).
The LIDC manifest (Chucks90/eryon-data-pipelines, manifests/lidc/manifest_v1.1.0.jsonl)
carries per-slice annotations: `has_nodule`, `nodule_pixel_area`, `nodule_ids`,
`nodule_diameter_mm`, `label` ("tumor"/"normal"). These are evaluation ground truth ONLY.
"""
from __future__ import annotations
from dataclasses import dataclass
from .leak_guard import assert_label_free
@dataclass(frozen=True)
class SliceLabel:
    """Immutable, evaluation-only ground-truth record for a single LIDC slice.

    The fields mirror the per-slice annotations carried by the LIDC manifest.
    The dataclass is frozen, so attributes cannot be reassigned after
    construction. No validation or type coercion happens here; callers are
    responsible for passing values of the annotated types.
    """

    # Identifier of the slice this label describes.
    slice_id: str
    # True when the manifest marks the slice as containing a nodule.
    has_nodule: bool
    # Manifest `nodule_pixel_area` value (presumably an area in pixels -- confirm).
    nodule_pixel_area: float
    # Nodule diameter in millimetres, or None when no diameter is available.
    nodule_diameter_mm: float | None
    # Class label string: "tumor" | "normal".
    label: str
def label_from_manifest_record(rec: dict) -> SliceLabel:
    """Construct an eval-only label from a manifest record. EVAL-ONLY.

    A key that is absent and a key whose value is ``None`` (JSON ``null``) are
    treated the same way for every field, so the returned ``SliceLabel``
    always honours its declared field types.

    Args:
        rec: One manifest record. The keys read are ``slice_id``,
            ``has_nodule``, ``nodule_pixel_area``, ``nodule_diameter_mm`` and
            ``label``; any other keys are ignored.

    Returns:
        A ``SliceLabel`` with these fallbacks for missing/null values:
        ``slice_id`` -> ``""``, ``has_nodule`` -> ``False``,
        ``nodule_pixel_area`` -> ``0.0``, ``nodule_diameter_mm`` -> ``None``,
        ``label`` -> ``"normal"``.

    Raises:
        Whatever ``assert_label_free`` raises (per this module's docstring it
        raises when called inside ``subspace_construction_guard()``).
        ValueError, TypeError: if ``nodule_pixel_area`` or a non-null
            ``nodule_diameter_mm`` cannot be converted with ``float()``.
    """
    # The leak guard runs before any label field of the record is touched.
    assert_label_free("LIDC slice label")

    # dict.get's default only covers a *missing* key; an explicit null in the
    # record comes back as None, so each nullable field is checked explicitly.
    slice_id = rec.get("slice_id")
    diameter = rec.get("nodule_diameter_mm")
    label = rec.get("label")

    return SliceLabel(
        slice_id="" if slice_id is None else slice_id,
        has_nodule=bool(rec.get("has_nodule", False)),
        # `or 0` maps a null (or otherwise falsy) area to 0 before conversion.
        nodule_pixel_area=float(rec.get("nodule_pixel_area", 0) or 0),
        # Coerce so an integer diameter in the manifest is stored as a float,
        # matching the field's `float | None` annotation.
        nodule_diameter_mm=None if diameter is None else float(diameter),
        label="normal" if label is None else label,
    )
def load_patch_mask(rec: dict, n_patches_side: int):
    """Per-patch lesion-membership mask for evaluation (Gate 1+). EVAL-ONLY.

    This is a stub for the pixel-to-patch rasterization that Phase 1
    evaluation will run against held-out masks. The leak guard is invoked
    first, so the function cannot be used during subspace fitting; after the
    guard passes, the call always fails with ``NotImplementedError``.

    Args:
        rec: Manifest record for the slice (not read by the stub).
        n_patches_side: Patch-grid size argument (not read by the stub;
            presumably the number of patches per image side -- confirm).

    Raises:
        Whatever ``assert_label_free`` raises (per this module's docstring it
        raises when called inside ``subspace_construction_guard()``).
        NotImplementedError: always, once the leak guard has passed.
    """
    assert_label_free("LIDC patch-level lesion mask")
    not_ready_msg = (
        "Patch-mask rasterization is implemented in Phase 1 (Gate 1 evaluation). "
        "It requires nodule segmentation frames not present in the Phase 0 manifest."
    )
    raise NotImplementedError(not_ready_msg)