Spaces:

raidium
/

curia

Sleeping

App Files Files Community

SovanK commited on Nov 12

Commit

40b84df

1 Parent(s): 0fddfa1

fix: inference, add mask

Browse files

Files changed (2) hide show

app.py +1 -1
inference.py +84 -2

app.py CHANGED Viewed

@@ -415,7 +415,7 @@ def run_inference(
     try:
         image = image_state["image"]
-        probs = infer_image(image, head)
         # Use id_to_labels.json mapping, fall back to model config if not available
         id2label = load_id_to_labels().get(head, {})

     try:
         image = image_state["image"]
+        probs = infer_image(image, head, image_state.get("mask"))
         # Use id_to_labels.json mapping, fall back to model config if not available
         id2label = load_id_to_labels().get(head, {})

inference.py CHANGED Viewed

@@ -12,16 +12,92 @@ import pandas as pd
 import torch
 from datasets import Dataset, DatasetDict, load_dataset
 from PIL import Image
 from transformers import (
     AutoImageProcessor,
     AutoModelForImageClassification,
 )
 HF_REPO_ID = "raidium/curia"
 HF_DATASET_ID = "raidium/CuriaBench"
 @lru_cache(maxsize=1)
 def load_id_to_labels() -> Dict[str, Dict[str, str]]:
     """Load the id_to_labels.json mapping file."""
@@ -37,7 +113,9 @@ def load_id_to_labels() -> Dict[str, Dict[str, str]]:
 @lru_cache(maxsize=1)
 def load_processor() -> AutoImageProcessor:
     token = os.environ.get("HF_TOKEN")
-    return AutoImageProcessor.from_pretrained(HF_REPO_ID, trust_remote_code=True, token=token)
 @lru_cache(maxsize=None)
@@ -88,11 +166,15 @@ def to_numpy_image(image: Any) -> np.ndarray:
 def infer_image(
     image: np.ndarray,
     head: str,
 ) -> torch.Tensor:
     processor = load_processor()
     model = load_model(head)
     with torch.no_grad():
         processed = processor(images=image, return_tensors="pt")
         outputs = model(**processed)
         logits = outputs["logits"]
         probs = torch.nn.functional.softmax(logits[0], dim=-1)

 import torch
 from datasets import Dataset, DatasetDict, load_dataset
 from PIL import Image
+from torchvision import transforms
+from torchvision.transforms import functional as TF
 from transformers import (
     AutoImageProcessor,
     AutoModelForImageClassification,
 )
 HF_REPO_ID = "raidium/curia"
 HF_DATASET_ID = "raidium/CuriaBench"
+class _NumpyToTensor:
+    """Convert numpy arrays to tensors while preserving tensors/images."""
+    def __call__(self, value: Any) -> torch.Tensor:
+        if isinstance(value, (torch.Tensor, Image.Image)):
+            return value  # type: ignore[return-value]
+        return torch.tensor(value).unsqueeze(0)
+class AdaptativeResizeMask(torch.nn.Module):
+    """Resize binary masks with a fallback threshold to avoid empty masks."""
+    def __init__(self, target_size: int = 512, initial_threshold: float = 0.5) -> None:
+        super().__init__()
+        self.target_size = target_size
+        self.initial_threshold = initial_threshold
+    def forward(self, mask: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
+        mask = mask.to(dtype=torch.float32)
+        resized = TF.resize(
+            mask,
+            (self.target_size, self.target_size),
+            interpolation=TF.InterpolationMode.BILINEAR,
+            antialias=True,
+        )
+        binary = resized > self.initial_threshold
+        if binary.sum() == 0:
+            new_threshold = torch.max(resized) * 0.5
+            binary = resized > new_threshold
+        return binary.to(dtype=torch.float32)
+@lru_cache(maxsize=1)
+def make_mask_transform(crop_size: int = 512) -> transforms.Compose:
+    """Return the resize transform used during training/inference."""
+    return transforms.Compose(
+        [
+            _NumpyToTensor(),
+            AdaptativeResizeMask(target_size=crop_size),
+        ]
+    )
+def prepare_mask_for_model(mask: Any) -> Optional[torch.Tensor]:
+    """Apply Curia's mask preprocessing so heads get the ROI they expect."""
+    if mask is None:
+        return None
+    mask_transform = make_mask_transform()
+    try:
+        mask_arr = np.array(mask)
+    except Exception:
+        return None
+    if mask_arr.size == 0:
+        return None
+    if mask_arr.ndim == 3:
+        tensor = mask_transform(mask_arr.transpose(2, 0, 1))
+        # Match the shape produced in simple_test.py so the model receives
+        # (batch, height, width, channels) style tensors.
+        tensor = tensor.transpose(1, 3).transpose(1, 2)
+    else:
+        tensor = mask_transform(torch.tensor([mask_arr]))
+        tensor = tensor.unsqueeze(0)
+    if isinstance(tensor, np.ndarray):
+        tensor = torch.from_numpy(tensor)
+    return tensor
 @lru_cache(maxsize=1)
 def load_id_to_labels() -> Dict[str, Dict[str, str]]:
     """Load the id_to_labels.json mapping file."""
 @lru_cache(maxsize=1)
 def load_processor() -> AutoImageProcessor:
     token = os.environ.get("HF_TOKEN")
+    return AutoImageProcessor.from_pretrained(
+        HF_REPO_ID, trust_remote_code=True, token=token
+    )
 @lru_cache(maxsize=None)
 def infer_image(
     image: np.ndarray,
     head: str,
+    mask: Any | None = None,
 ) -> torch.Tensor:
     processor = load_processor()
     model = load_model(head)
     with torch.no_grad():
         processed = processor(images=image, return_tensors="pt")
+        mask_tensor = prepare_mask_for_model(mask)
+        if mask_tensor is not None:
+            processed["mask"] = mask_tensor
         outputs = model(**processed)
         logits = outputs["logits"]
         probs = torch.nn.functional.softmax(logits[0], dim=-1)