Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,8 +9,76 @@ import torch
|
|
| 9 |
from PIL import Image, ImageColor
|
| 10 |
from torchvision.utils import draw_bounding_boxes
|
| 11 |
import rfdetr.datasets.transforms as T
|
|
|
|
| 12 |
|
|
|
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
def process_categories() -> tuple:
|
| 15 |
with open("categories.json") as fp:
|
| 16 |
categories = json.load(fp)
|
|
@@ -79,10 +147,33 @@ def inference(image_path, model_name, bbox_threshold):
|
|
| 79 |
)
|
| 80 |
|
| 81 |
ort_inputs = {ort_session.get_inputs()[0].name: tensor_img.cpu().numpy()}
|
| 82 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
boxes, labels, scores = ort_outs
|
| 85 |
-
return draw_predictions(boxes, labels, scores, torch.from_numpy(np.array(img)), score_threshold=bbox_threshold)
|
| 86 |
|
| 87 |
|
| 88 |
|
|
|
|
| 9 |
from PIL import Image, ImageColor
|
| 10 |
from torchvision.utils import draw_bounding_boxes
|
| 11 |
import rfdetr.datasets.transforms as T
|
| 12 |
+
from torchvision.ops import box_convert
|
| 13 |
|
| 14 |
+
def _box_yxyx_to_xyxy(boxes: torch.Tensor) -> torch.Tensor:
|
| 15 |
+
"""Convert bounding boxes from (y1, x1, y2, x2) format to (x1, y1, x2, y2) format.
|
| 16 |
|
| 17 |
+
Args:
|
| 18 |
+
boxes (torch.Tensor): A tensor of bounding boxes in the (y1, x1, y2, x2) format.
|
| 19 |
+
|
| 20 |
+
Returns:
|
| 21 |
+
torch.Tensor: A tensor of bounding boxes in the (x1, y1, x2, y2) format.
|
| 22 |
+
"""
|
| 23 |
+
y1, x1, y2, x2 = boxes.unbind(-1)
|
| 24 |
+
boxes = torch.stack((x1, y1, x2, y2), dim=-1)
|
| 25 |
+
return boxes
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _box_xyxy_to_yxyx(boxes: torch.Tensor) -> torch.Tensor:
|
| 29 |
+
"""Convert bounding boxes from (x1, y1, x2, y2) format to (y1, x1, y2, x2) format.
|
| 30 |
+
|
| 31 |
+
Args:
|
| 32 |
+
boxes (torch.Tensor): A tensor of bounding boxes in the (x1, y1, x2, y2) format.
|
| 33 |
+
|
| 34 |
+
Returns:
|
| 35 |
+
torch.Tensor: A tensor of bounding boxes in the (y1, x1, y2, x2) format.
|
| 36 |
+
"""
|
| 37 |
+
x1, y1, x2, y2 = boxes.unbind(-1)
|
| 38 |
+
boxes = torch.stack((y1, x1, y2, x2), dim=-1)
|
| 39 |
+
return boxes
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Adapted from: https://github.com/pytorch/vision/blob/main/torchvision/ops/boxes.py#L168
|
| 43 |
+
# Adapted from: https://github.com/pytorch/vision/blob/main/torchvision/ops/boxes.py#L168
def extended_box_convert(
    boxes: torch.Tensor, in_fmt: str, out_fmt: str
) -> torch.Tensor:
    """
    Converts boxes from given in_fmt to out_fmt.

    Supported in_fmt and out_fmt are:
    - 'xyxy': boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right. This is the format that torchvision utilities expect.
    - 'xywh' : boxes are represented via corner, width and height, x1, y2 being top left, w, h being width and height.
    - 'cxcywh' : boxes are represented via centre, width and height, cx, cy being center of box, w, h being width and height.
    - 'yxyx': boxes are represented via corners, y1, x1 being top left and y2, x2 being bottom right. This is the format that `amrcnn` model outputs.

    Args:
        boxes (Tensor[N, 4]): boxes which will be converted.
        in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh', 'yxyx'].
        out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh', 'yxyx'].

    Returns:
        Tensor[N, 4]: Boxes into converted format.
    """
    # torchvision does not know 'yxyx', so normalize such an input to xyxy
    # first and let box_convert handle every remaining combination.
    if in_fmt == "yxyx":
        boxes = _box_yxyx_to_xyxy(boxes)
        in_fmt = "xyxy"

    if out_fmt != "yxyx":
        # Every non-yxyx target is a plain torchvision conversion.
        return box_convert(boxes, in_fmt=in_fmt, out_fmt=out_fmt)

    # yxyx output: route through xyxy, then flip the axis order.
    if in_fmt != "xyxy":
        boxes = box_convert(boxes, in_fmt=in_fmt, out_fmt="xyxy")
    return _box_xyxy_to_yxyx(boxes)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
def process_categories() -> tuple:
|
| 83 |
with open("categories.json") as fp:
|
| 84 |
categories = json.load(fp)
|
|
|
|
| 147 |
)
|
| 148 |
|
| 149 |
ort_inputs = {ort_session.get_inputs()[0].name: tensor_img.cpu().numpy()}
|
| 150 |
+
pred_boxes, logits = ort_session.run(['dets', 'labels'], ort_inputs)
|
| 151 |
+
|
| 152 |
+
scores = torch.sigmoid(torch.from_numpy(logits))
|
| 153 |
+
max_scores, pred_labels = scores.max(-1)
|
| 154 |
+
mask = max_scores > bbox_threshold
|
| 155 |
+
|
| 156 |
+
pred_boxes = torch.from_numpy(pred_boxes[0])
|
| 157 |
+
image_w, image_h = img.size
|
| 158 |
+
|
| 159 |
+
pred_boxes_abs = pred_boxes.clone()
|
| 160 |
+
pred_boxes_abs[:, 0] *= image_w
|
| 161 |
+
pred_boxes_abs[:, 1] *= image_h
|
| 162 |
+
pred_boxes_abs[:, 2] *= image_w
|
| 163 |
+
pred_boxes_abs[:, 3] *= image_h
|
| 164 |
+
|
| 165 |
+
mask = mask.squeeze(0)
|
| 166 |
+
|
| 167 |
+
filtered_boxes = extended_box_convert(
|
| 168 |
+
pred_boxes_abs[mask], in_fmt="cxcywh", out_fmt="xyxy"
|
| 169 |
+
)
|
| 170 |
+
filtered_scores = max_scores.squeeze(0)[mask]
|
| 171 |
+
filtered_labels = pred_labels.squeeze(0)[mask]
|
| 172 |
+
|
| 173 |
+
img_tensor = torch.from_numpy(np.array(img)).permute(2, 0, 1)
|
| 174 |
+
|
| 175 |
+
return draw_predictions(filtered_boxes, filtered_labels, filtered_scores, img_tensor, score_threshold=bbox_threshold)
|
| 176 |
|
|
|
|
|
|
|
| 177 |
|
| 178 |
|
| 179 |
|