OverMind0 committed on
Commit
a840fa0
·
verified ·
1 Parent(s): ced6f9d

Upload 5 files

Browse files
Files changed (5) hide show
  1. augmentation_embeddings.py +137 -0
  2. best.pt +3 -0
  3. processor.py +188 -0
  4. requirements.txt +7 -0
  5. router.py +143 -0
augmentation_embeddings.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Augmentation and embedding helpers."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import List, Dict, Tuple
7
+
8
+ import random
9
+ import numpy as np
10
+ import torch
11
+ from PIL import Image, ImageEnhance, ImageOps
12
+ from transformers import AutoImageProcessor, AutoModel
13
+
14
+ _DINO_PROCESSOR = None
15
+ _DINO_MODEL = None
16
+
17
+
18
+ def get_dino_model(device: torch.device):
19
+ global _DINO_PROCESSOR, _DINO_MODEL
20
+
21
+ if _DINO_PROCESSOR is None or _DINO_MODEL is None:
22
+ _DINO_PROCESSOR = AutoImageProcessor.from_pretrained("facebook/dinov2-small")
23
+ _DINO_MODEL = AutoModel.from_pretrained("facebook/dinov2-small").to(device)
24
+ _DINO_MODEL.eval()
25
+
26
+ return _DINO_PROCESSOR, _DINO_MODEL
27
+
28
+
29
+ def augment_image(img: Image.Image) -> Image.Image:
30
+ aug = img.copy()
31
+
32
+ if random.random() < 0.5:
33
+ aug = ImageOps.mirror(aug)
34
+
35
+ angle = random.uniform(-10, 10)
36
+ aug = aug.rotate(angle, resample=Image.BILINEAR)
37
+
38
+ if random.random() < 0.7:
39
+ enhancer = ImageEnhance.Brightness(aug)
40
+ aug = enhancer.enhance(random.uniform(0.8, 1.2))
41
+
42
+ if random.random() < 0.7:
43
+ enhancer = ImageEnhance.Contrast(aug)
44
+ aug = enhancer.enhance(random.uniform(0.8, 1.2))
45
+
46
+ if random.random() < 0.5:
47
+ enhancer = ImageEnhance.Sharpness(aug)
48
+ aug = enhancer.enhance(random.uniform(0.9, 1.3))
49
+
50
+ return aug
51
+
52
+
53
+ def extract_embedding_from_pil(image: Image.Image, device: torch.device) -> torch.Tensor:
54
+ processor, model = get_dino_model(device)
55
+
56
+ inputs = processor(images=image, return_tensors="pt").to(device)
57
+ with torch.no_grad():
58
+ outputs = model(**inputs)
59
+
60
+ emb = outputs.last_hidden_state[:, 0, :]
61
+ emb = torch.nn.functional.normalize(emb, p=2, dim=1)
62
+ return emb
63
+
64
+
65
+ def build_reference_embeddings(
66
+ ref_images: List[Image.Image],
67
+ device: torch.device,
68
+ augmentations_per_image: int = 10,
69
+ ) -> torch.Tensor:
70
+ augmented_images: List[Image.Image] = []
71
+
72
+ for img in ref_images:
73
+ augmented_images.append(img)
74
+ for _ in range(augmentations_per_image):
75
+ augmented_images.append(augment_image(img))
76
+
77
+ ref_embeddings = []
78
+ for img in augmented_images:
79
+ ref_embeddings.append(extract_embedding_from_pil(img, device))
80
+
81
+ return torch.cat(ref_embeddings, dim=0)
82
+
83
+
84
+ def adaptive_similarity_threshold(
85
+ similarities: List[Dict[str, float]],
86
+ percentile: int = 80,
87
+ std_factor: float = 0.5,
88
+ min_threshold: float = 0.7,
89
+ ) -> float:
90
+ sims = np.array([s["similarity"] for s in similarities])
91
+ if sims.size == 0:
92
+ return min_threshold
93
+
94
+ p_thresh = np.percentile(sims, percentile)
95
+ mean_thresh = sims.mean() + std_factor * sims.std()
96
+
97
+ return max(p_thresh, mean_thresh, min_threshold)
98
+
99
+
100
+ def compute_similarities(
101
+ object_crops: Dict[int, Image.Image],
102
+ ref_embeddings: torch.Tensor,
103
+ device: torch.device,
104
+ ) -> List[Dict[str, float]]:
105
+ similarities = []
106
+ for i, crop in object_crops.items():
107
+ prod_emb = extract_embedding_from_pil(crop, device)
108
+ sim = torch.matmul(ref_embeddings, prod_emb.T).max().item()
109
+ similarities.append({"box_id": i, "similarity": sim})
110
+
111
+ similarities.sort(key=lambda x: x["similarity"], reverse=True)
112
+ return similarities
113
+
114
+
115
+ def calculate_shelf_share(similarities: List[Dict[str, float]], boxes, threshold: float):
116
+ matched_area = 0
117
+ total_area = 0
118
+ stock_status = ""
119
+
120
+ for s in similarities:
121
+ x1, y1, x2, y2 = boxes[s["box_id"]]
122
+ area = (x2 - x1) * (y2 - y1)
123
+ total_area += area
124
+
125
+ if s["similarity"] >= threshold:
126
+ matched_area += area
127
+
128
+ share = matched_area / total_area if total_area > 0 else 0
129
+
130
+ if share > 0.9:
131
+ stock_status = "high"
132
+ elif share < 0.5:
133
+ stock_status = "low"
134
+ else:
135
+ stock_status = "medium"
136
+
137
+ return share, stock_status, total_area, matched_area
best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:731b75c451e7797635b321905e9304b98570c82960c9830a4ae58f43f0634101
3
+ size 5358277
processor.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Shelf inventory processing utilities."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import List, Tuple, Dict, Any
7
+
8
+ import numpy as np
9
+ from PIL import Image, ImageDraw
10
+
11
+
12
+ class ShelfInventoryProcessor:
13
+ def __init__(
14
+ self,
15
+ model,
16
+ overlap_threshold: float = 0.5,
17
+ min_box_height: int = 20,
18
+ min_items_per_shelf: int = 8,
19
+ merge_overlap_threshold: float = 0.3,
20
+ ) -> None:
21
+ self.model = model
22
+ self.overlap_threshold = overlap_threshold
23
+ self.min_box_height = min_box_height
24
+ self.min_items_per_shelf = min_items_per_shelf
25
+ self.merge_overlap_threshold = merge_overlap_threshold
26
+
27
+ @staticmethod
28
+ def vertical_overlap(range1: Tuple[float, float], range2: Tuple[float, float]) -> float:
29
+ inter = min(range1[1], range2[1]) - max(range1[0], range2[0])
30
+ if inter <= 0:
31
+ return 0.0
32
+ h1 = range1[1] - range1[0]
33
+ return inter / h1 if h1 > 0 else 0.0
34
+
35
+ def run_inference(self, image: Image.Image):
36
+ results = self.model(image, verbose=False)[0]
37
+ img = image.convert("RGB")
38
+ draw = ImageDraw.Draw(img)
39
+
40
+ if not results.boxes:
41
+ return None, img, draw
42
+
43
+ boxes = results.boxes.xyxy.cpu().numpy()
44
+ boxes = boxes[np.argsort(boxes[:, 1])]
45
+
46
+ return boxes, img, draw
47
+
48
+ def group_boxes_into_shelves(self, boxes: np.ndarray) -> List[List[np.ndarray]]:
49
+ shelves: List[List[np.ndarray]] = []
50
+
51
+ for box in boxes:
52
+ x1, y1, x2, y2 = box
53
+ box_h = y2 - y1
54
+
55
+ if box_h < self.min_box_height:
56
+ continue
57
+
58
+ matched = False
59
+ for shelf in shelves:
60
+ s_y1 = np.median([b[1] for b in shelf])
61
+ s_y2 = np.median([b[3] for b in shelf])
62
+
63
+ inter = min(y2, s_y2) - max(y1, s_y1)
64
+ overlap_ratio = inter / box_h if box_h > 0 else 0
65
+
66
+ if overlap_ratio > self.overlap_threshold:
67
+ shelf.append(box)
68
+ matched = True
69
+ break
70
+
71
+ if not matched:
72
+ shelves.append([box])
73
+
74
+ return shelves
75
+
76
+ def build_shelf_objects(self, shelves: List[List[np.ndarray]]) -> List[Dict[str, Any]]:
77
+ shelf_objs: List[Dict[str, Any]] = []
78
+ for shelf in shelves:
79
+ ys = [b[1] for b in shelf] + [b[3] for b in shelf]
80
+ shelf_objs.append({"boxes": shelf, "y_range": (min(ys), max(ys))})
81
+ return shelf_objs
82
+
83
+ def merge_weak_shelves(self, shelf_objs: List[Dict[str, Any]]) -> List[List[np.ndarray]]:
84
+ merged: List[List[np.ndarray]] = []
85
+ used = [False] * len(shelf_objs)
86
+
87
+ for i in range(len(shelf_objs)):
88
+ if used[i]:
89
+ continue
90
+
91
+ cur_boxes = shelf_objs[i]["boxes"]
92
+ cur_range = shelf_objs[i]["y_range"]
93
+
94
+ for j in range(i + 1, len(shelf_objs)):
95
+ if used[j]:
96
+ continue
97
+
98
+ overlap = self.vertical_overlap(cur_range, shelf_objs[j]["y_range"])
99
+
100
+ if (
101
+ overlap > self.merge_overlap_threshold
102
+ and (
103
+ len(cur_boxes) < self.min_items_per_shelf
104
+ or len(shelf_objs[j]["boxes"]) < self.min_items_per_shelf
105
+ )
106
+ ):
107
+ cur_boxes.extend(shelf_objs[j]["boxes"])
108
+ used[j] = True
109
+
110
+ merged.append(cur_boxes)
111
+ used[i] = True
112
+
113
+ return merged
114
+
115
+ def annotate_and_build_metadata(self, shelves, draw: ImageDraw.ImageDraw):
116
+ final_boxes = []
117
+ shelf_metadata = []
118
+
119
+ avg_items = np.mean([len(s) for s in shelves]) if shelves else 1
120
+
121
+ for shelf_id, shelf in enumerate(shelves, start=1):
122
+ ys = [b[1] for b in shelf] + [b[3] for b in shelf]
123
+ min_y, max_y = min(ys), max(ys)
124
+
125
+ num_items = len(shelf)
126
+ confidence = round(num_items / avg_items, 2)
127
+
128
+ shelf_metadata.append(
129
+ {
130
+ "shelf_id": shelf_id,
131
+ "num_items": num_items,
132
+ "y_range": (int(min_y), int(max_y)),
133
+ "confidence": confidence,
134
+ "status": "stable" if confidence >= 0.5 else "unstable",
135
+ }
136
+ )
137
+
138
+ for b in shelf:
139
+ draw.rectangle([b[0], b[1], b[2], b[3]], outline="red", width=3)
140
+ draw.text((b[0], b[1] - 10), f"S{shelf_id}", fill="red")
141
+ final_boxes.append(b)
142
+
143
+ return final_boxes, shelf_metadata
144
+
145
+ def crop_annotated_image_by_object(
146
+ self,
147
+ annotated_img: Image.Image,
148
+ boxes: List[np.ndarray],
149
+ box_id: int | None = None,
150
+ padding: int = 5,
151
+ ):
152
+ width, height = annotated_img.size
153
+
154
+ def _safe_crop(x1, y1, x2, y2):
155
+ x1 = max(0, int(x1 - padding))
156
+ y1 = max(0, int(y1 - padding))
157
+ x2 = min(width, int(x2 + padding))
158
+ y2 = min(height, int(y2 + padding))
159
+ return annotated_img.crop((x1, y1, x2, y2))
160
+
161
+ if box_id is not None:
162
+ if box_id < 0 or box_id >= len(boxes):
163
+ raise IndexError(f"Box ID {box_id} out of range")
164
+
165
+ x1, y1, x2, y2 = boxes[box_id]
166
+ return _safe_crop(x1, y1, x2, y2)
167
+
168
+ cropped = {}
169
+ for i, (x1, y1, x2, y2) in enumerate(boxes):
170
+ cropped[i] = _safe_crop(x1, y1, x2, y2)
171
+
172
+ return cropped
173
+
174
+ def run(self, image: Image.Image):
175
+ boxes, img, draw = self.run_inference(image)
176
+
177
+ if boxes is None:
178
+ return [], [], 0, img
179
+
180
+ shelves = self.group_boxes_into_shelves(boxes)
181
+ shelf_objs = self.build_shelf_objects(shelves)
182
+ merged_shelves = self.merge_weak_shelves(shelf_objs)
183
+
184
+ final_boxes, shelf_metadata = self.annotate_and_build_metadata(
185
+ merged_shelves, draw
186
+ )
187
+
188
+ return final_boxes, shelf_metadata, len(merged_shelves), img
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ ultralytics>=8.0.0
3
+ torch
4
+ torchvision
5
+ transformers>=4.38.0
6
+ pillow
7
+ numpy
router.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Gradio router for shelf analysis."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from pathlib import Path
7
+ from typing import List
8
+
9
+ import sys
10
+
11
+ import gradio as gr
12
+ import torch
13
+ from PIL import Image, ImageDraw
14
+ from ultralytics import YOLO
15
+
16
+ PROJECT_ROOT = Path(__file__).resolve().parents[1]
17
+ sys.path.append(str(PROJECT_ROOT))
18
+
19
+ from src.processor import ShelfInventoryProcessor
20
+ from src.augmentation_embeddings import (
21
+ build_reference_embeddings,
22
+ compute_similarities,
23
+ adaptive_similarity_threshold,
24
+ calculate_shelf_share,
25
+ )
26
+
27
+
28
# Expected location of the fine-tuned YOLO weights (stored via Git LFS).
MODEL_PATH = PROJECT_ROOT / "models" / "best.pt"
29
+
30
+
31
+ def get_device() -> torch.device:
32
+ return torch.device("cuda" if torch.cuda.is_available() else "cpu")
33
+
34
+
35
+ def load_model() -> YOLO:
36
+ if not MODEL_PATH.exists():
37
+ raise FileNotFoundError(f"Model not found at {MODEL_PATH}")
38
+ return YOLO(str(MODEL_PATH))
39
+
40
+
41
+ MODEL = load_model()
42
+ PROCESSOR = ShelfInventoryProcessor(model=MODEL)
43
+
44
+
45
+ def _load_reference_images(reference_paths: List[str]) -> List[Image.Image]:
46
+ images: List[Image.Image] = []
47
+ for path in reference_paths:
48
+ img = Image.open(path).convert("RGB")
49
+ images.append(img)
50
+ return images
51
+
52
+
53
+ def _build_facing_text(shelf_metadata, shelf_share: float) -> str:
54
+ if not shelf_metadata:
55
+ return "facing: no shelves detected"
56
+
57
+ best_shelf = max(shelf_metadata, key=lambda s: s["num_items"])
58
+ label = "very good place" if shelf_share >= 0.7 else "needs attention"
59
+ return f"facing: shelf {best_shelf['shelf_id']} {label}"
60
+
61
+
62
+ def analyze_shelf(shelf_image: Image.Image, reference_files: List[str]):
63
+ if shelf_image is None:
64
+ return "Please upload a shelf photo.", None
65
+ if not reference_files:
66
+ return "Please upload at least one reference photo.", None
67
+
68
+ device = get_device()
69
+
70
+ boxes, metadata, _shelf_count, annotated_img = PROCESSOR.run(shelf_image)
71
+ if not boxes:
72
+ return "No products detected.", annotated_img
73
+
74
+ object_crops = PROCESSOR.crop_annotated_image_by_object(shelf_image, boxes)
75
+
76
+ ref_images = _load_reference_images(reference_files)
77
+ ref_embeddings = build_reference_embeddings(ref_images, device)
78
+
79
+ similarities = compute_similarities(object_crops, ref_embeddings, device)
80
+ if not similarities:
81
+ return "No matches found.", annotated_img
82
+
83
+ threshold = adaptive_similarity_threshold(similarities)
84
+ shelf_share, stock_status, total_area, matched_area = calculate_shelf_share(
85
+ similarities, boxes, threshold
86
+ )
87
+
88
+ facing_text = _build_facing_text(metadata, shelf_share)
89
+
90
+ result_lines = [
91
+ f"Shelf Share: {shelf_share * 100:.2f}%",
92
+ facing_text,
93
+ f"Stock Status: {stock_status}",
94
+ f"Matched Area: {matched_area:.0f} px² / Total Shelf Area: {total_area:.0f} px²",
95
+ ]
96
+
97
+ annotated = shelf_image.copy()
98
+ draw = ImageDraw.Draw(annotated)
99
+
100
+ for s in similarities:
101
+ if s["similarity"] < threshold:
102
+ continue
103
+ x1, y1, x2, y2 = map(int, boxes[s["box_id"]])
104
+ draw.rectangle([x1, y1, x2, y2], outline="green", width=3)
105
+ draw.text((x1, max(y1 - 12, 0)), f"{s['similarity']:.2f}", fill="green")
106
+
107
+ return "\n".join(result_lines), annotated
108
+
109
+
110
+ def build_app():
111
+ with gr.Blocks(title="Shelf Analysis") as demo:
112
+ gr.Markdown("# Shelf Analysis")
113
+ gr.Markdown(
114
+ "Upload a shelf photo and one or more reference product photos."
115
+ )
116
+
117
+ with gr.Row():
118
+ shelf_input = gr.Image(type="pil", label="Shelf Photo")
119
+ ref_input = gr.File(
120
+ file_types=["image"],
121
+ file_count="multiple",
122
+ type="filepath",
123
+ label="Reference Photos",
124
+ )
125
+
126
+ with gr.Row():
127
+ output_text = gr.Textbox(label="Results", lines=6)
128
+ output_image = gr.Image(type="pil", label="Annotated Matches")
129
+
130
+ analyze_btn = gr.Button("Analyze")
131
+
132
+ analyze_btn.click(
133
+ fn=analyze_shelf,
134
+ inputs=[shelf_input, ref_input],
135
+ outputs=[output_text, output_image],
136
+ )
137
+
138
+ return demo
139
+
140
+
141
+ if __name__ == "__main__":
142
+ app = build_app()
143
+ app.launch()