carpedm20 committed on
Commit
3dc508a
·
verified ·
1 Parent(s): f6e4f38

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +58 -124
app.py CHANGED
@@ -1,5 +1,4 @@
1
  import os
2
- import cv2
3
  import gradio as gr
4
  import numpy as np
5
  import torch
@@ -9,7 +8,7 @@ from PIL import Image
9
  import tempfile
10
 
11
  from gradio.themes.utils import sizes
12
- from classes_and_palettes import GOLIATH_PALETTE, GOLIATH_CLASSES
13
 
14
 
15
  # =========================================================
@@ -34,32 +33,22 @@ class ModelManager:
34
  _cache = {}
35
 
36
  @staticmethod
37
- def load_model(checkpoint_name: str):
38
- if checkpoint_name in ModelManager._cache:
39
- return ModelManager._cache[checkpoint_name]
40
-
41
- checkpoint_path = os.path.join(
42
- Config.CHECKPOINTS_DIR,
43
- Config.CHECKPOINTS[checkpoint_name],
44
- )
45
- model = torch.jit.load(checkpoint_path)
46
- model.eval()
47
- model.to("cuda")
48
- ModelManager._cache[checkpoint_name] = model
49
  return model
50
 
51
  @staticmethod
52
  @torch.inference_mode()
53
- def run_model(model, input_tensor, height, width):
54
- output = model(input_tensor)
55
- output = F.interpolate(
56
- output,
57
- size=(height, width),
58
- mode="bilinear",
59
- align_corners=False,
60
- )
61
- _, preds = torch.max(output, 1)
62
- return preds
63
 
64
 
65
  # =========================================================
@@ -68,7 +57,7 @@ class ModelManager:
68
 
69
  class ImageProcessor:
70
  def __init__(self):
71
- self.transform_fn = transforms.Compose([
72
  transforms.Resize((1024, 768)),
73
  transforms.ToTensor(),
74
  transforms.Normalize(
@@ -77,40 +66,37 @@ class ImageProcessor:
77
  ),
78
  ])
79
 
80
- def process_image(self, image: Image.Image, model_name: str):
81
  model = ModelManager.load_model(model_name)
82
- input_tensor = self.transform_fn(image).unsqueeze(0).to("cuda")
83
-
84
- preds = ModelManager.run_model(
85
- model,
86
- input_tensor,
87
- image.height,
88
- image.width,
89
- )
90
 
91
- mask = preds.squeeze(0).cpu().numpy()
92
- blended_image = self.visualize_pred_with_overlay(image, mask)
93
 
 
94
  npy_path = tempfile.mktemp(suffix=".npy")
95
  np.save(npy_path, mask)
96
 
97
- return blended_image, npy_path
 
98
 
99
- @staticmethod
100
- def visualize_pred_with_overlay(img, sem_seg, alpha=0.5):
101
- img_np = np.array(img.convert("RGB"))
102
- sem_seg = np.array(sem_seg)
 
 
 
103
 
104
- num_classes = len(GOLIATH_CLASSES)
105
- ids = np.unique(sem_seg)
106
- ids = ids[ids < num_classes]
107
 
108
- overlay = np.zeros((*sem_seg.shape, 3), dtype=np.uint8)
109
- for label in ids:
110
- overlay[sem_seg == label] = GOLIATH_PALETTE[label]
111
 
112
- blended = np.uint8(img_np * (1 - alpha) + overlay * alpha)
113
- return Image.fromarray(blended)
114
 
115
 
116
  # =========================================================
@@ -119,12 +105,9 @@ class ImageProcessor:
119
 
120
  class GradioInterface:
121
  def __init__(self):
122
- self.image_processor = ImageProcessor()
123
 
124
- def create_interface(self):
125
- # -------------------------
126
- # Theme (modern Gradio)
127
- # -------------------------
128
  theme = gr.themes.Soft(
129
  primary_hue="neutral",
130
  secondary_hue="slate",
@@ -135,28 +118,14 @@ class GradioInterface:
135
  body_background_fill="#1a1a1a",
136
  body_text_color="#fafafa",
137
  block_background_fill="#2a2a2a",
138
- block_border_color="#333333",
139
- button_primary_background_fill="#4a4a4a",
140
- button_primary_background_fill_hover="#5a5a5a",
141
- input_background_fill="#3a3a3a",
142
  )
143
 
144
- # -------------------------
145
- # Minimal CSS (layout only)
146
- # -------------------------
147
  css = """
148
- .image-preview img {
149
- max-width: 512px;
150
- max-height: 512px;
151
- margin: 0 auto;
152
- display: block;
153
- object-fit: contain;
154
- border-radius: 6px;
155
- }
156
  .app-header {
157
  padding: 24px;
158
- margin-bottom: 24px;
159
  text-align: center;
 
160
  }
161
  .app-title {
162
  font-size: 48px;
@@ -166,44 +135,27 @@ class GradioInterface:
166
  font-size: 24px;
167
  opacity: 0.9;
168
  }
169
- .publication-links {
170
- display: flex;
171
- justify-content: center;
172
- flex-wrap: wrap;
173
- gap: 8px;
174
- margin-top: 12px;
175
- }
176
  """
177
 
178
- header_html = """
179
  <div class="app-header">
180
- <h1 class="app-title">Sapiens: Body-Part Segmentation</h1>
181
  <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
182
- <p>
183
- Foundation models for human-centric vision tasks pretrained on
184
- 300M human images. This demo showcases fine-tuned body-part
185
- segmentation.
186
- </p>
187
- <div class="publication-links">
188
- <a href="https://arxiv.org/abs/2408.12569">arXiv</a>
189
- <a href="https://github.com/facebookresearch/sapiens">GitHub</a>
190
- <a href="https://about.meta.com/realitylabs/codecavatars/sapiens/">Meta</a>
191
- </div>
192
  </div>
193
  """
194
 
195
- def process(image, model_name):
196
- return self.image_processor.process_image(image, model_name)
197
 
198
  with gr.Blocks(theme=theme, css=css) as demo:
199
- gr.HTML(header_html)
200
 
201
  with gr.Row():
202
- with gr.Column():
203
  input_image = gr.Image(
204
  label="Input Image",
205
  type="pil",
206
- elem_classes="image-preview",
207
  )
208
 
209
  model_name = gr.Dropdown(
@@ -212,37 +164,21 @@ class GradioInterface:
212
  value="1b",
213
  )
214
 
215
- gr.Examples(
216
- inputs=input_image,
217
- examples=[
218
- os.path.join(Config.ASSETS_DIR, "images", img)
219
- for img in os.listdir(
220
- os.path.join(Config.ASSETS_DIR, "images")
221
- )
222
- ],
223
- examples_per_page=14,
224
- )
225
 
226
- with gr.Column():
227
- result_image = gr.Image(
228
  label="Segmentation Result",
229
- type="pil",
230
- elem_classes="image-preview",
231
- )
232
- npy_output = gr.File(label="Segmentation (.npy)")
233
- run_button = gr.Button("Run", variant="primary")
234
-
235
- gr.Image(
236
- os.path.join(Config.ASSETS_DIR, "palette.jpg"),
237
- label="Class Palette",
238
- type="filepath",
239
- elem_classes="image-preview",
240
  )
241
 
242
- run_button.click(
243
- fn=process,
 
 
244
  inputs=[input_image, model_name],
245
- outputs=[result_image, npy_output],
246
  )
247
 
248
  return demo
@@ -257,11 +193,9 @@ def main():
257
  torch.backends.cuda.matmul.allow_tf32 = True
258
  torch.backends.cudnn.allow_tf32 = True
259
 
260
- interface = GradioInterface()
261
- demo = interface.create_interface()
262
- demo.launch(server_name="0.0.0.0", share=False)
263
 
264
 
265
  if __name__ == "__main__":
266
  main()
267
-
 
1
  import os
 
2
  import gradio as gr
3
  import numpy as np
4
  import torch
 
8
  import tempfile
9
 
10
  from gradio.themes.utils import sizes
11
+ from classes_and_palettes import GOLIATH_CLASSES
12
 
13
 
14
  # =========================================================
 
33
  _cache = {}
34
 
35
  @staticmethod
36
+ def load_model(name: str):
37
+ if name in ModelManager._cache:
38
+ return ModelManager._cache[name]
39
+
40
+ path = os.path.join(Config.CHECKPOINTS_DIR, Config.CHECKPOINTS[name])
41
+ model = torch.jit.load(path)
42
+ model.eval().to("cuda")
43
+ ModelManager._cache[name] = model
 
 
 
 
44
  return model
45
 
46
  @staticmethod
47
  @torch.inference_mode()
48
+ def run(model, x, h, w):
49
+ out = model(x)
50
+ out = F.interpolate(out, size=(h, w), mode="bilinear", align_corners=False)
51
+ return out.argmax(1)
 
 
 
 
 
 
52
 
53
 
54
  # =========================================================
 
57
 
58
  class ImageProcessor:
59
  def __init__(self):
60
+ self.tf = transforms.Compose([
61
  transforms.Resize((1024, 768)),
62
  transforms.ToTensor(),
63
  transforms.Normalize(
 
66
  ),
67
  ])
68
 
69
+ def process(self, image: Image.Image, model_name: str):
70
  model = ModelManager.load_model(model_name)
71
+ x = self.tf(image).unsqueeze(0).to("cuda")
 
 
 
 
 
 
 
72
 
73
+ pred = ModelManager.run(model, x, image.height, image.width)
74
+ mask = pred.squeeze(0).cpu().numpy()
75
 
76
+ # Save raw mask
77
  npy_path = tempfile.mktemp(suffix=".npy")
78
  np.save(npy_path, mask)
79
 
80
+ # Build AnnotatedImage output
81
+ annotations = self._build_annotations(mask)
82
 
83
+ return (image, annotations), npy_path
84
+
85
+ def _build_annotations(self, mask: np.ndarray):
86
+ annotations = []
87
+ for class_id in np.unique(mask):
88
+ if class_id >= len(GOLIATH_CLASSES):
89
+ continue
90
 
91
+ binary_mask = (mask == class_id).astype(np.uint8)
92
+ if binary_mask.sum() == 0:
93
+ continue
94
 
95
+ annotations.append(
96
+ (binary_mask, GOLIATH_CLASSES[class_id])
97
+ )
98
 
99
+ return annotations
 
100
 
101
 
102
  # =========================================================
 
105
 
106
  class GradioInterface:
107
  def __init__(self):
108
+ self.processor = ImageProcessor()
109
 
110
+ def create(self):
 
 
 
111
  theme = gr.themes.Soft(
112
  primary_hue="neutral",
113
  secondary_hue="slate",
 
118
  body_background_fill="#1a1a1a",
119
  body_text_color="#fafafa",
120
  block_background_fill="#2a2a2a",
121
+ block_border_color="#333",
 
 
 
122
  )
123
 
 
 
 
124
  css = """
 
 
 
 
 
 
 
 
125
  .app-header {
126
  padding: 24px;
 
127
  text-align: center;
128
+ margin-bottom: 24px;
129
  }
130
  .app-title {
131
  font-size: 48px;
 
135
  font-size: 24px;
136
  opacity: 0.9;
137
  }
 
 
 
 
 
 
 
138
  """
139
 
140
+ header = """
141
  <div class="app-header">
142
+ <h1 class="app-title">Sapiens Body-Part Segmentation</h1>
143
  <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
144
+ <p>Foundation model fine-tuned for dense human part segmentation.</p>
 
 
 
 
 
 
 
 
 
145
  </div>
146
  """
147
 
148
+ def run(image, model):
149
+ return self.processor.process(image, model)
150
 
151
  with gr.Blocks(theme=theme, css=css) as demo:
152
+ gr.HTML(header)
153
 
154
  with gr.Row():
155
+ with gr.Column(scale=1):
156
  input_image = gr.Image(
157
  label="Input Image",
158
  type="pil",
 
159
  )
160
 
161
  model_name = gr.Dropdown(
 
164
  value="1b",
165
  )
166
 
167
+ run_btn = gr.Button("Run Segmentation", variant="primary")
 
 
 
 
 
 
 
 
 
168
 
169
+ with gr.Column(scale=2):
170
+ annotated = gr.AnnotatedImage(
171
  label="Segmentation Result",
172
+ show_legend=True,
173
+ height=512,
 
 
 
 
 
 
 
 
 
174
  )
175
 
176
+ mask_file = gr.File(label="Raw Mask (.npy)")
177
+
178
+ run_btn.click(
179
+ fn=run,
180
  inputs=[input_image, model_name],
181
+ outputs=[annotated, mask_file],
182
  )
183
 
184
  return demo
 
193
  torch.backends.cuda.matmul.allow_tf32 = True
194
  torch.backends.cudnn.allow_tf32 = True
195
 
196
+ app = GradioInterface().create()
197
+ app.launch(server_name="0.0.0.0", share=False)
 
198
 
199
 
200
  if __name__ == "__main__":
201
  main()