carpedm20 commited on
Commit
f6e4f38
·
verified ·
1 Parent(s): 1879f62

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.png filter=lfs diff=lfs merge=lfs -text
37
+ *.pt2 filter=lfs diff=lfs merge=lfs -text
38
+ assets/palette.jpg filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Sapiens Segmentation
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.42.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: cc-by-nc-4.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import gradio as gr
4
+ import numpy as np
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torchvision import transforms
8
+ from PIL import Image
9
+ import tempfile
10
+
11
+ from gradio.themes.utils import sizes
12
+ from classes_and_palettes import GOLIATH_PALETTE, GOLIATH_CLASSES
13
+
14
+
15
+ # =========================================================
16
+ # Config
17
+ # =========================================================
18
+
19
class Config:
    """Static configuration: asset locations and available checkpoints."""

    # Directory layout: <app dir>/assets/checkpoints/<checkpoint file>.
    ASSETS_DIR = os.path.join(os.path.dirname(__file__), "assets")
    CHECKPOINTS_DIR = os.path.join(ASSETS_DIR, "checkpoints")

    # TorchScript checkpoint filenames keyed by model size. The 2b
    # checkpoint ships in assets/checkpoints alongside the others, so it
    # is exposed here as well (previously uploaded but unselectable).
    CHECKPOINTS = {
        "0.3b": "sapiens_0.3b_goliath_best_goliath_mIoU_7673_epoch_194_torchscript.pt2",
        "0.6b": "sapiens_0.6b_goliath_best_goliath_mIoU_7777_epoch_178_torchscript.pt2",
        "1b": "sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2",
        "2b": "sapiens_2b_goliath_best_goliath_mIoU_8131_epoch_200_torchscript.pt2",
    }
+ }
27
+
28
+
29
+ # =========================================================
30
+ # Model
31
+ # =========================================================
32
+
33
class ModelManager:
    """Loads, caches, and runs the TorchScript segmentation models."""

    # Cache of loaded models keyed by checkpoint name, so switching model
    # sizes in the UI does not reload from disk every time.
    _cache = {}

    @staticmethod
    def load_model(checkpoint_name: str):
        """Load (or fetch from cache) the TorchScript model for *checkpoint_name*.

        Falls back to CPU when CUDA is unavailable instead of crashing on
        CPU-only hosts (the original hard-coded "cuda").
        """
        if checkpoint_name in ModelManager._cache:
            return ModelManager._cache[checkpoint_name]

        checkpoint_path = os.path.join(
            Config.CHECKPOINTS_DIR,
            Config.CHECKPOINTS[checkpoint_name],
        )
        model = torch.jit.load(checkpoint_path)
        model.eval()
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)
        ModelManager._cache[checkpoint_name] = model
        return model

    @staticmethod
    @torch.inference_mode()
    def run_model(model, input_tensor, height, width):
        """Run *model* on *input_tensor* and return per-pixel class ids.

        The raw logits are upsampled back to the original image size
        (height, width) before the argmax, so the returned prediction map
        aligns with the input image, shape (N, height, width).
        """
        output = model(input_tensor)
        output = F.interpolate(
            output,
            size=(height, width),
            mode="bilinear",
            align_corners=False,
        )
        # Argmax over the class dimension -> integer label map.
        _, preds = torch.max(output, 1)
        return preds
63
+
64
+
65
+ # =========================================================
66
+ # Image Processing
67
+ # =========================================================
68
+
69
class ImageProcessor:
    """Preprocesses input images and renders segmentation overlays."""

    def __init__(self):
        # Model input is a fixed 1024x768 tensor; mean/std are given in the
        # 0-255 range and rescaled to 0-1 to match ToTensor's output.
        self.transform_fn = transforms.Compose([
            transforms.Resize((1024, 768)),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[123.5 / 255, 116.5 / 255, 103.5 / 255],
                std=[58.5 / 255, 57.0 / 255, 57.5 / 255],
            ),
        ])

    def process_image(self, image: Image.Image, model_name: str):
        """Segment *image* with the model named *model_name*.

        Returns (blended PIL image, path to a .npy file holding the raw
        integer label map at the original image resolution).
        """
        model = ModelManager.load_model(model_name)
        # Keep the input on the same device the model was loaded onto
        # (CUDA when available, CPU otherwise).
        device = "cuda" if torch.cuda.is_available() else "cpu"
        input_tensor = self.transform_fn(image).unsqueeze(0).to(device)

        preds = ModelManager.run_model(
            model,
            input_tensor,
            image.height,
            image.width,
        )

        mask = preds.squeeze(0).cpu().numpy()
        blended_image = self.visualize_pred_with_overlay(image, mask)

        # NamedTemporaryFile replaces the deprecated, race-prone
        # tempfile.mktemp; delete=False so Gradio can serve the file.
        with tempfile.NamedTemporaryFile(suffix=".npy", delete=False) as tmp:
            np.save(tmp, mask)
            npy_path = tmp.name

        return blended_image, npy_path

    @staticmethod
    def visualize_pred_with_overlay(img, sem_seg, alpha=0.5):
        """Alpha-blend the class-colored segmentation over the RGB image.

        Labels >= len(GOLIATH_CLASSES) are ignored (left black in the
        overlay); *alpha* is the overlay weight in [0, 1].
        """
        img_np = np.array(img.convert("RGB"))
        sem_seg = np.array(sem_seg)

        num_classes = len(GOLIATH_CLASSES)
        ids = np.unique(sem_seg)
        ids = ids[ids < num_classes]

        overlay = np.zeros((*sem_seg.shape, 3), dtype=np.uint8)
        for label in ids:
            overlay[sem_seg == label] = GOLIATH_PALETTE[label]

        blended = np.uint8(img_np * (1 - alpha) + overlay * alpha)
        return Image.fromarray(blended)
114
+
115
+
116
+ # =========================================================
117
+ # UI
118
+ # =========================================================
119
+
120
class GradioInterface:
    """Builds the Gradio Blocks UI around the segmentation pipeline."""

    def __init__(self):
        self.image_processor = ImageProcessor()

    def create_interface(self):
        """Assemble and return the Gradio Blocks demo (not yet launched)."""
        # -------------------------
        # Theme (modern Gradio)
        # -------------------------
        theme = gr.themes.Soft(
            primary_hue="neutral",
            secondary_hue="slate",
            neutral_hue="zinc",
            radius_size=sizes.radius_md,
            text_size=sizes.text_md,
        ).set(
            body_background_fill="#1a1a1a",
            body_text_color="#fafafa",
            block_background_fill="#2a2a2a",
            block_border_color="#333333",
            button_primary_background_fill="#4a4a4a",
            button_primary_background_fill_hover="#5a5a5a",
            input_background_fill="#3a3a3a",
        )

        # -------------------------
        # Minimal CSS (layout only)
        # -------------------------
        css = """
        .image-preview img {
            max-width: 512px;
            max-height: 512px;
            margin: 0 auto;
            display: block;
            object-fit: contain;
            border-radius: 6px;
        }
        .app-header {
            padding: 24px;
            margin-bottom: 24px;
            text-align: center;
        }
        .app-title {
            font-size: 48px;
            font-weight: 700;
        }
        .app-subtitle {
            font-size: 24px;
            opacity: 0.9;
        }
        .publication-links {
            display: flex;
            justify-content: center;
            flex-wrap: wrap;
            gap: 8px;
            margin-top: 12px;
        }
        """

        header_html = """
        <div class="app-header">
            <h1 class="app-title">Sapiens: Body-Part Segmentation</h1>
            <h2 class="app-subtitle">ECCV 2024 (Oral)</h2>
            <p>
                Foundation models for human-centric vision tasks pretrained on
                300M human images. This demo showcases fine-tuned body-part
                segmentation.
            </p>
            <div class="publication-links">
                <a href="https://arxiv.org/abs/2408.12569">arXiv</a>
                <a href="https://github.com/facebookresearch/sapiens">GitHub</a>
                <a href="https://about.meta.com/realitylabs/codecavatars/sapiens/">Meta</a>
            </div>
        </div>
        """

        def process(image, model_name):
            # Thin closure so the click handler captures self.
            return self.image_processor.process_image(image, model_name)

        with gr.Blocks(theme=theme, css=css) as demo:
            gr.HTML(header_html)

            with gr.Row():
                with gr.Column():
                    input_image = gr.Image(
                        label="Input Image",
                        type="pil",
                        elem_classes="image-preview",
                    )

                    model_name = gr.Dropdown(
                        label="Model Size",
                        choices=list(Config.CHECKPOINTS.keys()),
                        value="1b",
                    )

                    # sorted() keeps the example gallery deterministic:
                    # os.listdir order is filesystem-dependent.
                    images_dir = os.path.join(Config.ASSETS_DIR, "images")
                    gr.Examples(
                        inputs=input_image,
                        examples=[
                            os.path.join(images_dir, img)
                            for img in sorted(os.listdir(images_dir))
                        ],
                        examples_per_page=14,
                    )

                with gr.Column():
                    result_image = gr.Image(
                        label="Segmentation Result",
                        type="pil",
                        elem_classes="image-preview",
                    )
                    npy_output = gr.File(label="Segmentation (.npy)")
                    run_button = gr.Button("Run", variant="primary")

            gr.Image(
                os.path.join(Config.ASSETS_DIR, "palette.jpg"),
                label="Class Palette",
                type="filepath",
                elem_classes="image-preview",
            )

            run_button.click(
                fn=process,
                inputs=[input_image, model_name],
                outputs=[result_image, npy_output],
            )

        return demo
249
+
250
+
251
+ # =========================================================
252
+ # Entrypoint
253
+ # =========================================================
254
+
255
def main():
    """Enable fast math where safe, then build and serve the demo."""
    # TF32 matmuls only help on Ampere-class GPUs (compute capability 8.x+).
    has_ampere = (
        torch.cuda.is_available()
        and torch.cuda.get_device_properties(0).major >= 8
    )
    if has_ampere:
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    ui = GradioInterface()
    ui.create_interface().launch(server_name="0.0.0.0", share=False)


if __name__ == "__main__":
    main()
267
+
assets/checkpoints/sapiens_0.3b_goliath_best_goliath_mIoU_7673_epoch_194_torchscript.pt2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735a9a8d63fe8f3f6a4ca3d787de07e69b1f9708ad550e09bb33c9854b7eafbc
3
+ size 1358871599
assets/checkpoints/sapiens_0.6b_goliath_best_goliath_mIoU_7777_epoch_178_torchscript.pt2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86aa2cb9d7310ba1cb1971026889f1d10d80ddf655d6028aea060aae94d82082
3
+ size 2685144079
assets/checkpoints/sapiens_1b_goliath_best_goliath_mIoU_7994_epoch_151_torchscript.pt2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33bba30f3de8d9cfd44e4eaa4817b1bfdd98c188edfc87fa7cc031ba0f4edc17
3
+ size 4716314057
assets/checkpoints/sapiens_2b_goliath_best_goliath_mIoU_8131_epoch_200_torchscript.pt2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe0970265f2af97f9eeb625036f147730d56820d6891803b13a278160c0f98a
3
+ size 8706620345
assets/images/68204.png ADDED

Git LFS Details

  • SHA256: 9b0268cb801ed164864a4b5f6d131e0ac5cc2fbd149a6467d5d0c97da47122c2
  • Pointer size: 132 Bytes
  • Size of remote file: 4.29 MB
assets/images/68210.png ADDED

Git LFS Details

  • SHA256: dbe5f80498af4ebd1ff09ae4184f37c20ba981e53bd554c3cc78d39ae0ee7fd7
  • Pointer size: 132 Bytes
  • Size of remote file: 3.93 MB
assets/images/68658.png ADDED

Git LFS Details

  • SHA256: 61a68b619bd17235e683324f2826ce0693322e45ab8c86f1c057851ecb333ac7
  • Pointer size: 132 Bytes
  • Size of remote file: 5.1 MB
assets/images/68666.png ADDED

Git LFS Details

  • SHA256: ea3047e6c2ccb485fdb3966aa2325e803cbf49c27c0bff00287b44bc16f18914
  • Pointer size: 132 Bytes
  • Size of remote file: 4.56 MB
assets/images/68691.png ADDED

Git LFS Details

  • SHA256: fae39e4055c1b297af7068cdddfeeba8d685363281b839d8c5afac1980204b57
  • Pointer size: 132 Bytes
  • Size of remote file: 3.74 MB
assets/images/68956.png ADDED

Git LFS Details

  • SHA256: eee1f27082b10999d0fa848121ecb06cda3386b1a864b9aa0f59ae78261f8908
  • Pointer size: 132 Bytes
  • Size of remote file: 4.15 MB
assets/images/pexels-amresh444-17315601.png ADDED

Git LFS Details

  • SHA256: 4e17ee1b229147e4b52e8348a6ef426bc9e9a2f90738e776e15b26b325abb9b3
  • Pointer size: 132 Bytes
  • Size of remote file: 3.5 MB
assets/images/pexels-gabby-k-6311686.png ADDED

Git LFS Details

  • SHA256: 3f10eded3fb05ab04b963f7b9fd2e183d8d4e81b20569b1c6b0653549639421f
  • Pointer size: 132 Bytes
  • Size of remote file: 3.65 MB
assets/images/pexels-julia-m-cameron-4145040.png ADDED

Git LFS Details

  • SHA256: 459cf0280667b028ffbca16aa11188780d7a0205c0defec02916ff3cbaeecb72
  • Pointer size: 132 Bytes
  • Size of remote file: 2.92 MB
assets/images/pexels-marcus-aurelius-6787357.png ADDED

Git LFS Details

  • SHA256: 7d35452f76492125eaf7d5783aa9fd6b0d5990ebe0579fe9dfd58a9d634f4955
  • Pointer size: 132 Bytes
  • Size of remote file: 3.3 MB
assets/images/pexels-mo-saeed-3616599-5409085.png ADDED

Git LFS Details

  • SHA256: 7c1ca7afd6c2a654e94ef59d5fb56fca4f3cde5fb5216f6b218c34a7b8c143dc
  • Pointer size: 132 Bytes
  • Size of remote file: 3.13 MB
assets/images/pexels-riedelmax-27355495.png ADDED

Git LFS Details

  • SHA256: 4141d2f5f718f162ea1f6710c06b28b5cb51fd69598fde35948f8f3491228164
  • Pointer size: 132 Bytes
  • Size of remote file: 3.73 MB
assets/images/pexels-sergeymakashin-5368660.png ADDED

Git LFS Details

  • SHA256: af8f5a8f26dd102d87d94c1be36ec903791fe8e6d951c68ebb9ebcfc6d7397bb
  • Pointer size: 132 Bytes
  • Size of remote file: 4.08 MB
assets/images/pexels-vinicius-wiesehofer-289347-4219918.png ADDED

Git LFS Details

  • SHA256: a6eef5eee15b81fe65ea95627e9a46040b9889466689b3c1ca6ed273e02fe84f
  • Pointer size: 132 Bytes
  • Size of remote file: 3.63 MB
assets/palette.jpg ADDED

Git LFS Details

  • SHA256: b17692ef3956cbc93376b0238e8256b0759544b694d03f612f21219f6d9c3877
  • Pointer size: 131 Bytes
  • Size of remote file: 313 kB
classes_and_palettes.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Full 34-class Goliath label set, in model output order.
ORIGINAL_GOLIATH_CLASSES = (
    "Background",
    "Apparel",
    "Chair",
    "Eyeglass_Frame",
    "Eyeglass_Lenses",
    "Face_Neck",
    "Hair",
    "Headset",
    "Left_Foot",
    "Left_Hand",
    "Left_Lower_Arm",
    "Left_Lower_Leg",
    "Left_Shoe",
    "Left_Sock",
    "Left_Upper_Arm",
    "Left_Upper_Leg",
    "Lower_Clothing",
    "Lower_Spandex",
    "Right_Foot",
    "Right_Hand",
    "Right_Lower_Arm",
    "Right_Lower_Leg",
    "Right_Shoe",
    "Right_Sock",
    "Right_Upper_Arm",
    "Right_Upper_Leg",
    "Torso",
    "Upper_Clothing",
    "Visible_Badge",
    "Lower_Lip",
    "Upper_Lip",
    "Lower_Teeth",
    "Upper_Teeth",
    "Tongue",
)

# RGB color per class, index-aligned with ORIGINAL_GOLIATH_CLASSES.
ORIGINAL_GOLIATH_PALETTE = [
    [50, 50, 50],
    [255, 218, 0],
    [102, 204, 0],
    [14, 0, 204],
    [0, 204, 160],
    [128, 200, 255],
    [255, 0, 109],
    [0, 255, 36],
    [189, 0, 204],
    [255, 0, 218],
    [0, 160, 204],
    [0, 255, 145],
    [204, 0, 131],
    [182, 0, 255],
    [255, 109, 0],
    [0, 255, 255],
    [72, 0, 255],
    [204, 43, 0],
    [204, 131, 0],
    [255, 0, 0],
    [72, 255, 0],
    [189, 204, 0],
    [182, 255, 0],
    [102, 0, 204],
    [32, 72, 204],
    [0, 145, 255],
    [14, 204, 0],
    [0, 128, 72],
    [204, 0, 43],
    [235, 205, 119],
    [115, 227, 112],
    [157, 113, 143],
    [132, 93, 50],
    [82, 21, 114],
]

## 6 classes to remove
REMOVE_CLASSES = (
    "Eyeglass_Frame",
    "Eyeglass_Lenses",
    "Visible_Badge",
    "Chair",
    "Lower_Spandex",
    "Headset",
)

## 34 - 6 = 28 classes left.
## Filter classes and colors together so the palette stays aligned with
## the surviving class names.
GOLIATH_CLASSES = tuple(
    name for name in ORIGINAL_GOLIATH_CLASSES if name not in REMOVE_CLASSES
)
GOLIATH_PALETTE = [
    color
    for name, color in zip(ORIGINAL_GOLIATH_CLASSES, ORIGINAL_GOLIATH_PALETTE)
    if name not in REMOVE_CLASSES
]
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ numpy
3
+ torch
4
+ torchvision
5
+ matplotlib
6
+ pillow
7
+ spaces
8
+ opencv-python