Spaces:

prithivMLmods
/

SAM3-Demo

Running on Zero

App Files Files Community

prithivMLmods committed on Nov 25, 2025

Commit

d5ea556

verified ·

1 Parent(s): a94e539

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -137

app.py CHANGED Viewed

@@ -8,86 +8,82 @@ import torch
 import cv2
 import tempfile
 import shutil
-import traceback
 from PIL import Image
 from typing import Iterable
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
-from transformers import Sam3Processor, Sam3Model
 # ---------------------------------------------------------
-# 0. HEADLESS RENDERING SETUP
 # ---------------------------------------------------------
-# Essential for running 3D visualizers (pyrender) in cloud environments
-os.environ["PYOPENGL_PLATFORM"] = "egl"
-# ---------------------------------------------------------
-# 1. SETUP & DYNAMIC IMPORTS
-# ---------------------------------------------------------
 REPO_URL = "https://github.com/facebookresearch/sam-3d-body.git"
 REPO_DIR = "sam-3d-body"
-def setup_environment():
-    """Clones repo and sets up Python paths."""
-    print("--- Checking Environment for SAM 3D Body ---")
-    # 1. Clone Repository
     if not os.path.exists(REPO_DIR):
-        print(f"Cloning {REPO_URL}...")
         try:
             subprocess.run(["git", "clone", REPO_URL], check=True)
-            print("Repository cloned.")
-            # Install in editable mode to ensure package discovery works
-            subprocess.run([sys.executable, "-m", "pip", "install", "-e", f"./{REPO_DIR}"], check=True)
-        except Exception as e:
-            print(f"Git clone/Install failed: {e}")
     # 2. Add paths to sys.path
-    repo_abs = os.path.abspath(REPO_DIR)
-    notebook_abs = os.path.abspath(os.path.join(REPO_DIR, "notebook"))
-    if repo_abs not in sys.path:
-        sys.path.insert(0, repo_abs)
-    if notebook_abs not in sys.path:
-        sys.path.insert(0, notebook_abs)
-    print(f"Python Paths: {sys.path[:2]}...")
-setup_environment()
-# Global variables for models and error tracking
-sam3d_estimator = None
-sam3d_visualizer = None
-sam3d_load_error = None
-SAM3D_AVAILABLE = False
 try:
-    # Try importing from the utils file in the cloned repo
-    # This expects 'notebook/utils.py' to exist
-    from utils import (
-        setup_sam_3d_body,
-        setup_visualizer,
-        visualize_2d_results,
-        visualize_3d_mesh,
-        save_mesh_results
-    )
-    print("Loading SAM 3D Body Estimator (this may take time)...")
-    # Initialize the model immediately to catch errors early
-    sam3d_estimator = setup_sam_3d_body(hf_repo_id="facebook/sam-3d-body-dinov3")
-    sam3d_visualizer = setup_visualizer()
-    SAM3D_AVAILABLE = True
-    print("SAM 3D Body Model loaded successfully.")
-except Exception as e:
-    # Capture the exact error (e.g., missing mmhuman3d)
-    sam3d_load_error = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}"
-    print(f"CRITICAL ERROR loading SAM 3D Body:\n{sam3d_load_error}")
-    SAM3D_AVAILABLE = False
 # ---------------------------------------------------------
-# 2. THEME DEFINITION
 # ---------------------------------------------------------
 colors.steel_blue = colors.Color(
     name="steel_blue",
@@ -155,31 +151,49 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
 # ---------------------------------------------------------
-# 3. MODEL LOADING (SAM3)
 # ---------------------------------------------------------
-try:
-    print("Loading SAM3 Model...")
-    sam3_model = Sam3Model.from_pretrained("facebook/sam3").to(device)
-    sam3_processor = Sam3Processor.from_pretrained("facebook/sam3")
-    print("SAM3 Model loaded successfully.")
-except Exception as e:
-    print(f"Error loading SAM3 model: {e}")
-    sam3_model = None
-    sam3_processor = None
 # ---------------------------------------------------------
-# 4. INFERENCE FUNCTIONS
 # ---------------------------------------------------------
 @spaces.GPU
 def segment_image(input_image, text_prompt, threshold=0.5):
-    """Function for Tab 1: SAM3 Segmentation"""
     if input_image is None:
         raise gr.Error("Please upload an image.")
     if not text_prompt:
         raise gr.Error("Please enter a text prompt.")
-    if sam3_model is None or sam3_processor is None:
-        raise gr.Error("SAM3 Model not loaded correctly.")
     image_pil = input_image.convert("RGB")
     inputs = sam3_processor(images=image_pil, text=text_prompt, return_tensors="pt").to(device)
@@ -204,62 +218,74 @@ def segment_image(input_image, text_prompt, threshold=0.5):
     return (image_pil, annotations)
 @spaces.GPU
 def process_3d_body(input_image):
-    """Function for Tab 2: SAM 3D Body"""
     if input_image is None:
         raise gr.Error("Please upload an image.")
-    # Check if initialization failed
     if not SAM3D_AVAILABLE or sam3d_estimator is None:
-        # Raise the specific error captured during startup
-        error_msg = sam3d_load_error if sam3d_load_error else "Unknown initialization error."
-        raise gr.Error(f"Model Setup Failed. Logs:\n{error_msg}")
-    # Convert PIL to CV2 BGR
     img_np = np.array(input_image.convert("RGB"))
     img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
-    # Helper requires a physical file path
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
         tmp_path = tmp_file.name
         cv2.imwrite(tmp_path, img_cv2)
     try:
-        print(f"Running inference on {tmp_path}...")
         outputs = sam3d_estimator.process_one_image(tmp_path)
         if not outputs:
             return None, None, None, "No people detected."
-        # 1. 2D Vis
         vis_results_2d = visualize_2d_results(img_cv2, outputs, sam3d_visualizer)
-        res_2d_rgb = cv2.cvtColor(vis_results_2d[0], cv2.COLOR_BGR2RGB) if vis_results_2d else img_np
-        # 2. 3D Overlay
         mesh_results_img = visualize_3d_mesh(img_cv2, outputs, sam3d_estimator.faces)
-        res_3d_overlay_rgb = cv2.cvtColor(mesh_results_img[0], cv2.COLOR_BGR2RGB) if mesh_results_img else img_np
-        # 3. Save PLY
         output_dir = tempfile.mkdtemp()
-        image_name = "gradio_mesh_result"
         ply_files = save_mesh_results(img_cv2, outputs, sam3d_estimator.faces, output_dir, image_name)
-        ply_path = ply_files[0] if ply_files else None
-        status = f"Success! Detected {len(outputs)} person(s)."
         return res_2d_rgb, res_3d_overlay_rgb, ply_path, status
     except Exception as e:
         traceback.print_exc()
-        raise gr.Error(f"Inference Runtime Error: {str(e)}")
     finally:
         if os.path.exists(tmp_path):
             os.remove(tmp_path)
 # ---------------------------------------------------------
-# 5. GRADIO UI LAYOUT
 # ---------------------------------------------------------
 css = """
@@ -275,73 +301,52 @@ with gr.Blocks(css=css, theme=steel_blue_theme) as demo:
         gr.Markdown("# **SAM Integrated Vision Suite**", elem_id="main-title")
         with gr.Tabs():
-            # ================= TAB 1: SEGMENTATION =================
             with gr.Tab("SAM3 Segmentation"):
                 gr.Markdown("Segment objects using **SAM3** with text prompts.")
                 with gr.Row():
                     with gr.Column(scale=1):
-                        t1_input_image = gr.Image(label="Input Image", type="pil", height=350)
-                        t1_text_prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., cat, ear, car wheel...")
-                        t1_threshold = gr.Slider(label="Confidence Threshold", minimum=0.0, maximum=1.0, value=0.4, step=0.05)
-                        t1_run_btn = gr.Button("Segment Image", variant="primary")
                     with gr.Column(scale=1.5):
-                        t1_output_image = gr.AnnotatedImage(label="Segmented Output", height=450)
-                t1_run_btn.click(
-                    fn=segment_image,
-                    inputs=[t1_input_image, t1_text_prompt, t1_threshold],
-                    outputs=[t1_output_image]
-                )
-            # ================= TAB 2: 3D BODY =================
             with gr.Tab("SAM 3D Body"):
                 gr.Markdown("Detect human bodies and reconstruct **3D Meshes**.")
                 with gr.Row():
                     with gr.Column(scale=1):
-                        t2_input_image = gr.Image(label="Input Image", type="pil", height=350)
-                        t2_run_btn = gr.Button("Generate 3D Body", variant="primary")
                         t2_status = gr.Textbox(label="Status", interactive=False)
-                        # Warning box if initialization failed
-                        if not SAM3D_AVAILABLE:
-                            gr.Markdown(
-                                "⚠️ **Warning: SAM 3D Body failed to load.**\n"
-                                f"Error: {sam3d_load_error}\n"
-                                "Please check `mmhuman3d` and `mmcv` dependencies.",
-                                elem_classes=["error-box"]
-                            )
                     with gr.Column(scale=2):
                         with gr.Row():
-                            t2_output_2d = gr.Image(label="2D Keypoints", type="numpy")
-                            t2_output_overlay = gr.Image(label="Mesh Overlay", type="numpy")
-                        t2_output_3d = gr.Model3D(
-                            label="Interactive 3D Mesh (PLY)",
                             clear_color=[0.0, 0.0, 0.0, 0.0],
-                            camera_position=[0, 0, 3]
                         )
-                t2_run_btn.click(
-                    fn=process_3d_body,
-                    inputs=[t2_input_image],
-                    outputs=[t2_output_2d, t2_output_overlay, t2_output_3d, t2_status]
                 )
-                # Dynamic examples
-                ex_files = []
-                if os.path.exists("examples/player.jpg"): ex_files.append(["examples/player.jpg"])
-                if os.path.exists("examples/dancing.jpg"): ex_files.append(["examples/dancing.jpg"])
-                if ex_files:
-                    gr.Examples(
-                        examples=ex_files,
-                        inputs=[t2_input_image],
-                        label="3D Body Examples"
-                    )
 if __name__ == "__main__":
     demo.launch(mcp_server=True, ssr_mode=False, show_error=True)

 import cv2
 import tempfile
 import shutil
 from PIL import Image
 from typing import Iterable
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
 # ---------------------------------------------------------
+# 1. ENVIRONMENT SETUP & REPO CLONING
 # ---------------------------------------------------------
+# Define the repository path
 REPO_URL = "https://github.com/facebookresearch/sam-3d-body.git"
 REPO_DIR = "sam-3d-body"
def setup_sam_3d_env():
    """Clone the SAM 3D Body repository if needed and expose it on sys.path.

    When REPO_DIR does not exist, the repository at REPO_URL is cloned with
    git and then installed in editable mode with pip. Afterwards the
    repository root and its "notebook" sub-folder are prepended to sys.path
    unless they are already listed.

    Returns:
        bool: True when the paths were registered, False when the clone or
        install step could not be completed.
    """
    # 1. Clone if not exists
    if not os.path.exists(REPO_DIR):
        print(f"Cloning SAM 3D Body repository from {REPO_URL}...")
        try:
            subprocess.run(["git", "clone", REPO_URL], check=True)
            # Install the package in editable mode to handle internal imports
            print("Installing sam-3d-body package...")
            subprocess.run([sys.executable, "-m", "pip", "install", "-e", REPO_DIR], check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            # CalledProcessError: the command ran and exited non-zero.
            # OSError: the command could not be launched at all (for example
            # git is not on PATH); without this the module import would crash
            # instead of reporting the failure through the return value.
            print(f"Error during setup: {e}")
            return False

    # 2. Add paths to sys.path
    repo_abs_path = os.path.abspath(REPO_DIR)
    notebook_path = os.path.join(repo_abs_path, "notebook")
    # Add repo root (for sam_3d_body package)
    if repo_abs_path not in sys.path:
        sys.path.insert(0, repo_abs_path)
    # Add notebook folder (for utils.py)
    if notebook_path not in sys.path:
        sys.path.insert(0, notebook_path)
    return True
+# Run setup
+env_ready = setup_sam_3d_env()
+# ---------------------------------------------------------
+# 2. IMPORTS
+# ---------------------------------------------------------
+# Import SAM3 (Transformers)
 try:
+    from transformers import Sam3Processor, Sam3Model
+    SAM3_AVAILABLE = True
+except ImportError:
+    print("Warning: transformers library not found or outdated. SAM3 will be disabled.")
+    SAM3_AVAILABLE = False
+# Import SAM 3D Body Utils
+SAM3D_AVAILABLE = False
+if env_ready:
+    try:
+        # Import specific functions from the notebook/utils.py
+        # Note: We rely on the path insertion above to find 'utils'
+        from utils import (
+            setup_sam_3d_body,
+            setup_visualizer,
+            visualize_2d_results,
+            visualize_3d_mesh,
+            save_mesh_results
+        )
+        SAM3D_AVAILABLE = True
+        print("SAM 3D Body utils imported successfully.")
+    except ImportError as e:
+        print(f"Error importing SAM 3D Body utils: {e}")
+        print("Ensure requirements are installed (pytorch3d, opencv, etc.)")
 # ---------------------------------------------------------
+# 3. THEME DEFINITION
 # ---------------------------------------------------------
 colors.steel_blue = colors.Color(
     name="steel_blue",
 print(f"Using device: {device}")
 # ---------------------------------------------------------
+# 4. LOAD MODELS
 # ---------------------------------------------------------
# --- Load SAM3 ---
# Both handles stay None unless the model AND the processor load; the
# request handler guards on sam3_model only, so the pair must never be
# left half-initialised.
sam3_model = None
sam3_processor = None
if SAM3_AVAILABLE:
    try:
        print("Loading SAM3 Model...")
        sam3_model = Sam3Model.from_pretrained("facebook/sam3").to(device)
        sam3_processor = Sam3Processor.from_pretrained("facebook/sam3")
        print("SAM3 Loaded.")
    except Exception as e:
        print(f"Error loading SAM3: {e}")
        # Roll back a partial load (model present, processor missing).
        sam3_model = None
        sam3_processor = None

# --- Load SAM 3D Body ---
# Same contract as above: estimator and visualizer are either both set or
# both None, and SAM3D_AVAILABLE is cleared when loading fails.
sam3d_estimator = None
sam3d_visualizer = None
if SAM3D_AVAILABLE:
    try:
        print("Loading SAM 3D Body Estimator...")
        # Note: This might require huggingface_hub login if the repo is gated,
        # but facebook/sam-3d-body-dinov3 is usually public.
        sam3d_estimator = setup_sam_3d_body(hf_repo_id="facebook/sam-3d-body-dinov3")
        sam3d_visualizer = setup_visualizer()
        print("SAM 3D Body Loaded.")
    except Exception as e:
        print(f"Error loading SAM 3D Body model: {e}")
        # Roll back a partial load (estimator present, visualizer missing).
        sam3d_estimator = None
        sam3d_visualizer = None
        SAM3D_AVAILABLE = False
 # ---------------------------------------------------------
+# 5. INFERENCE FUNCTIONS
 # ---------------------------------------------------------
 @spaces.GPU
 def segment_image(input_image, text_prompt, threshold=0.5):
     if input_image is None:
         raise gr.Error("Please upload an image.")
     if not text_prompt:
         raise gr.Error("Please enter a text prompt.")
+    if sam3_model is None:
+        raise gr.Error("SAM3 Model is not loaded.")
     image_pil = input_image.convert("RGB")
     inputs = sam3_processor(images=image_pil, text=text_prompt, return_tensors="pt").to(device)
     return (image_pil, annotations)
 @spaces.GPU
 def process_3d_body(input_image):
     if input_image is None:
         raise gr.Error("Please upload an image.")
     if not SAM3D_AVAILABLE or sam3d_estimator is None:
+        raise gr.Error("SAM 3D Body libraries or model not available (Check logs for import errors).")
+    # Prepare Image
     img_np = np.array(input_image.convert("RGB"))
     img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+    # The utils/estimator usually requires a file path
     with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmp_file:
         tmp_path = tmp_file.name
         cv2.imwrite(tmp_path, img_cv2)
     try:
+        # Run Inference
+        print(f"Processing 3D Body for {tmp_path}...")
         outputs = sam3d_estimator.process_one_image(tmp_path)
         if not outputs:
             return None, None, None, "No people detected."
+        # 1. 2D Visuals
         vis_results_2d = visualize_2d_results(img_cv2, outputs, sam3d_visualizer)
+        # Handle case if visualize_2d_results returns list of images (one per person)
+        if isinstance(vis_results_2d, list) and len(vis_results_2d) > 0:
+            # Just take the first one or combine them?
+            # Usually it returns cropped visuals. Let's assume list of images.
+            res_2d_rgb = cv2.cvtColor(vis_results_2d[0], cv2.COLOR_BGR2RGB)
+        else:
+            res_2d_rgb = img_np
+        # 2. 3D Overlay Visuals
         mesh_results_img = visualize_3d_mesh(img_cv2, outputs, sam3d_estimator.faces)
+        if isinstance(mesh_results_img, list) and len(mesh_results_img) > 0:
+            res_3d_overlay_rgb = cv2.cvtColor(mesh_results_img[0], cv2.COLOR_BGR2RGB)
+        else:
+            res_3d_overlay_rgb = img_np
+        # 3. Save PLY for Model3D
         output_dir = tempfile.mkdtemp()
+        image_name = "gradio_mesh"
+        # save_mesh_results returns list of paths to .ply files
         ply_files = save_mesh_results(img_cv2, outputs, sam3d_estimator.faces, output_dir, image_name)
+        ply_path = None
+        if ply_files and len(ply_files) > 0:
+            ply_path = ply_files[0] # Return the first mesh found
+        status = f"Detected {len(outputs)} person(s). Showing result for Person 0."
         return res_2d_rgb, res_3d_overlay_rgb, ply_path, status
     except Exception as e:
+        import traceback
         traceback.print_exc()
+        raise gr.Error(f"Inference failed: {e}")
     finally:
         if os.path.exists(tmp_path):
             os.remove(tmp_path)
 # ---------------------------------------------------------
+# 6. GUI
 # ---------------------------------------------------------
 css = """
         gr.Markdown("# **SAM Integrated Vision Suite**", elem_id="main-title")
         with gr.Tabs():
+            # TAB 1: SEGMENTATION
             with gr.Tab("SAM3 Segmentation"):
                 gr.Markdown("Segment objects using **SAM3** with text prompts.")
                 with gr.Row():
                     with gr.Column(scale=1):
+                        t1_input = gr.Image(label="Input Image", type="pil", height=350)
+                        t1_prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., cat, face...")
+                        t1_thresh = gr.Slider(0.0, 1.0, 0.4, step=0.05, label="Threshold")
+                        t1_btn = gr.Button("Segment", variant="primary")
                     with gr.Column(scale=1.5):
+                        t1_output = gr.AnnotatedImage(label="Segmented Output", height=450)
+                t1_btn.click(segment_image, [t1_input, t1_prompt, t1_thresh], [t1_output])
+            # TAB 2: 3D BODY
             with gr.Tab("SAM 3D Body"):
                 gr.Markdown("Detect human bodies and reconstruct **3D Meshes**.")
                 with gr.Row():
                     with gr.Column(scale=1):
+                        t2_input = gr.Image(label="Input Image", type="pil", height=350)
+                        t2_btn = gr.Button("Generate 3D Body", variant="primary")
                         t2_status = gr.Textbox(label="Status", interactive=False)
                     with gr.Column(scale=2):
                         with gr.Row():
+                            t2_vis_2d = gr.Image(label="2D Detection", type="numpy")
+                            t2_vis_overlay = gr.Image(label="Mesh Overlay", type="numpy")
+                        t2_model_3d = gr.Model3D(
+                            label="Interactive 3D Mesh",
                             clear_color=[0.0, 0.0, 0.0, 0.0],
+                            camera_position=[0, 0, 2.5]
                         )
+                t2_btn.click(
+                    process_3d_body,
+                    inputs=[t2_input],
+                    outputs=[t2_vis_2d, t2_vis_overlay, t2_model_3d, t2_status]
                 )
+                gr.Examples(
+                    examples=[["examples/player.jpg"], ["examples/dancing.jpg"]],
+                    inputs=[t2_input],
+                    label="3D Body Examples"
+                )
 if __name__ == "__main__":
     demo.launch(mcp_server=True, ssr_mode=False, show_error=True)