Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -1,103 +1,51 @@
-import gradio as gr
+import streamlit as st
 import torch
-import numpy as np
 import cv2
-
-from torchvision.transforms import Compose, Resize, ToTensor, Normalize
-from segment_anything import SamPredictor, sam_model_registry
-import os
+import numpy as np
+from segment_anything import sam_model_registry, SamPredictor
 
-
+@st.cache_resource
 def load_models():
+    device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
+    # Load SAM (vit_b)
     sam_checkpoint = "sam_vit_b_01ec64.pth"
     model_type = "vit_b"
-
     sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device)
-
-    if not os.path.exists(sam_checkpoint):
-        raise FileNotFoundError("Please upload the SAM checkpoint file to the working directory.")
-
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    sam = sam_model_registry["vit_h"](checkpoint=sam_checkpoint).to(device)
     predictor = SamPredictor(sam)
 
-
-    midas.eval()
-
-
-    return predictor, midas, midas_transform
+    # Load MiDaS
+    midas = torch.hub.load("intel-isl/MiDaS", "DPT_Large").to(device)
+    midas.eval()
+    midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
+    transform = midas_transforms.dpt_transform
+
+    return predictor, midas, transform
 
 predictor, midas_model, midas_transform = load_models()
 
-
-def process_image(image_pil):
-    image_np = np.array(image_pil)
-    img_h, img_w = image_np.shape[:2]
-
-
-    pixel_to_cm_y = real_image_height_cm / img_h
-
-    #
-
-
-    #
-    input_tensor = midas_transform(
+st.title("SAM + MiDaS Depth App")
+
+uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+if uploaded_file:
+    image = cv2.imdecode(np.frombuffer(uploaded_file.read(), np.uint8), 1)
+    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+
+    st.image(image_rgb, caption="Original Image", use_column_width=True)
+
+    # Ask for click input
+    st.write("Click a point for segmentation")
+    coords = st.image(image_rgb, use_column_width=True)
+
+    # For now, run depth estimation directly
+    input_tensor = midas_transform(image_rgb).to("cuda" if torch.cuda.is_available() else "cpu")
     with torch.no_grad():
-
-
-        width_cm = width_px * pixel_to_cm_x
-        height_cm = height_px * pixel_to_cm_y
-
-        depth_masked = depth_resized[mask > 0.5]
-        if depth_masked.size == 0:
-            continue
-
-        normalized_depth = (depth_masked - np.min(depth_resized)) / (np.max(depth_resized) - np.min(depth_resized) + 1e-6)
-        depth_cm = np.mean(normalized_depth) * assumed_max_depth_cm
-        volume_cm3 = round(depth_cm * width_cm * height_cm, 2)
-
-        volume_data.append([
-            f"Object #{i+1}",
-            round(depth_cm, 2),
-            round(width_cm, 2),
-            round(height_cm, 2),
-            volume_cm3
-        ])
-
-    if not volume_data:
-        return image_pil, "No objects segmented."
-
-    df = pd.DataFrame(volume_data, columns=["Object", "Length (Depth) cm", "Breadth (Width) cm", "Height cm", "Volume cm³"])
-    return image_pil, df
-
-# Gradio Interface
-with gr.Blocks() as demo:
-    gr.Markdown("# 📦 Volume Estimation using SAM + MiDaS")
-    with gr.Row():
-        image_input = gr.Image(type="pil", label="Upload Image")
-        run_btn = gr.Button("Estimate Volume")
-    with gr.Row():
-        output_image = gr.Image(label="Original Image")
-        volume_table = gr.Dataframe(headers=["Object", "Length (Depth) cm", "Breadth (Width) cm", "Height cm", "Volume cm³"])
-    run_btn.click(fn=process_image, inputs=image_input, outputs=[output_image, volume_table])
-
-demo.launch()
+        depth = midas_model(input_tensor.unsqueeze(0))
+        depth = torch.nn.functional.interpolate(
+            depth.unsqueeze(1),
+            size=image_rgb.shape[:2],
+            mode="bicubic",
+            align_corners=False,
+        ).squeeze().cpu().numpy()
+
+    st.image(depth, caption="Estimated Depth", use_column_width=True, clamp=True)
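
A note on the depth step above: the MiDaS hub transforms (dpt_transform) already return a batched tensor of shape (1, 3, H, W), so midas_model(input_tensor.unsqueeze(0)) feeds a 5-D tensor to the model, a likely cause of this Space's runtime error. A minimal sketch of the inference step without the extra batch dimension, reusing the midas_model and midas_transform returned by load_models(); the final [0, 1] normalization is an added assumption so st.image can display the result directly:

import numpy as np
import torch

def estimate_depth(image_rgb: np.ndarray, midas_model, midas_transform, device: str) -> np.ndarray:
    # dpt_transform already batches: transform(image) -> (1, 3, H, W), so no unsqueeze(0)
    input_batch = midas_transform(image_rgb).to(device)
    with torch.no_grad():
        prediction = midas_model(input_batch)  # (1, H', W')
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),           # (1, 1, H', W'), the NCHW layout interpolate expects
            size=image_rgb.shape[:2],          # back to the original resolution
            mode="bicubic",
            align_corners=False,
        ).squeeze()                            # (H, W)
    depth = prediction.cpu().numpy()
    # Scale to [0, 1] for display (assumption, not in the diff)
    return (depth - depth.min()) / (depth.max() - depth.min() + 1e-6)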
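
On the click prompt: st.image only renders the picture and returns None, so coords = st.image(image_rgb, use_column_width=True) cannot capture a click to feed SamPredictor. One way to wire a point prompt, sketched under the assumption that the third-party streamlit-image-coordinates component is installed (pip install streamlit-image-coordinates); the helper name segment_at_click is hypothetical:

import numpy as np
import streamlit as st
from PIL import Image
from streamlit_image_coordinates import streamlit_image_coordinates  # third-party, assumed installed

def segment_at_click(image_rgb: np.ndarray, predictor) -> None:
    # Renders the image and returns the last click as {"x": ..., "y": ...}, or None before any click
    point = streamlit_image_coordinates(Image.fromarray(image_rgb))
    if point is None:
        st.write("Click a point for segmentation")
        return
    predictor.set_image(image_rgb)  # SamPredictor expects an RGB uint8 array
    masks, scores, _ = predictor.predict(
        point_coords=np.array([[point["x"], point["y"]]]),
        point_labels=np.array([1]),  # 1 marks a foreground point
        multimask_output=True,
    )
    best_mask = masks[int(np.argmax(scores))]  # keep the highest-scoring proposal
    st.image(best_mask.astype(np.uint8) * 255, caption="SAM mask", clamp=True)

With something like this in place, the "Click a point for segmentation" write call and the coords = st.image(...) placeholder in the diff collapse into a single segment_at_click(image_rgb, predictor) call.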