Spaces:

shortcipher
/

2dto3d

Runtime error

App Files Files Community

shortcipher committed on Mar 22, 2025

Commit

a772610

verified ·

1 Parent(s): 1da259a

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+# Hugging Face Space: 2D to 3D Stereo Pair Generator using Depth + LaMa Inpainting
+import gradio as gr
+import torch
+import numpy as np
+import cv2
+from PIL import Image
+from transformers import DPTForDepthEstimation, DPTFeatureExtractor
+import requests
+import tempfile
+import subprocess
+import os
# === DEVICE ===
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
+# === DEPTH MODEL ===
def load_depth_model(model_name="Intel/dpt-hybrid-midas"):
    """Load the DPT depth-estimation model and its matching preprocessor.

    Args:
        model_name: Checkpoint identifier handed to ``from_pretrained`` for
            both the model and the preprocessor. Defaults to the checkpoint
            this app was written for.

    Returns:
        A ``(model, processor)`` tuple. The model has been moved to the
        module-level ``device``.
    """
    # Both pieces come from the same checkpoint so that preprocessing always
    # matches the weights.
    model = DPTForDepthEstimation.from_pretrained(model_name).to(device)
    processor = DPTFeatureExtractor.from_pretrained(model_name)
    return model, processor
@torch.no_grad()
def estimate_depth(image: Image.Image, model, processor):
    """Predict a depth map for ``image``, normalised to the range [0, 1].

    The image is resized to 384x384 for the network, and the prediction is
    then upsampled back to the resolution of the *input* image so the result
    can be indexed with the same pixel coordinates as the image itself.

    Args:
        image: PIL image to analyse.
        model: Depth model; called as ``model(**inputs)`` and expected to
            expose ``predicted_depth`` on its output.
        processor: Preprocessor; called as
            ``processor(images=..., return_tensors="pt")``.

    Returns:
        A 2-D NumPy array of shape ``(image.height, image.width)`` with values
        min-max normalised to [0, 1]. A completely flat prediction yields an
        all-zero map instead of dividing by zero.

    NOTE(review): whether larger values mean "nearer" or "farther" depends on
    the model's output convention, which is not established here.
    """
    # Remember the real size before resizing; PIL reports (width, height).
    original_width, original_height = image.size
    network_input = image.resize((384, 384))

    inputs = processor(images=network_input, return_tensors="pt").to(device)
    predicted = model(**inputs).predicted_depth

    upsampled = torch.nn.functional.interpolate(
        predicted.unsqueeze(1),
        size=(original_height, original_width),
        mode="bicubic",
        align_corners=False,
    )
    # Drop the batch and channel axes by index rather than squeeze() so that
    # a 1-pixel-high or 1-pixel-wide image keeps its two spatial axes.
    depth = upsampled[0, 0].detach().cpu().numpy()

    depth_min, depth_max = depth.min(), depth.max()
    depth_range = depth_max - depth_min
    if depth_range <= 0:
        # Flat prediction: nothing to normalise, avoid 0/0 -> NaN.
        return np.zeros_like(depth)
    return (depth - depth_min) / depth_range
def depth_to_disparity(depth, max_disp=32):
    """Turn normalised depth into a horizontal pixel shift.

    The mapping is linear and inverted: a depth of 0 gives ``max_disp`` and a
    depth of 1 gives 0. Works element-wise on arrays as well as on scalars.
    """
    inverted = 1.0 - depth
    disparity = inverted * max_disp
    return disparity
def generate_right_and_mask(image, disparity):
    """Forward-warp ``image`` into a right-eye view and report the holes.

    Each source pixel ``(y, x)`` is moved to ``(y, x - round(disparity[y, x]))``.
    Pixels that would land outside the image are dropped. When several source
    pixels land on the same target, the one with the largest source ``x``
    wins (sources are applied left to right).

    Args:
        image: Array of shape ``(H, W)`` or ``(H, W, C)``.
        disparity: Array covering at least ``(H, W)`` with the per-pixel
            horizontal shift in pixels.

    Returns:
        ``(right, mask)``: ``right`` has the shape and dtype of ``image`` with
        unfilled pixels left at zero; ``mask`` is a ``uint8`` ``(H, W)`` array
        holding 1 where no source pixel landed and 0 elsewhere.

    Raises:
        IndexError: ``disparity`` is smaller than the image.
        ValueError: ``disparity`` contains NaN or infinite values.
    """
    h, w = image.shape[:2]
    right = np.zeros_like(image)
    mask = np.ones((h, w), dtype=np.uint8)
    if h == 0 or w == 0:
        return right, mask

    disparity = np.asarray(disparity)
    if disparity.shape[0] < h or disparity.shape[1] < w:
        raise IndexError(
            f"disparity of shape {disparity.shape} does not cover a {h}x{w} image"
        )
    window = disparity[:h, :w]
    if not np.isfinite(window).all():
        raise ValueError("disparity contains NaN or infinite values")

    # np.rint rounds half to even, the same rule as the built-in round().
    shifts = np.rint(window).astype(np.int64)

    rows = np.arange(h)
    # Walk the source columns left to right so that later columns overwrite
    # earlier ones. Inside one column every row has a distinct target, so
    # the vectorised assignment never writes the same pixel twice.
    for x in range(w):
        targets = x - shifts[:, x]
        in_bounds = (targets >= 0) & (targets < w)
        src_rows = rows[in_bounds]
        dst_cols = targets[in_bounds]
        right[src_rows, dst_cols] = image[src_rows, x]
        mask[src_rows, dst_cols] = 0
    return right, mask
# === LAMA INPAINTING ===
# NOTE(review): this is the address of the Space's web page; confirm that it
# really serves the `/run/predict` route used below before relying on it.
LAMA_API = "https://huggingface.co/spaces/saic-mdal/lama-inpainting"


def run_lama_inpainting(image_bgr, mask):
    """Fill the masked pixels of ``image_bgr`` via the remote LaMa service.

    The image and mask are written to temporary PNG files, uploaded to
    ``LAMA_API``, and the image referenced in the response is downloaded.

    Args:
        image_bgr: Image array in BGR channel order.
        mask: ``uint8`` array with 1 where pixels must be inpainted; it is
            scaled to 0/255 before upload.

    Returns:
        The inpainted image as a BGR array.

    Raises:
        RuntimeError: The inpainting request did not return HTTP 200.
        requests.RequestException: A request timed out or failed at the
            network level, or the result download returned an error status.
    """
    import io

    # Seconds allowed per request; without a timeout a stalled server would
    # block the calling worker forever.
    timeout_s = 120

    img = Image.fromarray(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB))
    mask_img = Image.fromarray(mask * 255).convert("RGB")

    # The context managers guarantee that the upload handles are closed and
    # the temporary directory is removed, even when the request fails.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "input.png")
        mask_path = os.path.join(tmp_dir, "mask.png")
        img.save(img_path)
        mask_img.save(mask_path)

        with open(img_path, "rb") as img_file, open(mask_path, "rb") as mask_file:
            files = {"image": img_file, "mask": mask_file}
            response = requests.post(
                f"{LAMA_API}/run/predict", files=files, timeout=timeout_s
            )

    if response.status_code != 200:
        raise RuntimeError(
            f"LAMA inpainting failed (HTTP {response.status_code})"
        )

    result_url = response.json()["data"][0]["name"]
    download = requests.get(result_url, timeout=timeout_s)
    download.raise_for_status()
    # Decode from the fully received body and force three channels so the
    # colour conversion below always gets an RGB array.
    result = Image.open(io.BytesIO(download.content)).convert("RGB")
    return cv2.cvtColor(np.array(result), cv2.COLOR_RGB2BGR)
# === APP LOGIC ===
# Loaded once at import time so every request reuses the same model.
depth_model, depth_processor = load_depth_model()


def stereo_pipeline(image_pil):
    """Build a left/right stereo pair from a single uploaded image.

    Steps: estimate depth, convert it to per-pixel disparity, forward-warp the
    image into the right-eye view, then inpaint the holes left by the warp.

    Args:
        image_pil: The uploaded PIL image, or ``None`` when nothing was
            uploaded.

    Returns:
        ``(left, right)``: the unmodified input image and the generated
        right-eye view as an RGB PIL image.

    Raises:
        gr.Error: No image was provided.
    """
    if image_pil is None:
        # The input is None when the form is submitted without an image.
        raise gr.Error("Please upload an image first.")

    image = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    depth = estimate_depth(image, depth_model, depth_processor)

    # The warp indexes the disparity map with image coordinates, so the depth
    # map must have exactly the image's height and width.
    height, width = image_cv.shape[:2]
    if depth.shape[:2] != (height, width):
        depth = cv2.resize(depth, (width, height), interpolation=cv2.INTER_CUBIC)
        # Cubic interpolation can overshoot slightly; keep the [0, 1] range.
        depth = np.clip(depth, 0.0, 1.0)

    disparity = depth_to_disparity(depth)
    right_img, mask = generate_right_and_mask(image_cv, disparity)
    right_filled = run_lama_inpainting(right_img, mask)
    left = image_pil
    right = Image.fromarray(cv2.cvtColor(right_filled, cv2.COLOR_BGR2RGB))
    return left, right
# === GRADIO UI ===
# One image goes in; the left (original) and right (generated) views come out.
_upload_box = gr.Image(type="pil", label="Upload 2D Image")
_stereo_outputs = [
    gr.Image(label="Left Eye (Original)"),
    gr.Image(label="Right Eye (AI Generated)"),
]
demo = gr.Interface(
    fn=stereo_pipeline,
    inputs=_upload_box,
    outputs=_stereo_outputs,
    title="2D to 3D Stereo Generator with LaMa Inpainting",
    description="Generates a stereo pair from a 2D image using depth estimation and LaMa AI inpainting to handle occluded pixels in the right-eye view.",
)
# Start the web server as soon as the module is executed.
demo.launch()