enoky committed on
Commit
f98a0fe
·
verified ·
1 Parent(s): 7c6c979

Add Divergence (3D Strength) and Convergence (Focus Point) sliders

Browse files
Files changed (1) hide show
  1. app.py +46 -35
app.py CHANGED
@@ -42,13 +42,10 @@ def estimate_depth(image_pil, model, processor):
42
  return (depth - depth_min) / (depth_max - depth_min)
43
  return depth
44
 
45
- def depth_to_disparity(depth, max_disp=30):
46
- # Invert depth: close objects (bright) shift more
47
- return depth * max_disp
48
-
49
- def generate_right_and_mask(image, disparity):
50
  """
51
- Vectorized shift operation. 100x faster than for-loops.
 
52
  """
53
  height, width = image.shape[:2]
54
 
@@ -56,16 +53,14 @@ def generate_right_and_mask(image, disparity):
56
  x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
57
 
58
  # Calculate target coordinates (shift pixels to the left for right eye)
59
- # Note: Disparity logic depends on convergence plane.
60
- # Usually: Right Eye View = Original - Disparity
61
- shift = disparity.astype(int)
62
  target_x = x_coords - shift
63
 
64
  # Initialize output and mask
65
  right = np.zeros_like(image)
66
  mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
67
 
68
- # Valid indices mask
69
  valid_mask = (target_x >= 0) & (target_x < width)
70
 
71
  # Flatten arrays for advanced indexing
@@ -74,9 +69,7 @@ def generate_right_and_mask(image, disparity):
74
  flat_x_source = x_coords[valid_mask]
75
 
76
  # Assign pixels
77
- # Note: In case of collision (two pixels mapping to same spot),
78
- # this simple method overwrites. For better results, Z-buffering is needed,
79
- # but this is sufficient for basic stereo.
80
  right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
81
 
82
  # Update Mask: Areas that were written to are NOT holes (0)
@@ -108,12 +101,8 @@ def make_anaglyph(left, right):
108
 
109
  # === LAMA INPAINTING (Via Gradio Client) ===
110
  # Note: You need a valid Space that accepts image + mask.
111
- # Using a popular LaMa space as reference.
112
  try:
113
- # Attempt to connect to a public LaMa space
114
- # You can change this string to "frxngb23/lama-inpainting-api" if that space
115
- # supports the API client, otherwise use "any-other-lama-space"
116
- lama_client = Client("frxngb23/lama-inpainting-api")
117
  except Exception as e:
118
  print(f"Could not connect to external LaMa client: {e}")
119
  lama_client = None
@@ -135,12 +124,10 @@ def run_lama_inpainting(image_bgr, mask):
135
 
136
  try:
137
  # Predict using the external space
138
- # Note: The api_name="/predict" or parameters might vary per Space.
139
- # You must check the "View API" button at the bottom of the target Space.
140
  result_path = lama_client.predict(
141
  image=handle_file(f_img.name),
142
  mask=handle_file(f_mask.name),
143
- api_name="/inpaint"
144
  )
145
 
146
  # Result is a filepath
@@ -158,20 +145,25 @@ def run_lama_inpainting(image_bgr, mask):
158
  # === APP LOGIC ===
159
  depth_model, depth_processor = load_depth_model()
160
 
161
- def stereo_pipeline(image_pil):
162
  if image_pil is None:
163
- return None
164
 
165
  image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
166
 
167
- # 1. Estimate Depth
168
  depth = estimate_depth(image_pil, depth_model, depth_processor)
169
 
170
- # 2. Calculate Disparity
171
- disparity = depth_to_disparity(depth)
 
 
 
 
 
172
 
173
  # 3. Shift Pixels
174
- right_img, mask = generate_right_and_mask(image_cv, disparity)
175
 
176
  # 4. Inpaint Holes
177
  # Pass the mask where 255 indicates holes to be filled
@@ -197,17 +189,36 @@ with gr.Blocks(title="2D to 3D Stereo") as demo:
197
  gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
198
 
199
  with gr.Row():
200
- input_img = gr.Image(type="pil", label="Input Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  with gr.Row():
203
- # Changed to a single output image
204
- out_stereo = gr.Image(label="Side-by-Side Stereo Pair")
205
- out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)")
206
 
207
- btn = gr.Button("Generate 3D")
208
-
209
- # Updated outputs to single component
210
- btn.click(fn=stereo_pipeline, inputs=input_img, outputs=[out_stereo, out_anaglyph])
 
211
 
212
  if __name__ == "__main__":
213
  demo.launch()
 
42
  return (depth - depth_min) / (depth_max - depth_min)
43
  return depth
44
 
45
+ def generate_right_and_mask(image, shift_map):
 
 
 
 
46
  """
47
+ Vectorized shift operation.
48
+ shift_map: 2D array indicating how many pixels to shift left (positive) or right (negative).
49
  """
50
  height, width = image.shape[:2]
51
 
 
53
  x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
54
 
55
  # Calculate target coordinates (shift pixels to the left for right eye)
56
+ shift = shift_map.astype(int)
 
 
57
  target_x = x_coords - shift
58
 
59
  # Initialize output and mask
60
  right = np.zeros_like(image)
61
  mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
62
 
63
+ # Valid indices mask (ensure pixels land within image bounds)
64
  valid_mask = (target_x >= 0) & (target_x < width)
65
 
66
  # Flatten arrays for advanced indexing
 
69
  flat_x_source = x_coords[valid_mask]
70
 
71
  # Assign pixels
72
+ # Note: simple overwriting handles occlusions naively but effectively for this use case
 
 
73
  right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
74
 
75
  # Update Mask: Areas that were written to are NOT holes (0)
 
101
 
102
  # === LAMA INPAINTING (Via Gradio Client) ===
103
  # Note: You need a valid Space that accepts image + mask.
 
104
  try:
105
+ lama_client = Client("asif-k/LaMa-Inpainting")
 
 
 
106
  except Exception as e:
107
  print(f"Could not connect to external LaMa client: {e}")
108
  lama_client = None
 
124
 
125
  try:
126
  # Predict using the external space
 
 
127
  result_path = lama_client.predict(
128
  image=handle_file(f_img.name),
129
  mask=handle_file(f_mask.name),
130
+ api_name="/predict"
131
  )
132
 
133
  # Result is a filepath
 
145
  # === APP LOGIC ===
146
  depth_model, depth_processor = load_depth_model()
147
 
148
+ def stereo_pipeline(image_pil, divergence, convergence):
149
  if image_pil is None:
150
+ return None, None
151
 
152
  image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
153
 
154
+ # 1. Estimate Depth (0.0 far to 1.0 near)
155
  depth = estimate_depth(image_pil, depth_model, depth_processor)
156
 
157
+ # 2. Calculate Shift Map
158
+ # Divergence: Overall separation strength (pixels)
159
+ # Convergence: The depth plane that stays still (0.0 - 1.0)
160
+ # Result:
161
+ # Positive shift (Leftwards) = Pop out of screen (Near objects)
162
+ # Negative shift (Rightwards) = Go into screen (Far objects)
163
+ shift = (depth - convergence) * divergence
164
 
165
  # 3. Shift Pixels
166
+ right_img, mask = generate_right_and_mask(image_cv, shift)
167
 
168
  # 4. Inpaint Holes
169
  # Pass the mask where 255 indicates holes to be filled
 
189
  gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
190
 
191
  with gr.Row():
192
+ with gr.Column(scale=1):
193
+ input_img = gr.Image(type="pil", label="Input Image", height=480)
194
+
195
+ # === Controls ===
196
+ with gr.Group():
197
+ gr.Markdown("### 3D Controls")
198
+ divergence_slider = gr.Slider(
199
+ minimum=0, maximum=100, value=30, step=1,
200
+ label="3D Strength (Divergence)",
201
+ info="Max pixel separation. Higher = Deeper 3D effect."
202
+ )
203
+ convergence_slider = gr.Slider(
204
+ minimum=0.0, maximum=1.0, value=0.1, step=0.05,
205
+ label="Focus Plane (Convergence)",
206
+ info="0.0 = Background at screen depth. 0.5 = Mid-range at screen. 1.0 = Foreground at screen."
207
+ )
208
+
209
+ btn = gr.Button("Generate 3D", variant="primary")
210
+
211
+ with gr.Column(scale=1):
212
+ out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)", height=480)
213
 
214
  with gr.Row():
215
+ out_stereo = gr.Image(label="Side-by-Side Stereo Pair", height=400)
 
 
216
 
217
+ btn.click(
218
+ fn=stereo_pipeline,
219
+ inputs=[input_img, divergence_slider, convergence_slider],
220
+ outputs=[out_stereo, out_anaglyph]
221
+ )
222
 
223
  if __name__ == "__main__":
224
  demo.launch()