Spaces:
Running
Running
Add Divergence (3D Strength) and Convergence (Focus Point) sliders
Browse files
app.py
CHANGED
|
@@ -42,13 +42,10 @@ def estimate_depth(image_pil, model, processor):
|
|
| 42 |
return (depth - depth_min) / (depth_max - depth_min)
|
| 43 |
return depth
|
| 44 |
|
| 45 |
-
def
|
| 46 |
-
# Invert depth: close objects (bright) shift more
|
| 47 |
-
return depth * max_disp
|
| 48 |
-
|
| 49 |
-
def generate_right_and_mask(image, disparity):
|
| 50 |
"""
|
| 51 |
-
Vectorized shift operation.
|
|
|
|
| 52 |
"""
|
| 53 |
height, width = image.shape[:2]
|
| 54 |
|
|
@@ -56,16 +53,14 @@ def generate_right_and_mask(image, disparity):
|
|
| 56 |
x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
|
| 57 |
|
| 58 |
# Calculate target coordinates (shift pixels to the left for right eye)
|
| 59 |
-
|
| 60 |
-
# Usually: Right Eye View = Original - Disparity
|
| 61 |
-
shift = disparity.astype(int)
|
| 62 |
target_x = x_coords - shift
|
| 63 |
|
| 64 |
# Initialize output and mask
|
| 65 |
right = np.zeros_like(image)
|
| 66 |
mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
|
| 67 |
|
| 68 |
-
# Valid indices mask
|
| 69 |
valid_mask = (target_x >= 0) & (target_x < width)
|
| 70 |
|
| 71 |
# Flatten arrays for advanced indexing
|
|
@@ -74,9 +69,7 @@ def generate_right_and_mask(image, disparity):
|
|
| 74 |
flat_x_source = x_coords[valid_mask]
|
| 75 |
|
| 76 |
# Assign pixels
|
| 77 |
-
# Note:
|
| 78 |
-
# this simple method overwrites. For better results, Z-buffering is needed,
|
| 79 |
-
# but this is sufficient for basic stereo.
|
| 80 |
right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
|
| 81 |
|
| 82 |
# Update Mask: Areas that were written to are NOT holes (0)
|
|
@@ -108,12 +101,8 @@ def make_anaglyph(left, right):
|
|
| 108 |
|
| 109 |
# === LAMA INPAINTING (Via Gradio Client) ===
|
| 110 |
# Note: You need a valid Space that accepts image + mask.
|
| 111 |
-
# Using a popular LaMa space as reference.
|
| 112 |
try:
|
| 113 |
-
|
| 114 |
-
# You can change this string to "frxngb23/lama-inpainting-api" if that space
|
| 115 |
-
# supports the API client, otherwise use "any-other-lama-space"
|
| 116 |
-
lama_client = Client("frxngb23/lama-inpainting-api")
|
| 117 |
except Exception as e:
|
| 118 |
print(f"Could not connect to external LaMa client: {e}")
|
| 119 |
lama_client = None
|
|
@@ -135,12 +124,10 @@ def run_lama_inpainting(image_bgr, mask):
|
|
| 135 |
|
| 136 |
try:
|
| 137 |
# Predict using the external space
|
| 138 |
-
# Note: The api_name="/predict" or parameters might vary per Space.
|
| 139 |
-
# You must check the "View API" button at the bottom of the target Space.
|
| 140 |
result_path = lama_client.predict(
|
| 141 |
image=handle_file(f_img.name),
|
| 142 |
mask=handle_file(f_mask.name),
|
| 143 |
-
api_name="/
|
| 144 |
)
|
| 145 |
|
| 146 |
# Result is a filepath
|
|
@@ -158,20 +145,25 @@ def run_lama_inpainting(image_bgr, mask):
|
|
| 158 |
# === APP LOGIC ===
|
| 159 |
depth_model, depth_processor = load_depth_model()
|
| 160 |
|
| 161 |
-
def stereo_pipeline(image_pil):
|
| 162 |
if image_pil is None:
|
| 163 |
-
return None
|
| 164 |
|
| 165 |
image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
|
| 166 |
|
| 167 |
-
# 1. Estimate Depth
|
| 168 |
depth = estimate_depth(image_pil, depth_model, depth_processor)
|
| 169 |
|
| 170 |
-
# 2. Calculate
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
# 3. Shift Pixels
|
| 174 |
-
right_img, mask = generate_right_and_mask(image_cv,
|
| 175 |
|
| 176 |
# 4. Inpaint Holes
|
| 177 |
# Pass the mask where 255 indicates holes to be filled
|
|
@@ -197,17 +189,36 @@ with gr.Blocks(title="2D to 3D Stereo") as demo:
|
|
| 197 |
gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
|
| 198 |
|
| 199 |
with gr.Row():
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
with gr.Row():
|
| 203 |
-
|
| 204 |
-
out_stereo = gr.Image(label="Side-by-Side Stereo Pair")
|
| 205 |
-
out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)")
|
| 206 |
|
| 207 |
-
btn
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
| 211 |
|
| 212 |
if __name__ == "__main__":
|
| 213 |
demo.launch()
|
|
|
|
| 42 |
return (depth - depth_min) / (depth_max - depth_min)
|
| 43 |
return depth
|
| 44 |
|
| 45 |
+
def generate_right_and_mask(image, shift_map):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
"""
|
| 47 |
+
Vectorized shift operation.
|
| 48 |
+
shift_map: 2D array indicating how many pixels to shift left (positive) or right (negative).
|
| 49 |
"""
|
| 50 |
height, width = image.shape[:2]
|
| 51 |
|
|
|
|
| 53 |
x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
|
| 54 |
|
| 55 |
# Calculate target coordinates (shift pixels to the left for right eye)
|
| 56 |
+
shift = shift_map.astype(int)
|
|
|
|
|
|
|
| 57 |
target_x = x_coords - shift
|
| 58 |
|
| 59 |
# Initialize output and mask
|
| 60 |
right = np.zeros_like(image)
|
| 61 |
mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
|
| 62 |
|
| 63 |
+
# Valid indices mask (ensure pixels land within image bounds)
|
| 64 |
valid_mask = (target_x >= 0) & (target_x < width)
|
| 65 |
|
| 66 |
# Flatten arrays for advanced indexing
|
|
|
|
| 69 |
flat_x_source = x_coords[valid_mask]
|
| 70 |
|
| 71 |
# Assign pixels
|
| 72 |
+
# Note: simple overwriting handles occlusions naively but effectively for this use case
|
|
|
|
|
|
|
| 73 |
right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
|
| 74 |
|
| 75 |
# Update Mask: Areas that were written to are NOT holes (0)
|
|
|
|
| 101 |
|
| 102 |
# === LAMA INPAINTING (Via Gradio Client) ===
|
| 103 |
# Note: You need a valid Space that accepts image + mask.
|
|
|
|
| 104 |
try:
|
| 105 |
+
lama_client = Client("asif-k/LaMa-Inpainting")
|
|
|
|
|
|
|
|
|
|
| 106 |
except Exception as e:
|
| 107 |
print(f"Could not connect to external LaMa client: {e}")
|
| 108 |
lama_client = None
|
|
|
|
| 124 |
|
| 125 |
try:
|
| 126 |
# Predict using the external space
|
|
|
|
|
|
|
| 127 |
result_path = lama_client.predict(
|
| 128 |
image=handle_file(f_img.name),
|
| 129 |
mask=handle_file(f_mask.name),
|
| 130 |
+
api_name="/predict"
|
| 131 |
)
|
| 132 |
|
| 133 |
# Result is a filepath
|
|
|
|
| 145 |
# === APP LOGIC ===
|
| 146 |
depth_model, depth_processor = load_depth_model()
|
| 147 |
|
| 148 |
+
def stereo_pipeline(image_pil, divergence, convergence):
|
| 149 |
if image_pil is None:
|
| 150 |
+
return None, None
|
| 151 |
|
| 152 |
image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
|
| 153 |
|
| 154 |
+
# 1. Estimate Depth (0.0 far to 1.0 near)
|
| 155 |
depth = estimate_depth(image_pil, depth_model, depth_processor)
|
| 156 |
|
| 157 |
+
# 2. Calculate Shift Map
|
| 158 |
+
# Divergence: Overall separation strength (pixels)
|
| 159 |
+
# Convergence: The depth plane that stays still (0.0 - 1.0)
|
| 160 |
+
# Result:
|
| 161 |
+
# Positive shift (Leftwards) = Pop out of screen (Near objects)
|
| 162 |
+
# Negative shift (Rightwards) = Go into screen (Far objects)
|
| 163 |
+
shift = (depth - convergence) * divergence
|
| 164 |
|
| 165 |
# 3. Shift Pixels
|
| 166 |
+
right_img, mask = generate_right_and_mask(image_cv, shift)
|
| 167 |
|
| 168 |
# 4. Inpaint Holes
|
| 169 |
# Pass the mask where 255 indicates holes to be filled
|
|
|
|
| 189 |
gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
|
| 190 |
|
| 191 |
with gr.Row():
|
| 192 |
+
with gr.Column(scale=1):
|
| 193 |
+
input_img = gr.Image(type="pil", label="Input Image", height=480)
|
| 194 |
+
|
| 195 |
+
# === Controls ===
|
| 196 |
+
with gr.Group():
|
| 197 |
+
gr.Markdown("### 3D Controls")
|
| 198 |
+
divergence_slider = gr.Slider(
|
| 199 |
+
minimum=0, maximum=100, value=30, step=1,
|
| 200 |
+
label="3D Strength (Divergence)",
|
| 201 |
+
info="Max pixel separation. Higher = Deeper 3D effect."
|
| 202 |
+
)
|
| 203 |
+
convergence_slider = gr.Slider(
|
| 204 |
+
minimum=0.0, maximum=1.0, value=0.1, step=0.05,
|
| 205 |
+
label="Focus Plane (Convergence)",
|
| 206 |
+
info="0.0 = Background at screen depth. 0.5 = Mid-range at screen. 1.0 = Foreground at screen."
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
btn = gr.Button("Generate 3D", variant="primary")
|
| 210 |
+
|
| 211 |
+
with gr.Column(scale=1):
|
| 212 |
+
out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)", height=480)
|
| 213 |
|
| 214 |
with gr.Row():
|
| 215 |
+
out_stereo = gr.Image(label="Side-by-Side Stereo Pair", height=400)
|
|
|
|
|
|
|
| 216 |
|
| 217 |
+
btn.click(
|
| 218 |
+
fn=stereo_pipeline,
|
| 219 |
+
inputs=[input_img, divergence_slider, convergence_slider],
|
| 220 |
+
outputs=[out_stereo, out_anaglyph]
|
| 221 |
+
)
|
| 222 |
|
| 223 |
if __name__ == "__main__":
|
| 224 |
demo.launch()
|