enoky committed on
Commit
f98a0fe
·
verified ·
1 Parent(s): 7c6c979

Add Divergence (3D Strength) and Convergence (Focus Point) sliders

Browse files
Files changed (1) hide show
  1. app.py +46 -35
app.py CHANGED
@@ -42,13 +42,10 @@ def estimate_depth(image_pil, model, processor):
42
  return (depth - depth_min) / (depth_max - depth_min)
43
  return depth
44
 
45
- def depth_to_disparity(depth, max_disp=30):
46
- # Invert depth: close objects (bright) shift more
47
- return depth * max_disp
48
-
49
- def generate_right_and_mask(image, disparity):
50
  """
51
- Vectorized shift operation. 100x faster than for-loops.
 
52
  """
53
  height, width = image.shape[:2]
54
 
@@ -56,16 +53,14 @@ def generate_right_and_mask(image, disparity):
56
  x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
57
 
58
  # Calculate target coordinates (shift pixels to the left for right eye)
59
- # Note: Disparity logic depends on convergence plane.
60
- # Usually: Right Eye View = Original - Disparity
61
- shift = disparity.astype(int)
62
  target_x = x_coords - shift
63
 
64
  # Initialize output and mask
65
  right = np.zeros_like(image)
66
  mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
67
 
68
- # Valid indices mask
69
  valid_mask = (target_x >= 0) & (target_x < width)
70
 
71
  # Flatten arrays for advanced indexing
@@ -74,9 +69,7 @@ def generate_right_and_mask(image, disparity):
74
  flat_x_source = x_coords[valid_mask]
75
 
76
  # Assign pixels
77
- # Note: In case of collision (two pixels mapping to same spot),
78
- # this simple method overwrites. For better results, Z-buffering is needed,
79
- # but this is sufficient for basic stereo.
80
  right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
81
 
82
  # Update Mask: Areas that were written to are NOT holes (0)
@@ -108,12 +101,8 @@ def make_anaglyph(left, right):
108
 
109
  # === LAMA INPAINTING (Via Gradio Client) ===
110
  # Note: You need a valid Space that accepts image + mask.
111
- # Using a popular LaMa space as reference.
112
  try:
113
- # Attempt to connect to a public LaMa space
114
- # You can change this string to "frxngb23/lama-inpainting-api" if that space
115
- # supports the API client, otherwise use "any-other-lama-space"
116
- lama_client = Client("frxngb23/lama-inpainting-api")
117
  except Exception as e:
118
  print(f"Could not connect to external LaMa client: {e}")
119
  lama_client = None
@@ -135,12 +124,10 @@ def run_lama_inpainting(image_bgr, mask):
135
 
136
  try:
137
  # Predict using the external space
138
- # Note: The api_name="/predict" or parameters might vary per Space.
139
- # You must check the "View API" button at the bottom of the target Space.
140
  result_path = lama_client.predict(
141
  image=handle_file(f_img.name),
142
  mask=handle_file(f_mask.name),
143
- api_name="/inpaint"
144
  )
145
 
146
  # Result is a filepath
@@ -158,20 +145,25 @@ def run_lama_inpainting(image_bgr, mask):
158
  # === APP LOGIC ===
159
  depth_model, depth_processor = load_depth_model()
160
 
161
- def stereo_pipeline(image_pil):
162
  if image_pil is None:
163
- return None
164
 
165
  image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
166
 
167
- # 1. Estimate Depth
168
  depth = estimate_depth(image_pil, depth_model, depth_processor)
169
 
170
- # 2. Calculate Disparity
171
- disparity = depth_to_disparity(depth)
 
 
 
 
 
172
 
173
  # 3. Shift Pixels
174
- right_img, mask = generate_right_and_mask(image_cv, disparity)
175
 
176
  # 4. Inpaint Holes
177
  # Pass the mask where 255 indicates holes to be filled
@@ -197,17 +189,36 @@ with gr.Blocks(title="2D to 3D Stereo") as demo:
197
  gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
198
 
199
  with gr.Row():
200
- input_img = gr.Image(type="pil", label="Input Image")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  with gr.Row():
203
- # Changed to a single output image
204
- out_stereo = gr.Image(label="Side-by-Side Stereo Pair")
205
- out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)")
206
 
207
- btn = gr.Button("Generate 3D")
208
-
209
- # Updated outputs to single component
210
- btn.click(fn=stereo_pipeline, inputs=input_img, outputs=[out_stereo, out_anaglyph])
 
211
 
212
  if __name__ == "__main__":
213
  demo.launch()
 
42
  return (depth - depth_min) / (depth_max - depth_min)
43
  return depth
44
 
45
+ def generate_right_and_mask(image, shift_map):
 
 
 
 
46
  """
47
+ Vectorized shift operation.
48
+ shift_map: 2D array indicating how many pixels to shift left (positive) or right (negative).
49
  """
50
  height, width = image.shape[:2]
51
 
 
53
  x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))
54
 
55
  # Calculate target coordinates (shift pixels to the left for right eye)
56
+ shift = shift_map.astype(int)
 
 
57
  target_x = x_coords - shift
58
 
59
  # Initialize output and mask
60
  right = np.zeros_like(image)
61
  mask = np.ones((height, width), dtype=np.uint8) * 255 # 255 = hole/inpainting area
62
 
63
+ # Valid indices mask (ensure pixels land within image bounds)
64
  valid_mask = (target_x >= 0) & (target_x < width)
65
 
66
  # Flatten arrays for advanced indexing
 
69
  flat_x_source = x_coords[valid_mask]
70
 
71
  # Assign pixels
72
+ # Note: simple overwriting handles occlusions naively but effectively for this use case
 
 
73
  right[flat_y, flat_x_target] = image[flat_y, flat_x_source]
74
 
75
  # Update Mask: Areas that were written to are NOT holes (0)
 
101
 
102
  # === LAMA INPAINTING (Via Gradio Client) ===
103
  # Note: You need a valid Space that accepts image + mask.
 
104
  try:
105
+ lama_client = Client("asif-k/LaMa-Inpainting")
 
 
 
106
  except Exception as e:
107
  print(f"Could not connect to external LaMa client: {e}")
108
  lama_client = None
 
124
 
125
  try:
126
  # Predict using the external space
 
 
127
  result_path = lama_client.predict(
128
  image=handle_file(f_img.name),
129
  mask=handle_file(f_mask.name),
130
+ api_name="/predict"
131
  )
132
 
133
  # Result is a filepath
 
145
  # === APP LOGIC ===
146
  depth_model, depth_processor = load_depth_model()
147
 
148
+ def stereo_pipeline(image_pil, divergence, convergence):
149
  if image_pil is None:
150
+ return None, None
151
 
152
  image_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
153
 
154
+ # 1. Estimate Depth (0.0 far to 1.0 near)
155
  depth = estimate_depth(image_pil, depth_model, depth_processor)
156
 
157
+ # 2. Calculate Shift Map
158
+ # Divergence: Overall separation strength (pixels)
159
+ # Convergence: The depth plane that stays still (0.0 - 1.0)
160
+ # Result:
161
+ # Positive shift (Leftwards) = Pop out of screen (Near objects)
162
+ # Negative shift (Rightwards) = Go into screen (Far objects)
163
+ shift = (depth - convergence) * divergence
164
 
165
  # 3. Shift Pixels
166
+ right_img, mask = generate_right_and_mask(image_cv, shift)
167
 
168
  # 4. Inpaint Holes
169
  # Pass the mask where 255 indicates holes to be filled
 
189
  gr.Markdown("Generates a side-by-side stereo pair and anaglyph using Depth Estimation and LaMa Inpainting.")
190
 
191
  with gr.Row():
192
+ with gr.Column(scale=1):
193
+ input_img = gr.Image(type="pil", label="Input Image", height=480)
194
+
195
+ # === Controls ===
196
+ with gr.Group():
197
+ gr.Markdown("### 3D Controls")
198
+ divergence_slider = gr.Slider(
199
+ minimum=0, maximum=100, value=30, step=1,
200
+ label="3D Strength (Divergence)",
201
+ info="Max pixel separation. Higher = Deeper 3D effect."
202
+ )
203
+ convergence_slider = gr.Slider(
204
+ minimum=0.0, maximum=1.0, value=0.1, step=0.05,
205
+ label="Focus Plane (Convergence)",
206
+ info="0.0 = Background at screen depth. 0.5 = Mid-range at screen. 1.0 = Foreground at screen."
207
+ )
208
+
209
+ btn = gr.Button("Generate 3D", variant="primary")
210
+
211
+ with gr.Column(scale=1):
212
+ out_anaglyph = gr.Image(label="Anaglyph (Red/Cyan)", height=480)
213
 
214
  with gr.Row():
215
+ out_stereo = gr.Image(label="Side-by-Side Stereo Pair", height=400)
 
 
216
 
217
+ btn.click(
218
+ fn=stereo_pipeline,
219
+ inputs=[input_img, divergence_slider, convergence_slider],
220
+ outputs=[out_stereo, out_anaglyph]
221
+ )
222
 
223
  if __name__ == "__main__":
224
  demo.launch()