enoky committed on
Commit
0482a4e
·
verified ·
1 Parent(s): 25f23e9

Upload space application files

Browse files
Files changed (2) hide show
  1. app.py +180 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import cv2
5
+ from PIL import Image
6
+ from transformers import DPTForDepthEstimation, DPTImageProcessor
7
+ from gradio_client import Client, handle_file
8
+ import tempfile
9
+ import os
10
+
11
# === DEVICE ===
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# === DEPTH MODEL ===
def load_depth_model(model_name="Intel/dpt-hybrid-midas"):
    """Load a DPT depth-estimation model together with its image processor.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls, so
            the model and the processor always come from the same
            checkpoint. Defaults to the checkpoint the app was written for.

    Returns:
        A ``(model, processor)`` tuple. The model has been moved to
        ``device`` and switched to evaluation mode.
    """
    # DPTImageProcessor is the modern replacement for FeatureExtractor.
    model = DPTForDepthEstimation.from_pretrained(model_name).to(device)
    # Explicit, so inference never depends on how the checkpoint was saved.
    model.eval()
    processor = DPTImageProcessor.from_pretrained(model_name)
    return model, processor
20
+
21
@torch.no_grad()
def estimate_depth(image_pil, model, processor):
    """Predict a depth map for ``image_pil`` and min-max scale it to [0, 1].

    Args:
        image_pil: Input image. Its ``.size`` gives the output resolution and
            the image itself is handed to ``processor``.
        model: Depth model; called with the processor output, and its result
            must expose ``predicted_depth``.
        processor: Callable accepting ``images=`` and ``return_tensors=`` and
            returning an object with ``.to(device)``.

    Returns:
        A 2-D numpy array of shape ``(height, width)`` matching the input
        image. Values are scaled to [0, 1]; if the prediction is flat (no
        range to scale by) an all-zero map is returned.
    """
    # PIL reports (width, height); torch wants (height, width) below.
    width, height = image_pil.size

    # The processor handles resizing/normalisation for the model.
    inputs = processor(images=image_pil, return_tensors="pt").to(device)
    predicted = model(**inputs).predicted_depth

    # Bring the prediction back to the ORIGINAL image resolution.
    # assumes predicted_depth is (batch, H, W) with batch == 1 — TODO confirm
    resized = torch.nn.functional.interpolate(
        predicted.unsqueeze(1),
        size=(height, width),
        mode="bicubic",
        align_corners=False,
    )
    # Index rather than squeeze(): squeeze() would also drop a spatial axis
    # of length 1 and hand back a 1-D array for 1-pixel-high/wide images.
    depth = resized[0, 0].detach().cpu().numpy()

    depth_min, depth_max = depth.min(), depth.max()
    span = depth_max - depth_min
    if span > 0:
        return (depth - depth_min) / span
    # Flat prediction: return zeros so callers always get values in [0, 1]
    # instead of the raw, unnormalised depth.
    return np.zeros_like(depth)
44
+
45
def depth_to_disparity(depth, max_disp=30):
    """Scale a depth map into per-pixel horizontal disparity.

    The input is multiplied by ``max_disp`` and nothing is inverted, so
    larger input values produce larger shifts.
    NOTE(review): this presumes larger depth values mean "closer" (bright =
    near) — confirm for the depth model in use.

    Args:
        depth: Array-like depth values (normally already scaled to [0, 1]).
        max_disp: Disparity, in pixels, assigned to a depth value of 1.

    Returns:
        ``depth * max_disp`` as a numpy value with the same shape as
        ``depth``.
    """
    # asarray() so plain lists are scaled element-wise rather than repeated.
    return np.asarray(depth) * max_disp
48
+
49
def generate_right_and_mask(image, disparity):
    """Forward-warp ``image`` into a right-eye view and report the holes.

    Each pixel at column ``x`` is moved to column ``x - int(disparity)`` on
    the same row (the fractional part of the disparity is truncated).
    Pixels whose target column falls outside the image are dropped.

    When several source pixels land on the same target, the one with the
    largest disparity is kept. The choice is made explicitly instead of
    relying on the write order of NumPy advanced assignment, which is not
    guaranteed for repeated indices.

    Args:
        image: Array of shape ``(height, width)`` or ``(height, width, C)``.
        disparity: Per-pixel shift in pixels, broadcastable to
            ``(height, width)``.

    Returns:
        ``(right, mask)``: ``right`` has the shape and dtype of ``image``
        with unwritten pixels left at 0; ``mask`` is a ``uint8`` array of
        shape ``(height, width)`` where 255 marks a hole to inpaint and 0
        marks a pixel that was written.
    """
    height, width = image.shape[:2]

    # Coordinate grids: x_coords[y, x] == x and y_coords[y, x] == y.
    x_coords, y_coords = np.meshgrid(np.arange(width), np.arange(height))

    # Right-eye view = original position minus disparity (shift left).
    shift = np.asarray(disparity).astype(int)
    target_x = x_coords - shift

    right = np.zeros_like(image)
    mask = np.full((height, width), 255, dtype=np.uint8)  # 255 = hole

    # Keep only the pixels that stay inside the frame.
    in_frame = (target_x >= 0) & (target_x < width)
    rows = y_coords[in_frame]
    src_cols = x_coords[in_frame]
    dst_cols = target_x[in_frame]

    # Resolve collisions: sort by target location, then by applied shift,
    # and keep the last entry of every target group (largest shift).
    dst_flat = rows * width + dst_cols
    order = np.lexsort((src_cols - dst_cols, dst_flat))
    dst_sorted = dst_flat[order]
    is_group_last = np.ones(dst_sorted.shape, dtype=bool)
    is_group_last[:-1] = dst_sorted[1:] != dst_sorted[:-1]
    winners = order[is_group_last]

    win_rows = rows[winners]
    win_dst = dst_cols[winners]
    right[win_rows, win_dst] = image[win_rows, src_cols[winners]]

    # Everything that received a pixel is not a hole.
    mask[win_rows, win_dst] = 0

    return right, mask
86
+
87
# === LAMA INPAINTING (Via Gradio Client) ===
# Inpainting is delegated to an external Gradio Space that must accept an
# image plus a mask. The Space id below is a reference choice; another LaMa
# Space (e.g. "frxngb23/lama-inpainting-api") can be substituted as long as
# it supports the API client.
#
# Connecting is best-effort: if it fails, the reason is printed and
# lama_client stays None.
lama_client = None
try:
    lama_client = Client("asif-k/LaMa-Inpainting")
except Exception as e:
    print(f"Could not connect to external LaMa client: {e}")
98
+
99
def run_lama_inpainting(image_bgr, mask):
    """Fill the masked holes of ``image_bgr`` using the external LaMa Space.

    Args:
        image_bgr: Image array in OpenCV BGR channel order.
        mask: Single-channel array saved as the mask image; the caller uses
            255 for holes to fill.

    Returns:
        The inpainted image as a BGR array. If no client is available or the
        remote call fails, ``image_bgr`` is returned unchanged (holes
        included) and the reason is printed.
    """
    if lama_client is None:
        print("LaMa client unavailable, returning unfilled image.")
        return image_bgr

    # The files handed to the client are written as RGB via PIL.
    img_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

    # A temporary directory removes both files on exit, even when saving or
    # the remote call raises. No handle is held open while PIL writes to the
    # paths, unlike NamedTemporaryFile(delete=False).
    with tempfile.TemporaryDirectory() as workdir:
        img_path = os.path.join(workdir, "image.png")
        mask_path = os.path.join(workdir, "mask.png")
        Image.fromarray(img_rgb).save(img_path)
        Image.fromarray(mask).save(mask_path)

        try:
            # NOTE: api_name and parameter names vary per Space; check the
            # "View API" button at the bottom of the target Space.
            result_path = lama_client.predict(
                image=handle_file(img_path),
                mask=handle_file(mask_path),
                api_name="/predict",
            )

            # The result is a file path. Force RGB so the colour conversion
            # below always sees three channels, and close the file promptly.
            with Image.open(result_path) as res_img:
                result_rgb = np.array(res_img.convert("RGB"))
            return cv2.cvtColor(result_rgb, cv2.COLOR_RGB2BGR)

        except Exception as e:
            # Deliberate best-effort: fall back to the unfilled image.
            print(f"Inpainting failed: {e}")
            return image_bgr
135
+
136
# === APP LOGIC ===
# Loaded once at import so every request reuses the same model instance.
depth_model, depth_processor = load_depth_model()


def stereo_pipeline(image_pil):
    """Turn a single image into a (left, right) stereo pair.

    Steps: estimate depth, scale it to disparity, shift pixels to build the
    right-eye view, then inpaint the holes the shift leaves behind.

    Args:
        image_pil: Input PIL image, or ``None`` when nothing was uploaded.

    Returns:
        ``(left, right)`` where ``left`` is the input image as given and
        ``right`` is the generated right-eye PIL image; ``(None, None)``
        when ``image_pil`` is ``None``.
    """
    if image_pil is None:
        return None, None

    # Uploads may be grayscale, palette or RGBA. Work on an RGB copy so the
    # array below is always (H, W, 3), as COLOR_RGB2BGR requires.
    rgb_pil = image_pil.convert("RGB")
    image_cv = cv2.cvtColor(np.array(rgb_pil), cv2.COLOR_RGB2BGR)

    # 1. Estimate depth
    depth = estimate_depth(rgb_pil, depth_model, depth_processor)

    # 2. Calculate disparity
    disparity = depth_to_disparity(depth)

    # 3. Shift pixels
    right_img, mask = generate_right_and_mask(image_cv, disparity)

    # 4. Inpaint holes (mask value 255 marks the areas to fill)
    right_filled = run_lama_inpainting(right_img, mask)

    left = image_pil
    right = Image.fromarray(cv2.cvtColor(right_filled, cv2.COLOR_BGR2RGB))

    return left, right
162
+
163
# === GRADIO UI ===
# Layout: heading and description, one row with the input image, one row
# with the two output eyes, then the button that runs the pipeline.
with gr.Blocks(title="2D to 3D Stereo") as demo:
    gr.Markdown("## 2D to 3D Stereo Generator")
    gr.Markdown("Generates a stereo pair using Depth Estimation and LaMa Inpainting.")

    with gr.Row():
        input_img = gr.Image(type="pil", label="Input Image")

    with gr.Row():
        out_left = gr.Image(label="Left Eye")
        out_right = gr.Image(label="Right Eye (Generated)")

    btn = gr.Button("Generate 3D")

    # One input image in, the (left, right) pair out.
    btn.click(stereo_pipeline, input_img, [out_left, out_right])


# Start the server only when executed as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ gradio_client
3
+ torch
4
+ numpy
5
+ opencv-python
6
+ pillow
7
+ transformers
8
+ scipy