Spaces:
Runtime error
Runtime error
Update generator.py
Browse files- generator.py +16 -24
generator.py
CHANGED
|
@@ -146,64 +146,56 @@ class RetroArtConverter:
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
|
| 149 |
-
"""
|
| 150 |
-
|
| 151 |
-
Supports: LeresDetector, ZoeDetector, or MidasDetector.
|
| 152 |
-
"""
|
| 153 |
-
if self.depth_detector is not None:
|
| 154 |
try:
|
| 155 |
if image.mode != 'RGB':
|
| 156 |
image = image.convert('RGB')
|
| 157 |
|
| 158 |
orig_width, orig_height = image.size
|
|
|
|
| 159 |
orig_width = int(orig_width)
|
| 160 |
orig_height = int(orig_height)
|
| 161 |
|
|
|
|
| 162 |
target_width = int((orig_width // 64) * 64)
|
| 163 |
target_height = int((orig_height // 64) * 64)
|
| 164 |
|
| 165 |
target_width = int(max(64, target_width))
|
| 166 |
target_height = int(max(64, target_height))
|
| 167 |
|
|
|
|
| 168 |
size_for_depth = (int(target_width), int(target_height))
|
|
|
|
|
|
|
|
|
|
| 169 |
image_for_depth = image.resize(size_for_depth, Image.LANCZOS)
|
| 170 |
|
| 171 |
if target_width != orig_width or target_height != orig_height:
|
| 172 |
-
print(f"[DEPTH] Resized for
|
| 173 |
|
| 174 |
-
#
|
| 175 |
with torch.no_grad():
|
| 176 |
-
|
| 177 |
-
self.depth_detector.to(self.device)
|
| 178 |
-
depth_image = self.depth_detector(image_for_depth)
|
| 179 |
-
self.depth_detector.to("cpu")
|
| 180 |
-
|
| 181 |
-
# ADDED: Clear GPU cache after depth detection
|
| 182 |
-
if torch.cuda.is_available():
|
| 183 |
-
torch.cuda.empty_cache()
|
| 184 |
|
| 185 |
depth_width, depth_height = depth_image.size
|
| 186 |
if depth_width != orig_width or depth_height != orig_height:
|
|
|
|
| 187 |
depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
|
|
|
|
| 188 |
|
| 189 |
-
print(f"[DEPTH]
|
| 190 |
return depth_image
|
| 191 |
|
| 192 |
except Exception as e:
|
| 193 |
-
print(f"[DEPTH]
|
| 194 |
-
# ADDED: Clear cache on error
|
| 195 |
-
if torch.cuda.is_available():
|
| 196 |
-
torch.cuda.empty_cache()
|
| 197 |
-
|
| 198 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 199 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 200 |
return Image.fromarray(depth_colored)
|
| 201 |
else:
|
| 202 |
-
print("[DEPTH] No depth detector available, using grayscale fallback")
|
| 203 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 204 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 205 |
-
return Image.fromarray(depth_colored)
|
| 206 |
-
|
| 207 |
|
| 208 |
|
| 209 |
def add_trigger_word(self, prompt):
|
|
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
    """Generate a depth map for ``image`` using Zoe Depth, with a grayscale fallback.

    Args:
        image: PIL image. It is converted to RGB first if it is in another mode.

    Returns:
        A PIL image with the same size as ``image``: the result of calling
        ``self.zoe_depth`` (resized back to the input size if needed), or a
        three-channel grayscale copy of the input when ``self.zoe_depth`` is
        ``None`` or raises.
    """
    # Normalise the mode before branching. The grayscale fallback at the end
    # uses cv2.COLOR_RGB2GRAY, which rejects the 2-D arrays produced by
    # single-channel images (e.g. mode "L" or "P"), so the conversion must
    # also cover the "no detector" path.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    if self.zoe_depth is not None:
        try:
            # Plain Python ints keep PIL's resize() happy even if a caller
            # handed us an image-like object reporting numpy integer sizes.
            orig_width, orig_height = map(int, image.size)

            # Round each side down to a multiple of 64, never below 64.
            target_width = max(64, (orig_width // 64) * 64)
            target_height = max(64, (orig_height // 64) * 64)

            image_for_depth = image.resize((target_width, target_height), Image.LANCZOS)

            if target_width != orig_width or target_height != orig_height:
                print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")

            # Inference only: no autograd graph is needed.
            with torch.no_grad():
                # NOTE(review): assumes the detector returns a PIL image
                # (it is used via .size / .resize below) - confirm.
                depth_image = self.zoe_depth(image_for_depth)

            # Hand back a map that matches the size of the image we received.
            depth_width, depth_height = depth_image.size
            if depth_width != orig_width or depth_height != orig_height:
                depth_image = depth_image.resize((orig_width, orig_height), Image.LANCZOS)

            print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
            return depth_image

        except Exception as e:
            # Deliberate best-effort: any detector failure degrades to the
            # grayscale approximation below instead of aborting the caller.
            print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")

    # Shared fallback (no detector, or detector failed): use the luminance of
    # the input, expanded back to three channels, as a stand-in depth map.
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(depth_colored)
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
def add_trigger_word(self, prompt):
|