Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- generator.py +12 -12
- models.py +9 -9
generator.py
CHANGED
|
@@ -33,16 +33,16 @@ class RetroArtConverter:
|
|
| 33 |
'custom_checkpoint': False,
|
| 34 |
'lora': False,
|
| 35 |
'instantid': False,
|
| 36 |
-
'
|
| 37 |
'ip_adapter': False
|
| 38 |
}
|
| 39 |
|
| 40 |
# Initialize face analysis
|
| 41 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 42 |
|
| 43 |
-
# Load
|
| 44 |
-
self.
|
| 45 |
-
self.models_loaded['
|
| 46 |
|
| 47 |
# Load ControlNets
|
| 48 |
controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
|
|
@@ -146,8 +146,8 @@ class RetroArtConverter:
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
|
| 149 |
-
"""Generate depth map using
|
| 150 |
-
if self.
|
| 151 |
try:
|
| 152 |
if image.mode != 'RGB':
|
| 153 |
image = image.convert('RGB')
|
|
@@ -165,11 +165,11 @@ class RetroArtConverter:
|
|
| 165 |
|
| 166 |
if target_width != orig_width or target_height != orig_height:
|
| 167 |
image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 168 |
-
print(f"[DEPTH] Resized for
|
| 169 |
|
| 170 |
# FIXED: Add torch.no_grad() wrapper
|
| 171 |
with torch.no_grad():
|
| 172 |
-
depth_image = self.
|
| 173 |
|
| 174 |
depth_width, depth_height = depth_image.size
|
| 175 |
# Convert numpy int64 to Python int to avoid PIL errors
|
|
@@ -181,11 +181,11 @@ class RetroArtConverter:
|
|
| 181 |
if depth_width != orig_width_int or depth_height != orig_height_int:
|
| 182 |
depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
|
| 183 |
|
| 184 |
-
print(f"[DEPTH]
|
| 185 |
return depth_image
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
-
print(f"[DEPTH]
|
| 189 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 190 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 191 |
return Image.fromarray(depth_colored)
|
|
@@ -467,7 +467,7 @@ class RetroArtConverter:
|
|
| 467 |
resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 468 |
|
| 469 |
# Generate depth map
|
| 470 |
-
print("Generating
|
| 471 |
depth_image = self.get_depth_map(resized_image)
|
| 472 |
if depth_image.size != (target_width, target_height):
|
| 473 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
@@ -636,7 +636,7 @@ class RetroArtConverter:
|
|
| 636 |
# Reshape for Resampler: [1, 1, 512]
|
| 637 |
face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
|
| 638 |
|
| 639 |
-
# Pass through Resampler: [1, 1, 512]
|
| 640 |
face_proj_embeds = self.image_proj_model(face_emb_tensor)
|
| 641 |
|
| 642 |
# Scale with identity preservation
|
|
|
|
| 33 |
'custom_checkpoint': False,
|
| 34 |
'lora': False,
|
| 35 |
'instantid': False,
|
| 36 |
+
'midas_depth': False,
|
| 37 |
'ip_adapter': False
|
| 38 |
}
|
| 39 |
|
| 40 |
# Initialize face analysis
|
| 41 |
self.face_app, self.face_detection_enabled = load_face_analysis()
|
| 42 |
|
| 43 |
+
# Load Midas Depth detector
|
| 44 |
+
self.midas_depth, midas_success = load_depth_detector()
|
| 45 |
+
self.models_loaded['midas_depth'] = midas_success
|
| 46 |
|
| 47 |
# Load ControlNets
|
| 48 |
controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
|
|
|
|
| 146 |
print("============================\n")
|
| 147 |
|
| 148 |
def get_depth_map(self, image):
|
| 149 |
+
"""Generate depth map using Midas Depth"""
|
| 150 |
+
if self.midas_depth is not None:
|
| 151 |
try:
|
| 152 |
if image.mode != 'RGB':
|
| 153 |
image = image.convert('RGB')
|
|
|
|
| 165 |
|
| 166 |
if target_width != orig_width or target_height != orig_height:
|
| 167 |
image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 168 |
+
print(f"[DEPTH] Resized for MidasDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
|
| 169 |
|
| 170 |
# FIXED: Add torch.no_grad() wrapper
|
| 171 |
with torch.no_grad():
|
| 172 |
+
depth_image = self.midas_depth(image)
|
| 173 |
|
| 174 |
depth_width, depth_height = depth_image.size
|
| 175 |
# Convert numpy int64 to Python int to avoid PIL errors
|
|
|
|
| 181 |
if depth_width != orig_width_int or depth_height != orig_height_int:
|
| 182 |
depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
|
| 183 |
|
| 184 |
+
print(f"[DEPTH] Midas depth map generated: {orig_width}x{orig_height}")
|
| 185 |
return depth_image
|
| 186 |
|
| 187 |
except Exception as e:
|
| 188 |
+
print(f"[DEPTH] MidasDetector failed ({e}), falling back to grayscale depth")
|
| 189 |
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
|
| 190 |
depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 191 |
return Image.fromarray(depth_colored)
|
|
|
|
| 467 |
resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
| 468 |
|
| 469 |
# Generate depth map
|
| 470 |
+
print("Generating Midas depth map...")
|
| 471 |
depth_image = self.get_depth_map(resized_image)
|
| 472 |
if depth_image.size != (target_width, target_height):
|
| 473 |
depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
|
|
|
|
| 636 |
# Reshape for Resampler: [1, 1, 512]
|
| 637 |
face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
|
| 638 |
|
| 639 |
+
# Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
|
| 640 |
face_proj_embeds = self.image_proj_model(face_emb_tensor)
|
| 641 |
|
| 642 |
# Scale with identity preservation
|
models.py
CHANGED
|
@@ -13,7 +13,7 @@ from diffusers import (
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
-
from controlnet_aux import
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
@@ -82,15 +82,15 @@ def load_face_analysis():
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
|
| 85 |
-
"""Load
|
| 86 |
-
print("Loading
|
| 87 |
try:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
print(" [OK]
|
| 91 |
-
return
|
| 92 |
except Exception as e:
|
| 93 |
-
print(f" [WARNING]
|
| 94 |
return None, False
|
| 95 |
|
| 96 |
|
|
@@ -276,7 +276,7 @@ def setup_ip_adapter(pipe, image_encoder):
|
|
| 276 |
|
| 277 |
print(" [OK] IP-Adapter fully loaded with InstantID architecture")
|
| 278 |
print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
|
| 279 |
-
print(f" - Face embeddings: 512D
|
| 280 |
|
| 281 |
return image_proj_model, True
|
| 282 |
|
|
|
|
| 13 |
from diffusers.models.attention_processor import AttnProcessor2_0
|
| 14 |
from transformers import CLIPVisionModelWithProjection
|
| 15 |
from insightface.app import FaceAnalysis
|
| 16 |
+
from controlnet_aux import MidasDetector
|
| 17 |
from huggingface_hub import hf_hub_download
|
| 18 |
from compel import Compel, ReturnedEmbeddingsType
|
| 19 |
|
|
|
|
| 82 |
|
| 83 |
|
| 84 |
def load_depth_detector():
    """Load the MiDaS depth detector used for depth-map ControlNet conditioning.

    Downloads/loads the pretrained annotator from the ``lllyasviel/Annotators``
    hub repo and moves it to the module-level ``device``.

    Returns:
        tuple: ``(detector, True)`` on success, or ``(None, False)`` when the
        detector cannot be loaded (best-effort — callers fall back gracefully).
    """
    print("Loading Midas Depth detector...")
    try:
        # NOTE(review): assumes module-level `device` is set earlier in this
        # file — confirm against the full models.py.
        detector = MidasDetector.from_pretrained("lllyasviel/Annotators")
        detector.to(device)
    except Exception as e:
        # Best-effort: signal failure instead of crashing app startup.
        print(f" [WARNING] Midas Depth not available: {e}")
        return None, False
    print(" [OK] Midas Depth loaded successfully")
    return detector, True
|
| 95 |
|
| 96 |
|
|
|
|
| 276 |
|
| 277 |
print(" [OK] IP-Adapter fully loaded with InstantID architecture")
|
| 278 |
print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
|
| 279 |
+
print(f" - Face embeddings: 512D → 16x2048D")
|
| 280 |
|
| 281 |
return image_proj_model, True
|
| 282 |
|