Spaces:
Runtime error
Runtime error
Upload generator.py
Browse files
- generator.py +27 -42
generator.py
CHANGED
|
@@ -223,22 +223,8 @@ class RetroArtConverter:
|
|
| 223 |
# Detect faces
|
| 224 |
has_detected_faces = False
|
| 225 |
face_kps_image = None
|
|
|
|
| 226 |
face_bbox_original = None
|
| 227 |
-
|
| 228 |
-
# === FIX: START ===
|
| 229 |
-
# The InstantID pipeline requires image_embeds. We must provide a dummy tensor if no face is detected.
|
| 230 |
-
|
| 231 |
-
# Get the expected embedding dimension from the pipeline
|
| 232 |
-
try:
|
| 233 |
-
# Get dim from the loaded model
|
| 234 |
-
image_emb_dim = self.pipe.image_proj_model_in_features
|
| 235 |
-
except AttributeError:
|
| 236 |
-
print("[WARN] Cannot find image_proj_model_in_features, defaulting to 512")
|
| 237 |
-
image_emb_dim = 512 # Default dim for InstantID
|
| 238 |
-
|
| 239 |
-
# Initialize with dummy embeddings (zeros)
|
| 240 |
-
face_embeddings = np.zeros(image_emb_dim, dtype=np.float32) # Or dtype=dtype ???
|
| 241 |
-
# === FIX: END ===
|
| 242 |
|
| 243 |
if self.face_detection_enabled and self.face_app is not None:
|
| 244 |
try:
|
|
@@ -323,53 +309,52 @@ class RetroArtConverter:
|
|
| 323 |
else:
|
| 324 |
pipe_kwargs["prompt"] = prompt
|
| 325 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 326 |
-
|
| 327 |
-
|
| 328 |
# Configure ControlNets + IP-Adapter (SIMPLIFIED!)
|
| 329 |
-
# *** ALWAYS PASS EMBEDDINGS ***
|
| 330 |
-
# The pipeline always needs image_embeds.
|
| 331 |
-
# This will be the REAL embeddings or the DUMMY (zero) embeddings.
|
| 332 |
-
pipe_kwargs["image_embeds"] = face_embeddings
|
| 333 |
-
|
| 334 |
if has_detected_faces and face_kps_image is not None:
|
| 335 |
print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
|
| 336 |
-
|
| 337 |
# Control images: [face keypoints, depth map]
|
| 338 |
pipe_kwargs["control_image"] = [face_kps_image, depth_image]
|
| 339 |
-
|
| 340 |
# Conditioning scales: [identity, depth]
|
| 341 |
pipe_kwargs["controlnet_conditioning_scale"] = [
|
| 342 |
identity_control_scale,
|
| 343 |
depth_control_scale
|
| 344 |
]
|
| 345 |
-
|
| 346 |
-
# Control guidance timing
|
|
|
|
|
|
|
| 347 |
pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
|
| 348 |
pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
|
| 349 |
-
|
| 350 |
-
#
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
else:
|
| 357 |
print("No faces detected - using Depth ControlNet only")
|
| 358 |
-
|
| 359 |
# Use depth for both ControlNet slots (identity scale = 0)
|
| 360 |
pipe_kwargs["control_image"] = [depth_image, depth_image]
|
| 361 |
pipe_kwargs["controlnet_conditioning_scale"] = [0.0, depth_control_scale]
|
| 362 |
-
|
| 363 |
# Control guidance timing for both slots
|
| 364 |
pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
|
| 365 |
pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
|
| 366 |
-
|
| 367 |
-
# *** SET IP-ADAPTER SCALE TO 0 ***
|
| 368 |
-
# We passed dummy embeddings, so set scale to 0
|
| 369 |
-
pipe_kwargs["ip_adapter_scale"] = 0.0
|
| 370 |
-
|
| 371 |
-
print(f" - Dummy embeddings shape: {face_embeddings.shape}")
|
| 372 |
-
print(f" - IP-Adapter scale: 0.0")
|
| 373 |
|
| 374 |
# Generate
|
| 375 |
print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
|
|
|
|
| 223 |
# Detect faces
|
| 224 |
has_detected_faces = False
|
| 225 |
face_kps_image = None
|
| 226 |
+
face_embeddings = None
|
| 227 |
face_bbox_original = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
if self.face_detection_enabled and self.face_app is not None:
|
| 230 |
try:
|
|
|
|
| 309 |
else:
|
| 310 |
pipe_kwargs["prompt"] = prompt
|
| 311 |
pipe_kwargs["negative_prompt"] = negative_prompt
|
| 312 |
+
|
|
|
|
| 313 |
# Configure ControlNets + IP-Adapter (SIMPLIFIED!)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
if has_detected_faces and face_kps_image is not None:
|
| 315 |
print("Using InstantID (keypoints + embeddings) + Depth ControlNets")
|
| 316 |
+
|
| 317 |
# Control images: [face keypoints, depth map]
|
| 318 |
pipe_kwargs["control_image"] = [face_kps_image, depth_image]
|
| 319 |
+
|
| 320 |
# Conditioning scales: [identity, depth]
|
| 321 |
pipe_kwargs["controlnet_conditioning_scale"] = [
|
| 322 |
identity_control_scale,
|
| 323 |
depth_control_scale
|
| 324 |
]
|
| 325 |
+
|
| 326 |
+
# Control guidance timing (when each ControlNet is active)
|
| 327 |
+
# [start, start] - both active from beginning
|
| 328 |
+
# [end, end] - both active until end
|
| 329 |
pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
|
| 330 |
pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
|
| 331 |
+
|
| 332 |
+
# IP-Adapter face embeddings (SIMPLE - pipeline handles everything!)
|
| 333 |
+
if face_embeddings is not None:
|
| 334 |
+
print(f"Adding face embeddings for IP-Adapter...")
|
| 335 |
+
|
| 336 |
+
# Just pass the embeddings - pipeline does the rest!
|
| 337 |
+
pipe_kwargs["image_embeds"] = face_embeddings
|
| 338 |
+
|
| 339 |
+
# Control IP-Adapter strength
|
| 340 |
+
pipe_kwargs["ip_adapter_scale"] = identity_preservation
|
| 341 |
+
|
| 342 |
+
print(f" - Face embeddings shape: {face_embeddings.shape}")
|
| 343 |
+
print(f" - IP-Adapter scale: {identity_preservation}")
|
| 344 |
+
print(f" [OK] Face embeddings configured")
|
| 345 |
+
else:
|
| 346 |
+
print(" [WARNING] No face embeddings - using keypoints only")
|
| 347 |
+
|
| 348 |
else:
|
| 349 |
print("No faces detected - using Depth ControlNet only")
|
| 350 |
+
|
| 351 |
# Use depth for both ControlNet slots (identity scale = 0)
|
| 352 |
pipe_kwargs["control_image"] = [depth_image, depth_image]
|
| 353 |
pipe_kwargs["controlnet_conditioning_scale"] = [0.0, depth_control_scale]
|
| 354 |
+
|
| 355 |
# Control guidance timing for both slots
|
| 356 |
pipe_kwargs["control_guidance_start"] = [0.0, 0.0]
|
| 357 |
pipe_kwargs["control_guidance_end"] = [1.0, 1.0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
|
| 359 |
# Generate
|
| 360 |
print(f"Generating: Steps={num_inference_steps}, CFG={guidance_scale}, Strength={strength}")
|