primerz commited on
Commit
176aa63
·
verified ·
1 Parent(s): c6ad10b

Upload 2 files

Browse files
Files changed (2) hide show
  1. generator.py +58 -12
  2. models.py +16 -18
generator.py CHANGED
@@ -18,7 +18,7 @@ from utils import (
18
  )
19
  from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
- load_sdxl_pipeline, load_lora, setup_ip_adapter,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip
23
  )
24
 
@@ -83,10 +83,8 @@ class RetroArtConverter:
83
 
84
  # Setup Compel
85
  # TEMPORARILY DISABLED - SDXL token mismatch issue
86
- # Skip Compel - use native SDXL encoding instead
87
- self.compel = None
88
- self.use_compel = False
89
- print(" [INFO] Using native SDXL prompt encoding (more reliable than Compel)")
90
  print(" [INFO] Compel temporarily disabled - using standard prompts")
91
 
92
  # Setup LCM scheduler
@@ -577,10 +575,37 @@ class RetroArtConverter:
577
 
578
  pipe_kwargs["generator"] = generator
579
 
580
- # Use native SDXL prompt encoding (more reliable than Compel)
581
- print("Using native SDXL prompt encoding...")
582
- pipe_kwargs["prompt"] = prompt
583
- pipe_kwargs["negative_prompt"] = negative_prompt if negative_prompt and negative_prompt.strip() else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
584
 
585
  # Add CLIP skip
586
  if hasattr(self.pipe, 'text_encoder'):
@@ -620,9 +645,30 @@ class RetroArtConverter:
620
  print(f" - Resampler output: {face_proj_embeds.shape}")
621
  print(f" - Scale: {boosted_scale:.2f}")
622
 
623
- # Pass face embeddings through image_embeds parameter (works without Compel)
624
- pipe_kwargs['image_embeds'] = face_proj_embeds
625
- print(f" [OK] Face embeddings set for IP-Adapter!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
626
 
627
  elif has_detected_faces and self.models_loaded.get('ip_adapter', False):
628
  # Face detected but embeddings unavailable
 
18
  )
19
  from models import (
20
  load_face_analysis, load_depth_detector, load_controlnets, load_image_encoder,
21
+ load_sdxl_pipeline, load_lora, setup_ip_adapter, setup_compel,
22
  setup_scheduler, optimize_pipeline, load_caption_model, set_clip_skip
23
  )
24
 
 
83
 
84
  # Setup Compel
85
  # Compel re-enabled below (was previously disabled due to an SDXL token mismatch issue)
86
+ # Setup Compel
87
+ self.compel, self.use_compel = setup_compel(self.pipe)
 
 
88
  print(" [INFO] Compel temporarily disabled - using standard prompts")
89
 
90
  # Setup LCM scheduler
 
575
 
576
  pipe_kwargs["generator"] = generator
577
 
578
+ # Use Compel for prompt encoding (critical for quality)
579
+ negative_conditioning = None # Initialize for later use
580
+ if self.use_compel and self.compel is not None:
581
+ try:
582
+ print("Encoding prompts with Compel...")
583
+
584
+ # Direct tuple unpacking as in working example
585
+ conditioning, pooled = self.compel(prompt)
586
+
587
+ # Handle negative prompt conditionally
588
+ if negative_prompt and negative_prompt.strip():
589
+ negative_conditioning, negative_pooled = self.compel(negative_prompt)
590
+ else:
591
+ negative_conditioning, negative_pooled = None, None
592
+
593
+ # Set embeddings for pipeline
594
+ pipe_kwargs["prompt_embeds"] = conditioning
595
+ pipe_kwargs["pooled_prompt_embeds"] = pooled
596
+ pipe_kwargs["negative_prompt_embeds"] = negative_conditioning
597
+ pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled
598
+
599
+ print("[OK] Using Compel-encoded prompts")
600
+ except Exception as e:
601
+ print(f"[FALLBACK] Compel failed ({e}), using standard encoding")
602
+ pipe_kwargs["prompt"] = prompt
603
+ pipe_kwargs["negative_prompt"] = negative_prompt if negative_prompt and negative_prompt.strip() else None
604
+ else:
605
+ # Fallback to native SDXL encoding
606
+ print("Using standard SDXL prompt encoding...")
607
+ pipe_kwargs["prompt"] = prompt
608
+ pipe_kwargs["negative_prompt"] = negative_prompt if negative_prompt and negative_prompt.strip() else None
609
 
610
  # Add CLIP skip
611
  if hasattr(self.pipe, 'text_encoder'):
 
645
  print(f" - Resampler output: {face_proj_embeds.shape}")
646
  print(f" - Scale: {boosted_scale:.2f}")
647
 
648
+ # Handle face embeddings with or without Compel
649
+ if 'prompt_embeds' in pipe_kwargs:
650
+ # Compel is being used - concatenate embeddings
651
+ original_embeds = pipe_kwargs['prompt_embeds']
652
+
653
+ # Handle CFG (classifier-free guidance)
654
+ if negative_conditioning is not None:
655
+ # Duplicate for negative + positive
656
+ face_proj_embeds = torch.cat([
657
+ torch.zeros_like(face_proj_embeds), # Negative
658
+ face_proj_embeds # Positive
659
+ ], dim=0)
660
+
661
+ # Concatenate: [batch, text_tokens, 2048] + [batch, 16, 2048]
662
+ combined_embeds = torch.cat([original_embeds, face_proj_embeds], dim=1)
663
+ pipe_kwargs['prompt_embeds'] = combined_embeds
664
+
665
+ print(f" - Text embeds: {original_embeds.shape}")
666
+ print(f" - Combined embeds: {combined_embeds.shape}")
667
+ print(f" [OK] Face embeddings concatenated with text embeddings!")
668
+ else:
669
+ # Native encoding - use image_embeds parameter
670
+ pipe_kwargs['image_embeds'] = face_proj_embeds
671
+ print(f" [OK] Face embeddings set via image_embeds!")
672
 
673
  elif has_detected_faces and self.models_loaded.get('ip_adapter', False):
674
  # Face detected but embeddings unavailable
models.py CHANGED
@@ -15,7 +15,7 @@ from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
  from controlnet_aux import LeresDetector
17
  from huggingface_hub import hf_hub_download
18
- # removed compel - using native SDXL encoding
19
 
20
  # Use reference implementation's attention processor
21
  from attention_processor import IPAttnProcessor2_0, AttnProcessor
@@ -326,23 +326,21 @@ def setup_ip_adapter(pipe, image_encoder):
326
  return None, False
327
 
328
 
329
- # Removed setup_compel - using native SDXL encoding instead
330
- # def setup_compel(pipe):
331
- # """Setup Compel for better SDXL prompt handling."""
332
- # print("Setting up Compel for enhanced prompt processing...")
333
- # try:
334
- # compel = Compel(
335
- # tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
336
- # text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
337
- # returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
338
- # requires_pooled=[False, True],
339
- # truncate_long_prompts=False # Important for SDXL compatibility
340
- # )
341
- # print(" [OK] Compel loaded successfully")
342
- # return compel, True
343
- # except Exception as e:
344
- # print(f" [WARNING] Compel not available: {e}")
345
- # return None, False
346
 
347
 
348
  def setup_scheduler(pipe):
 
15
  from insightface.app import FaceAnalysis
16
  from controlnet_aux import LeresDetector
17
  from huggingface_hub import hf_hub_download
18
+ from compel import Compel, ReturnedEmbeddingsType
19
 
20
  # Use reference implementation's attention processor
21
  from attention_processor import IPAttnProcessor2_0, AttnProcessor
 
326
  return None, False
327
 
328
 
329
def setup_compel(pipe):
    """Build a Compel prompt processor for an SDXL pipeline.

    Wires both SDXL tokenizers and text encoders into one Compel
    instance so weighted-prompt syntax can be encoded into
    (conditioning, pooled) embedding tensors.

    Returns:
        tuple: ``(compel_instance, True)`` on success, or
        ``(None, False)`` when Compel cannot be constructed
        (e.g. the library is missing or the pipeline lacks encoders).
    """
    print("Setting up Compel for enhanced prompt processing...")
    try:
        dual_tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
        dual_encoders = [pipe.text_encoder, pipe.text_encoder_2]
        # SDXL uses the penultimate (non-normalized) hidden states, and
        # only the second text encoder contributes pooled embeddings.
        processor = Compel(
            tokenizer=dual_tokenizers,
            text_encoder=dual_encoders,
            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
            requires_pooled=[False, True]
        )
    except Exception as e:
        # Best-effort: callers fall back to native SDXL encoding.
        print(f" [WARNING] Compel not available: {e}")
        return None, False
    print(" [OK] Compel loaded successfully")
    return processor, True
 
 
344
 
345
 
346
  def setup_scheduler(pipe):