pixagram-dev

Runtime error

App Files Files Community

primerz committed on Oct 30

Commit

8f934d6

verified ·

1 Parent(s): 69e6233

Upload 2 files

Browse files

Files changed (2) hide show

generator.py +54 -103
utils.py +9 -9

generator.py CHANGED Viewed

@@ -145,48 +145,49 @@ class RetroArtConverter:
             print(f"[INFO] Verification skipped: {e}")
         print("============================\n")
-    def get_depth_map(self, image):
-        """Generate depth map using Zoe Depth"""
-        if self.zoe_depth is not None:
-            try:
-                if image.mode != 'RGB':
-                    image = image.convert('RGB')
-                orig_width, orig_height = image.size
-                orig_width = int(orig_width)
-                orig_height = int(orig_height)
-                # FIXED: Use multiples of 64 (not 32)
-                target_width = int((orig_width // 64) * 64)
-                target_height = int((orig_height // 64) * 64)
-                target_width = int(max(64, target_width))
-                target_height = int(max(64, target_height))
-                if target_width != orig_width or target_height != orig_height:
-                    image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
-                    print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
-                # FIXED: Add torch.no_grad() wrapper
-                with torch.no_grad():
-                    depth_image = self.zoe_depth(image)
-                depth_width, depth_height = depth_image.size
-                if depth_width != orig_width or depth_height != orig_height:
-                    depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
-                print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
-                return depth_image
-            except Exception as e:
-                print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-                return Image.fromarray(depth_colored)
-        else:
-            gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
-            depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
-            return Image.fromarray(depth_colored)
     def add_trigger_word(self, prompt):
@@ -570,76 +571,26 @@ class RetroArtConverter:
         pipe_kwargs["generator"] = generator
         if self.use_compel and self.compel is not None:
             try:
                 print("Encoding prompts with Compel...")
-                try:
-                    # Tuple unpacking: (prompt_embeds, pooled_prompt_embeds)
-                    conditioning = self.compel(prompt)
-                    prompt_embeds, pooled_prompt_embeds = conditioning
-                    # Handle negative prompt conditionally
-                    if negative_prompt and negative_prompt.strip():
-                        negative_conditioning = self.compel(negative_prompt)
-                        negative_prompt_embeds, negative_pooled_prompt_embeds = negative_conditioning
-                    else:
-                        # Use zeros for negative
-                        negative_prompt_embeds = torch.zeros_like(prompt_embeds)
-                        negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
-                except RuntimeError as e:
-                    error_msg = str(e)
-                    if ("size of tensor" in error_msg and "must match" in error_msg) or "dimension" in error_msg:
-                        print(f"[COMPEL] Token length mismatch detected: {e}")
-                        print(f"[COMPEL] Falling back to standard prompt encoding")
-                        raise
-                    else:
-                        raise
-                # Handle token length mismatch by padding/truncating to 77 tokens
-                target_length = 77
-                if prompt_embeds.shape[1] != target_length or negative_prompt_embeds.shape[1] != target_length:
-                    print(f"[COMPEL] Adjusting token lengths: pos={prompt_embeds.shape[1]}, neg={negative_prompt_embeds.shape[1]} -> {target_length}")
-                    # Truncate or pad positive embeddings
-                    if prompt_embeds.shape[1] > target_length:
-                        prompt_embeds = prompt_embeds[:, :target_length, :]
-                    elif prompt_embeds.shape[1] < target_length:
-                        padding = torch.zeros(
-                            prompt_embeds.shape[0],
-                            target_length - prompt_embeds.shape[1],
-                            prompt_embeds.shape[2],
-                            dtype=prompt_embeds.dtype,
-                            device=prompt_embeds.device
-                        )
-                        prompt_embeds = torch.cat([prompt_embeds, padding], dim=1)
-                    # Truncate or pad negative embeddings
-                    if negative_prompt_embeds.shape[1] > target_length:
-                        negative_prompt_embeds = negative_prompt_embeds[:, :target_length, :]
-                    elif negative_prompt_embeds.shape[1] < target_length:
-                        padding = torch.zeros(
-                            negative_prompt_embeds.shape[0],
-                            target_length - negative_prompt_embeds.shape[1],
-                            negative_prompt_embeds.shape[2],
-                            dtype=negative_prompt_embeds.dtype,
-                            device=negative_prompt_embeds.device
-                        )
-                        negative_prompt_embeds = torch.cat([negative_prompt_embeds, padding], dim=1)
-                pipe_kwargs["prompt_embeds"] = prompt_embeds
-                pipe_kwargs["pooled_prompt_embeds"] = pooled_prompt_embeds
-                pipe_kwargs["negative_prompt_embeds"] = negative_prompt_embeds
-                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_pooled_prompt_embeds
-                compel_success = True
                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
-                print(f"[COMPEL] Encoding failed: {e}")
-                print(f"[COMPEL] Using standard prompt encoding instead")
-                compel_success = False
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):

             print(f"[INFO] Verification skipped: {e}")
         print("============================\n")
+        def get_depth_map(self, image):
+            """Generate depth map using Zoe Depth"""
+            if self.zoe_depth is not None:
+                try:
+                    if image.mode != 'RGB':
+                        image = image.convert('RGB')
+                    orig_width, orig_height = image.size
+                    orig_width = int(orig_width)
+                    orig_height = int(orig_height)
+                    # FIXED: Use multiples of 64 (not 32)
+                    target_width = int((orig_width // 64) * 64)
+                    target_height = int((orig_height // 64) * 64)
+                    target_width = int(max(64, target_width))
+                    target_height = int(max(64, target_height))
+                    if target_width != orig_width or target_height != orig_height:
+                        image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
+                        print(f"[DEPTH] Resized for ZoeDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
+                    # FIXED: Add torch.no_grad() wrapper
+                    with torch.no_grad():
+                        depth_image = self.zoe_depth(image)
+                    depth_width, depth_height = depth_image.size
+                    if depth_width != orig_width or depth_height != orig_height:
+                        depth_image = depth_image.resize((int(orig_width), int(orig_height)), Image.LANCZOS)
+                    print(f"[DEPTH] Zoe depth map generated: {orig_width}x{orig_height}")
+                    return depth_image
+                except Exception as e:
+                    print(f"[DEPTH] ZoeDetector failed ({e}), falling back to grayscale depth")
+                    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
+                    depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
+                    return Image.fromarray(depth_colored)
+            else:
                 gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
                 depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
+                return Image.fromarray(depth_colored)
     def add_trigger_word(self, prompt):
         pipe_kwargs["generator"] = generator
+        # Use Compel for prompt encoding if available
         if self.use_compel and self.compel is not None:
             try:
                 print("Encoding prompts with Compel...")
+                conditioning = self.compel(prompt)
+                negative_conditioning = self.compel(negative_prompt)
+                pipe_kwargs["prompt_embeds"] = conditioning[0]
+                pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
+                pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
+                pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
                 print("[OK] Using Compel-encoded prompts")
             except Exception as e:
+                print(f"Compel encoding failed, using standard prompts: {e}")
+                pipe_kwargs["prompt"] = prompt
+                pipe_kwargs["negative_prompt"] = negative_prompt
+        else:
+            pipe_kwargs["prompt"] = prompt
+            pipe_kwargs["negative_prompt"] = negative_prompt
         # Add CLIP skip
         if hasattr(self.pipe, 'text_encoder'):

utils.py CHANGED Viewed

@@ -395,10 +395,10 @@ def get_demographic_description(age, gender_code):
 def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
     """
-    Calculate optimal size maintaining aspect ratio with dimensions as multiples of 8.
     This updated version supports ANY aspect ratio (not just predefined ones),
-    while ensuring dimensions are multiples of 8 and keeping total pixels reasonable.
     Args:
         original_width: Original image width
@@ -407,7 +407,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
         max_dimension: Maximum allowed dimension (default 1536)
     Returns:
-        Tuple of (optimal_width, optimal_height) as multiples of 8
     """
     aspect_ratio = original_width / original_height
@@ -423,7 +423,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
                 best_diff = diff
                 best_match = (width, height)
-        # Ensure dimensions are multiples of 8
         width, height = best_match
         width = int((width // 64) * 64)
         height = int((height // 64) * 64)
@@ -431,7 +431,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
         return width, height
     # NEW: Support any aspect ratio
-    # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of 8
     # Target total pixels (around 1 megapixel for SDXL, adjustable)
     target_pixels = 1024 * 1024  # ~1MP, good balance for SDXL
@@ -455,7 +455,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
         optimal_height = max_dimension
         optimal_width = optimal_height * aspect_ratio
-    # Round to nearest multiple of 8
     width = int(round(optimal_width / 64) * 64)
     height = int(round(optimal_height / 64) * 64)
@@ -469,9 +469,9 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
             height = min_dimension
             width = int(round((height * aspect_ratio) / 64) * 64)
-    # Final safety check: ensure multiples of 8
-    width = max(8, int((width // 64) * 64))
-    height = max(8, int((height // 64) * 64))
     print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")

 def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
     """
+    Calculate optimal size maintaining aspect ratio with dimensions as multiples of 64.
     This updated version supports ANY aspect ratio (not just predefined ones),
+    while ensuring dimensions are multiples of 64 and keeping total pixels reasonable.
     Args:
         original_width: Original image width
         max_dimension: Maximum allowed dimension (default 1536)
     Returns:
+        Tuple of (optimal_width, optimal_height) as multiples of 64
     """
     aspect_ratio = original_width / original_height
                 best_diff = diff
                 best_match = (width, height)
+        # Ensure dimensions are multiples of 64
         width, height = best_match
         width = int((width // 64) * 64)
         height = int((height // 64) * 64)
         return width, height
     # NEW: Support any aspect ratio
+    # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of 64
     # Target total pixels (around 1 megapixel for SDXL, adjustable)
     target_pixels = 1024 * 1024  # ~1MP, good balance for SDXL
         optimal_height = max_dimension
         optimal_width = optimal_height * aspect_ratio
+    # Round to nearest multiple of 64
     width = int(round(optimal_width / 64) * 64)
     height = int(round(optimal_height / 64) * 64)
             height = min_dimension
             width = int(round((height * aspect_ratio) / 64) * 64)
+    # Final safety check: ensure multiples of 64
+    width = max(64, int((width // 64) * 64))
+    height = max(64, int((height // 64) * 64))
     print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")