primerz committed on
Commit
7c0c2dc
·
verified ·
1 Parent(s): d48111e

Upload 12 files

Browse files
Files changed (6) hide show
  1. app.py +3 -3
  2. config.py +5 -1
  3. generator.py +72 -40
  4. gitattributes (1) +35 -0
  5. models.py +40 -13
  6. utils.py +39 -20
app.py CHANGED
@@ -106,7 +106,7 @@ def get_model_status():
106
  status_text += f"- Custom Checkpoint (Horizon): {'[OK] Loaded' if converter.models_loaded['custom_checkpoint'] else '[OK] Using SDXL base'}\n"
107
  status_text += f"- LORA (RetroArt): {'[OK] Loaded' if converter.models_loaded['lora'] else ' Disabled'}\n"
108
  status_text += f"- InstantID: {'[OK] Loaded' if converter.models_loaded['instantid'] else ' Disabled'}\n"
109
- status_text += f"- Zoe Depth: {'[OK] Loaded' if converter.models_loaded['zoe_depth'] else ' Fallback'}\n"
110
  status_text += f"- IP-Adapter (Face Embeddings): {'[OK] Loaded' if converter.models_loaded.get('ip_adapter', False) else ' Keypoints only'}\n"
111
  return status_text
112
  return "**Model status unavailable**"
@@ -351,7 +351,7 @@ with gr.Blocks(title="Pixagram - AI Pixel Art Generator", theme=gr.themes.Soft()
351
  **[ADAPTIVE] Automatic Adjustments:**
352
  - Small faces (< 50K px): Boosts identity preservation to 1.8
353
  - Low confidence (< 80%): Increases identity control to 0.9
354
- - Profile views (> 20° yaw): Enhances preservation to 1.7
355
  - Good quality faces: Uses your selected parameters
356
 
357
  **[PARAMETERS] Parameter Relationships:**
@@ -452,4 +452,4 @@ if __name__ == "__main__":
452
  server_port=7860,
453
  share=True,
454
  show_api=True
455
- )
 
106
  status_text += f"- Custom Checkpoint (Horizon): {'[OK] Loaded' if converter.models_loaded['custom_checkpoint'] else '[OK] Using SDXL base'}\n"
107
  status_text += f"- LORA (RetroArt): {'[OK] Loaded' if converter.models_loaded['lora'] else ' Disabled'}\n"
108
  status_text += f"- InstantID: {'[OK] Loaded' if converter.models_loaded['instantid'] else ' Disabled'}\n"
109
+ status_text += f"- Midas Depth: {'[OK] Loaded' if converter.models_loaded['midas_depth'] else ' Fallback'}\n"
110
  status_text += f"- IP-Adapter (Face Embeddings): {'[OK] Loaded' if converter.models_loaded.get('ip_adapter', False) else ' Keypoints only'}\n"
111
  return status_text
112
  return "**Model status unavailable**"
 
351
  **[ADAPTIVE] Automatic Adjustments:**
352
  - Small faces (< 50K px): Boosts identity preservation to 1.8
353
  - Low confidence (< 80%): Increases identity control to 0.9
354
+ - Profile views (> 20° yaw): Enhances preservation to 1.7
355
  - Good quality faces: Uses your selected parameters
356
 
357
  **[PARAMETERS] Parameter Relationships:**
 
452
  server_port=7860,
453
  share=True,
454
  show_api=True
455
+ )
config.py CHANGED
@@ -29,7 +29,11 @@ FACE_DETECTION_CONFIG = {
29
  "ctx_id": 0
30
  }
31
 
32
- # Recommended resolutions
 
 
 
 
33
  RECOMMENDED_SIZES = [
34
  (896, 1152), # Portrait
35
  (1152, 896), # Landscape
 
29
  "ctx_id": 0
30
  }
31
 
32
+ # Depth detection configuration
33
+ DEPTH_DETECTION_CONFIG = {
34
+ "model_name": "leres++", # LeRes++ provides superior depth accuracy
35
+ "method": "leres"
36
+ }
37
  RECOMMENDED_SIZES = [
38
  (896, 1152), # Portrait
39
  (1152, 896), # Landscape
generator.py CHANGED
@@ -33,16 +33,16 @@ class RetroArtConverter:
33
  'custom_checkpoint': False,
34
  'lora': False,
35
  'instantid': False,
36
- 'zoe_depth': False,
37
  'ip_adapter': False
38
  }
39
 
40
  # Initialize face analysis
41
  self.face_app, self.face_detection_enabled = load_face_analysis()
42
 
43
- # Load Zoe Depth detector
44
- self.zoe_depth, zoe_success = load_depth_detector()
45
- self.models_loaded['zoe_depth'] = zoe_success
46
 
47
  # Load ControlNets
48
  controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
@@ -146,34 +146,54 @@ class RetroArtConverter:
146
  print("============================\n")
147
 
148
  def get_depth_map(self, image):
149
- """Generate depth map using Zoe Depth"""
150
- if self.zoe_depth is not None:
151
- try:
152
- # Ensure clean PIL Image
153
- if image.mode != 'RGB':
154
- image = image.convert('RGB')
155
-
156
- # Get dimensions and ensure they're Python ints
157
- width, height = image.size
158
- width, height = int(width), int(height)
159
-
160
- # Create a fresh image to avoid numpy type issues
161
- image_array = np.array(image)
162
- clean_image = Image.fromarray(image_array.astype(np.uint8))
163
-
164
- # Use Zoe detector
165
- depth_image = self.zoe_depth(clean_image)
166
- return depth_image
167
- except Exception as e:
168
- print(f"Warning: ZoeDetector failed ({e}), falling back to grayscale depth")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
170
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
171
- return Image.fromarray(depth_colored)
172
- else:
173
- # Fallback to simple grayscale
174
- gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
175
- depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
176
- return Image.fromarray(depth_colored)
177
 
178
  def add_trigger_word(self, prompt):
179
  """Add trigger word to prompt if not present"""
@@ -447,7 +467,7 @@ class RetroArtConverter:
447
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
448
 
449
  # Generate depth map
450
- print("Generating Zoe depth map...")
451
  depth_image = self.get_depth_map(resized_image)
452
  if depth_image.size != (target_width, target_height):
453
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
@@ -463,7 +483,11 @@ class RetroArtConverter:
463
  if using_multiple_controlnets and self.face_app is not None:
464
  print("Detecting faces and extracting keypoints...")
465
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
466
- faces = self.face_app.get(img_array)
 
 
 
 
467
 
468
  if len(faces) > 0:
469
  has_detected_faces = True
@@ -531,7 +555,8 @@ class RetroArtConverter:
531
  # Set LORA scale
532
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
533
  try:
534
- self.pipe.set_adapters(["retroart"], adapter_weights=[lora_scale])
 
535
  print(f"LORA scale: {lora_scale}")
536
  except Exception as e:
537
  print(f"Could not set LORA scale: {e}")
@@ -563,14 +588,21 @@ class RetroArtConverter:
563
  conditioning = self.compel(prompt)
564
  negative_conditioning = self.compel(negative_prompt)
565
 
566
- pipe_kwargs["prompt_embeds"] = conditioning[0]
567
- pipe_kwargs["pooled_prompt_embeds"] = conditioning[1]
568
- pipe_kwargs["negative_prompt_embeds"] = negative_conditioning[0]
569
- pipe_kwargs["negative_pooled_prompt_embeds"] = negative_conditioning[1]
 
 
 
 
 
 
 
570
 
571
  print("[OK] Using Compel-encoded prompts")
572
  except Exception as e:
573
- print(f"Compel encoding failed, using standard prompts: {e}")
574
  pipe_kwargs["prompt"] = prompt
575
  pipe_kwargs["negative_prompt"] = negative_prompt
576
  else:
@@ -604,7 +636,7 @@ class RetroArtConverter:
604
  # Reshape for Resampler: [1, 1, 512]
605
  face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
606
 
607
- # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
608
  face_proj_embeds = self.image_proj_model(face_emb_tensor)
609
 
610
  # Scale with identity preservation
@@ -692,4 +724,4 @@ class RetroArtConverter:
692
  return generated_image
693
 
694
 
695
- print("[OK] Generator class ready")
 
33
  'custom_checkpoint': False,
34
  'lora': False,
35
  'instantid': False,
36
+ 'midas_depth': False,
37
  'ip_adapter': False
38
  }
39
 
40
  # Initialize face analysis
41
  self.face_app, self.face_detection_enabled = load_face_analysis()
42
 
43
+ # Load Midas Depth detector
44
+ self.midas_depth, midas_success = load_depth_detector()
45
+ self.models_loaded['midas_depth'] = midas_success
46
 
47
  # Load ControlNets
48
  controlnet_depth, self.controlnet_instantid, instantid_success = load_controlnets()
 
146
  print("============================\n")
147
 
148
  def get_depth_map(self, image):
149
+ """Generate depth map using Midas Depth"""
150
+ if self.midas_depth is not None:
151
+ try:
152
+ if image.mode != 'RGB':
153
+ image = image.convert('RGB')
154
+
155
+ orig_width, orig_height = image.size
156
+ orig_width = int(orig_width)
157
+ orig_height = int(orig_height)
158
+
159
+ # FIXED: Use multiples of 64 (not 32)
160
+ target_width = int((orig_width // 64) * 64)
161
+ target_height = int((orig_height // 64) * 64)
162
+
163
+ target_width = int(max(64, target_width))
164
+ target_height = int(max(64, target_height))
165
+
166
+ if target_width != orig_width or target_height != orig_height:
167
+ image = image.resize((int(target_width), int(target_height)), Image.LANCZOS)
168
+ print(f"[DEPTH] Resized for MidasDetector: {orig_width}x{orig_height} -> {target_width}x{target_height}")
169
+
170
+ # FIXED: Add torch.no_grad() wrapper
171
+ with torch.no_grad():
172
+ depth_image = self.midas_depth(image)
173
+
174
+ depth_width, depth_height = depth_image.size
175
+ # Convert numpy int64 to Python int to avoid PIL errors
176
+ depth_width = int(depth_width)
177
+ depth_height = int(depth_height)
178
+ orig_width_int = int(orig_width)
179
+ orig_height_int = int(orig_height)
180
+
181
+ if depth_width != orig_width_int or depth_height != orig_height_int:
182
+ depth_image = depth_image.resize((orig_width_int, orig_height_int), Image.LANCZOS)
183
+
184
+ print(f"[DEPTH] Midas depth map generated: {orig_width}x{orig_height}")
185
+ return depth_image
186
+
187
+ except Exception as e:
188
+ print(f"[DEPTH] MidasDetector failed ({e}), falling back to grayscale depth")
189
+ gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
190
+ depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
191
+ return Image.fromarray(depth_colored)
192
+ else:
193
  gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
194
  depth_colored = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
195
+ return Image.fromarray(depth_colored)
196
+
 
 
 
 
197
 
198
  def add_trigger_word(self, prompt):
199
  """Add trigger word to prompt if not present"""
 
467
  resized_image = input_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
468
 
469
  # Generate depth map
470
+ print("Generating Midas depth map...")
471
  depth_image = self.get_depth_map(resized_image)
472
  if depth_image.size != (target_width, target_height):
473
  depth_image = depth_image.resize((int(target_width), int(target_height)), Image.LANCZOS)
 
483
  if using_multiple_controlnets and self.face_app is not None:
484
  print("Detecting faces and extracting keypoints...")
485
  img_array = cv2.cvtColor(np.array(resized_image), cv2.COLOR_RGB2BGR)
486
+ try:
487
+ faces = self.face_app.get(img_array)
488
+ except Exception as e:
489
+ print(f"[WARNING] Face detection failed: {e}")
490
+ faces = []
491
 
492
  if len(faces) > 0:
493
  has_detected_faces = True
 
555
  # Set LORA scale
556
  if hasattr(self.pipe, 'set_adapters') and self.models_loaded['lora']:
557
  try:
558
+ # Use correct adapter name - peft uses 'default_0' for single adapters
559
+ self.pipe.set_adapters(["default_0"], adapter_weights=[lora_scale])
560
  print(f"LORA scale: {lora_scale}")
561
  except Exception as e:
562
  print(f"Could not set LORA scale: {e}")
 
588
  conditioning = self.compel(prompt)
589
  negative_conditioning = self.compel(negative_prompt)
590
 
591
+ # Handle potential token length mismatches
592
+ prompt_embeds_0 = conditioning[0]
593
+ prompt_embeds_1 = conditioning[1]
594
+ neg_embeds_0 = negative_conditioning[0]
595
+ neg_embeds_1 = negative_conditioning[1]
596
+
597
+ # Ensure consistent shapes if needed
598
+ pipe_kwargs["prompt_embeds"] = prompt_embeds_0
599
+ pipe_kwargs["pooled_prompt_embeds"] = prompt_embeds_1
600
+ pipe_kwargs["negative_prompt_embeds"] = neg_embeds_0
601
+ pipe_kwargs["negative_pooled_prompt_embeds"] = neg_embeds_1
602
 
603
  print("[OK] Using Compel-encoded prompts")
604
  except Exception as e:
605
+ print(f"Compel encoding failed ({e}), falling back to standard prompts")
606
  pipe_kwargs["prompt"] = prompt
607
  pipe_kwargs["negative_prompt"] = negative_prompt
608
  else:
 
636
  # Reshape for Resampler: [1, 1, 512]
637
  face_emb_tensor = face_emb_tensor.reshape(1, -1, 512)
638
 
639
+ # Pass through Resampler: [1, 1, 512] → [1, 16, 2048]
640
  face_proj_embeds = self.image_proj_model(face_emb_tensor)
641
 
642
  # Scale with identity preservation
 
724
  return generated_image
725
 
726
 
727
+ print("[OK] Generator class ready")
gitattributes (1) ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
models.py CHANGED
@@ -13,7 +13,7 @@ from diffusers import (
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
- from controlnet_aux import ZoeDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
@@ -82,16 +82,25 @@ def load_face_analysis():
82
 
83
 
84
  def load_depth_detector():
85
- """Load Zoe Depth detector."""
86
- print("Loading Zoe Depth detector...")
87
  try:
88
- zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
89
- zoe_depth.to(device)
90
- print(" [OK] Zoe Depth loaded successfully")
91
- return zoe_depth, True
 
92
  except Exception as e:
93
- print(f" [WARNING] Zoe Depth not available: {e}")
94
- return None, False
 
 
 
 
 
 
 
 
95
 
96
 
97
  def load_controlnets():
@@ -276,7 +285,7 @@ def setup_ip_adapter(pipe, image_encoder):
276
 
277
  print(" [OK] IP-Adapter fully loaded with InstantID architecture")
278
  print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
279
- print(f" - Face embeddings: 512D → 16x2048D")
280
 
281
  return image_proj_model, True
282
 
@@ -288,19 +297,37 @@ def setup_ip_adapter(pipe, image_encoder):
288
 
289
 
290
  def setup_compel(pipe):
291
- """Setup Compel for better SDXL prompt handling."""
292
  print("Setting up Compel for enhanced prompt processing...")
293
  try:
 
294
  compel = Compel(
295
  tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
296
  text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
297
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
298
- requires_pooled=[False, True]
 
299
  )
300
- print(" [OK] Compel loaded successfully")
301
  return compel, True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
  except Exception as e:
303
  print(f" [WARNING] Compel not available: {e}")
 
304
  return None, False
305
 
306
 
 
13
  from diffusers.models.attention_processor import AttnProcessor2_0
14
  from transformers import CLIPVisionModelWithProjection
15
  from insightface.app import FaceAnalysis
16
+ from controlnet_aux import MidasDetector, LeresDetector
17
  from huggingface_hub import hf_hub_download
18
  from compel import Compel, ReturnedEmbeddingsType
19
 
 
82
 
83
 
84
  def load_depth_detector():
85
+ """Load LeRes++ Depth detector (superior to Midas/Zoe for detailed depth estimation)."""
86
+ print("Loading LeRes++ Depth detector...")
87
  try:
88
+ from controlnet_aux import LeresDetector
89
+ leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
90
+ leres_depth.to(device)
91
+ print(" [OK] LeRes++ Depth loaded successfully (+15-20% accuracy over Midas/Zoe)")
92
+ return leres_depth, True
93
  except Exception as e:
94
+ print(f" [WARNING] LeRes++ Depth not available: {e}")
95
+ print(" Attempting fallback to Midas Depth...")
96
+ try:
97
+ midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
98
+ midas_depth.to(device)
99
+ print(" [OK] Midas Depth loaded as fallback")
100
+ return midas_depth, True
101
+ except Exception as e2:
102
+ print(f" [ERROR] All depth detectors failed: {e2}")
103
+ return None, False
104
 
105
 
106
  def load_controlnets():
 
285
 
286
  print(" [OK] IP-Adapter fully loaded with InstantID architecture")
287
  print(f" - Resampler: 4 layers, 20 heads, 16 output tokens")
288
+ print(f" - Face embeddings: 512D → 16x2048D")
289
 
290
  return image_proj_model, True
291
 
 
297
 
298
 
299
  def setup_compel(pipe):
300
+ """Setup Compel for better SDXL prompt handling with robust error handling."""
301
  print("Setting up Compel for enhanced prompt processing...")
302
  try:
303
+ # FIXED: Handle SDXL dual tokenizer setup more carefully
304
  compel = Compel(
305
  tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
306
  text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
307
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
308
+ requires_pooled=[False, True],
309
+ padding_get_round_multiple=False # Disable padding that might cause mismatches
310
  )
311
+ print(" [OK] Compel loaded successfully with SDXL dual tokenizers")
312
  return compel, True
313
+ except TypeError:
314
+ # Fallback for older Compel versions without padding parameter
315
+ try:
316
+ compel = Compel(
317
+ tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
318
+ text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
319
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
320
+ requires_pooled=[False, True]
321
+ )
322
+ print(" [OK] Compel loaded (standard config)")
323
+ return compel, True
324
+ except Exception as e:
325
+ print(f" [WARNING] Compel not available: {e}")
326
+ print(" [INFO] Will use standard prompt encoding instead")
327
+ return None, False
328
  except Exception as e:
329
  print(f" [WARNING] Compel not available: {e}")
330
+ print(" [INFO] Will use standard prompt encoding instead")
331
  return None, False
332
 
333
 
utils.py CHANGED
@@ -300,11 +300,30 @@ def get_facial_attributes(face):
300
  confidence = float(emotion[emotion_idx])
301
 
302
  if confidence > 0.4: # Only add if confident
 
 
 
303
  if emotion_name == 'happiness':
304
- attributes['expression'] = 'smiling'
305
- attributes['description'].append('smiling')
306
- elif emotion_name not in ['neutral']:
307
- attributes['expression'] = emotion_name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  except (ValueError, TypeError, AttributeError, IndexError) as e:
309
  # Expression not available in this model
310
  pass
@@ -395,10 +414,10 @@ def get_demographic_description(age, gender_code):
395
 
396
  def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
397
  """
398
- Calculate optimal size maintaining aspect ratio with dimensions as multiples of 8.
399
 
400
  This updated version supports ANY aspect ratio (not just predefined ones),
401
- while ensuring dimensions are multiples of 8 and keeping total pixels reasonable.
402
 
403
  Args:
404
  original_width: Original image width
@@ -407,7 +426,7 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
407
  max_dimension: Maximum allowed dimension (default 1536)
408
 
409
  Returns:
410
- Tuple of (optimal_width, optimal_height) as multiples of 8
411
  """
412
  aspect_ratio = original_width / original_height
413
 
@@ -423,15 +442,15 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
423
  best_diff = diff
424
  best_match = (width, height)
425
 
426
- # Ensure dimensions are multiples of 8
427
  width, height = best_match
428
- width = int((width // 8) * 8)
429
- height = int((height // 8) * 8)
430
 
431
  return width, height
432
 
433
  # NEW: Support any aspect ratio
434
- # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of 8
435
 
436
  # Target total pixels (around 1 megapixel for SDXL, adjustable)
437
  target_pixels = 1024 * 1024 # ~1MP, good balance for SDXL
@@ -455,23 +474,23 @@ def calculate_optimal_size(original_width, original_height, recommended_sizes=No
455
  optimal_height = max_dimension
456
  optimal_width = optimal_height * aspect_ratio
457
 
458
- # Round to nearest multiple of 8
459
- width = int(round(optimal_width / 8) * 8)
460
- height = int(round(optimal_height / 8) * 8)
461
 
462
  # Ensure minimum size (at least 512 on shortest side)
463
  min_dimension = 512
464
  if min(width, height) < min_dimension:
465
  if width < height:
466
  width = min_dimension
467
- height = int(round((width / aspect_ratio) / 8) * 8)
468
  else:
469
  height = min_dimension
470
- width = int(round((height * aspect_ratio) / 8) * 8)
471
 
472
- # Final safety check: ensure multiples of 8
473
- width = max(8, int((width // 8) * 8))
474
- height = max(8, int((height // 8) * 8))
475
 
476
  print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")
477
 
@@ -506,4 +525,4 @@ def enhance_face_crop(face_crop):
506
  return face_crop_final
507
 
508
 
509
- print("[OK] Utilities loaded")
 
300
  confidence = float(emotion[emotion_idx])
301
 
302
  if confidence > 0.4: # Only add if confident
303
+
304
+ expression_desc = None
305
+
306
  if emotion_name == 'happiness':
307
+ expression_desc = 'smiling'
308
+ elif emotion_name == 'surprise':
309
+ expression_desc = 'surprised expression'
310
+ elif emotion_name == 'sadness':
311
+ expression_desc = 'sad expression'
312
+ elif emotion_name == 'anger':
313
+ expression_desc = 'angry expression'
314
+ elif emotion_name == 'neutral':
315
+ expression_desc = 'neutral expression'
316
+
317
+ # Add other emotions like 'disgust' or 'fear' if desired
318
+
319
+ if expression_desc:
320
+ attributes['expression'] = expression_desc
321
+
322
+ # Only add non-neutral expressions to the prompt description
323
+ if emotion_name != 'neutral':
324
+ if expression_desc not in attributes['description']:
325
+ attributes['description'].append(expression_desc)
326
+
327
  except (ValueError, TypeError, AttributeError, IndexError) as e:
328
  # Expression not available in this model
329
  pass
 
414
 
415
  def calculate_optimal_size(original_width, original_height, recommended_sizes=None, max_dimension=1536):
416
  """
417
+ Calculate optimal size maintaining aspect ratio with dimensions as multiples of 64.
418
 
419
  This updated version supports ANY aspect ratio (not just predefined ones),
420
+ while ensuring dimensions are multiples of 64 and keeping total pixels reasonable.
421
 
422
  Args:
423
  original_width: Original image width
 
426
  max_dimension: Maximum allowed dimension (default 1536)
427
 
428
  Returns:
429
+ Tuple of (optimal_width, optimal_height) as multiples of 64
430
  """
431
  aspect_ratio = original_width / original_height
432
 
 
442
  best_diff = diff
443
  best_match = (width, height)
444
 
445
+ # Ensure dimensions are multiples of 64
446
  width, height = best_match
447
+ width = int((width // 64) * 64)
448
+ height = int((height // 64) * 64)
449
 
450
  return width, height
451
 
452
  # NEW: Support any aspect ratio
453
+ # Strategy: Keep aspect ratio, scale to reasonable total pixels, round to multiples of 64
454
 
455
  # Target total pixels (around 1 megapixel for SDXL, adjustable)
456
  target_pixels = 1024 * 1024 # ~1MP, good balance for SDXL
 
474
  optimal_height = max_dimension
475
  optimal_width = optimal_height * aspect_ratio
476
 
477
+ # Round to nearest multiple of 64
478
+ width = int(round(optimal_width / 64) * 64)
479
+ height = int(round(optimal_height / 64) * 64)
480
 
481
  # Ensure minimum size (at least 512 on shortest side)
482
  min_dimension = 512
483
  if min(width, height) < min_dimension:
484
  if width < height:
485
  width = min_dimension
486
+ height = int(round((width / aspect_ratio) / 64) * 64)
487
  else:
488
  height = min_dimension
489
+ width = int(round((height * aspect_ratio) / 64) * 64)
490
 
491
+ # Final safety check: ensure multiples of 64
492
+ width = max(64, int((width // 64) * 64))
493
+ height = max(64, int((height // 64) * 64))
494
 
495
  print(f"[SIZING] Aspect ratio: {aspect_ratio:.3f}, Output: {width}x{height} ({width*height/1e6:.2f}MP)")
496
 
 
525
  return face_crop_final
526
 
527
 
528
+ print("[OK] Utilities loaded")