primerz committed
Commit eb833d8 · verified · 1 Parent(s): e456dc4

Update utils.py

Files changed (1): utils.py +14 -21
utils.py CHANGED
@@ -1,12 +1,15 @@
-import cv2
-import numpy as np
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
 import torch
 from config import Config
 
+# Simple global caching for the captioner
+captioner_processor = None
+captioner_model = None
+
 def resize_image_to_1mp(image):
     """Resizes image to approx 1MP (e.g., 1024x1024) preserving aspect ratio."""
+    image = image.convert("RGB")
     w, h = image.size
     target_pixels = 1024 * 1024
     aspect_ratio = w / h
@@ -15,35 +18,25 @@ def resize_image_to_1mp(image):
     new_h = int((target_pixels / aspect_ratio) ** 0.5)
     new_w = int(new_h * aspect_ratio)
 
-    # Ensure divisibility by 8 (vae requirement), usually 32 for safety
+    # Ensure divisibility by 32 for efficiency
     new_w = (new_w // 32) * 32
     new_h = (new_h // 32) * 32
 
+    if new_w == 0 or new_h == 0:
+        new_w, new_h = 1024, 1024  # Fallback
+
     return image.resize((new_w, new_h), Image.LANCZOS)
 
-# Simple caching for captioner
-captioner_processor = None
-captioner_model = None
-
 def get_caption(image):
+    """Generates a caption for the image if one isn't provided."""
     global captioner_processor, captioner_model
 
     if captioner_model is None:
-        print("Loading Captioner...")
-        captioner_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-        captioner_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(Config.DEVICE)
+        print("Loading Captioner (BLIP)...")
+        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
+        captioner_model = BlipForConditionalGeneration.from_pretrained(Config.CAPTIONER_REPO).to(Config.DEVICE)
 
     inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
     out = captioner_model.generate(**inputs)
     caption = captioner_processor.decode(out[0], skip_special_tokens=True)
-    return caption
-
-def prepare_control_images(image, zoe_detector, lineart_detector):
-    """Generates the conditioning maps from the input image."""
-    # 1. Zoe Depth Map
-    depth_map = zoe_detector(image, detect_resolution=1024, image_resolution=1024)
-
-    # 2. LineArt Map
-    lineart_map = lineart_detector(image, detect_resolution=1024, image_resolution=1024)
-
-    return depth_map, lineart_map
+    return caption
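
For reference, below is a minimal sketch of the Config entries the updated utils.py depends on, plus a usage example. This is not part of the commit: utils.py only references Config.DEVICE and Config.CAPTIONER_REPO, and the repo id shown here is an assumption that mirrors the hard-coded "Salesforce/blip-image-captioning-base" string this commit removes, so it presumably points at the same model.

# config.py (hypothetical sketch)
import torch

class Config:
    # Device selection is a guess at a typical setup
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    # Assumed to match the hard-coded repo id removed by this commit
    CAPTIONER_REPO = "Salesforce/blip-image-captioning-base"

# Example usage of the updated helpers
from PIL import Image
from utils import resize_image_to_1mp, get_caption

img = Image.open("photo.png")   # any mode; convert("RGB") now normalizes RGBA/L inputs
img = resize_image_to_1mp(img)  # ~1MP output, both sides rounded down to multiples of 32
print(get_caption(img))         # first call lazily loads BLIP; later calls reuse the cached model

Rounding both sides to multiples of 32 comfortably satisfies the divisible-by-8 VAE requirement the old comment mentioned, and the new zero-size fallback guards against extreme aspect ratios collapsing a dimension to 0.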