face-to-pixel-art / utils.py
primerz's picture
Update utils.py
ed5ed53 verified
raw
history blame
2.67 kB
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
import math
# Module-level cache for the captioner. Both names stay None until
# get_caption() loads the processor and model on its first call.
captioner_processor = captioner_model = None
def resize_image_to_1mp(image):
    """Resize an image to roughly 768x768 pixels of area, keeping aspect ratio.

    The image is converted to RGB, scaled so that width * height is close to
    768 * 768 (about 0.59 megapixels), and each side is then rounded down to
    a multiple of 64.

    Args:
        image: A PIL image.

    Returns:
        A new RGB PIL image resampled with LANCZOS. If rounding collapses
        either side to 0 (very elongated inputs), a 768x768 image is
        returned instead, which does not preserve the aspect ratio.
    """
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 768 * 768
    aspect_ratio = w / h

    # Solve new_w * new_h == target_pixels subject to new_w / new_h == aspect_ratio.
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Round each side down to a multiple of 64.
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    if new_w == 0 or new_h == 0:
        # Fallback square. This was previously `768 * 768`, a single int that
        # cannot be unpacked into two names and raised TypeError.
        new_w, new_h = 768, 768

    return image.resize((new_w, new_h), Image.LANCZOS)
def get_caption(image):
    """Return a caption for ``image`` generated by the BLIP captioner.

    The processor and model are loaded from ``Config.CAPTIONER_REPO`` on the
    first call, moved to ``Config.DEVICE``, and kept in the module-level
    globals so later calls reuse them.
    """
    global captioner_processor, captioner_model

    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        loaded_model = BlipForConditionalGeneration.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = loaded_model.to(Config.DEVICE)

    # Preprocess the image into model inputs on the configured device.
    model_inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    generated_ids = captioner_model.generate(**model_inputs)

    # Only the first generated sequence is decoded.
    return captioner_processor.decode(generated_ids[0], skip_special_tokens=True)
# --- ADDED: Function from your provided file ---
def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
    """Draw keypoints and their connecting limbs on a black canvas.

    Args:
        image_pil: Image whose ``size`` (width, height) sets the canvas
            dimensions; its pixel data is not read.
        kps: Sequence of (x, y) keypoints. The limb table connects points
            0, 1, 3 and 4 to point 2, so at least five points are required.
        color_list: One 3-value color per keypoint. Each limb uses the color
            of its first endpoint. Must have at least as many entries as
            ``kps``.

    Returns:
        A PIL image built from a uint8 (height, width, 3) array: limbs drawn
        as filled ellipses dimmed to 60% intensity, with a filled circle of
        radius 10 at every keypoint at full intensity.
    """
    stickwidth = 4
    limb_seq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for limb in limb_seq:
        color = color_list[limb[0]]
        x = kps[limb][:, 0]
        y = kps[limb][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        # Approximate the limb as an ellipse centred between its endpoints.
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
        )
        # The returned image is rebound each time, so no defensive copy of
        # the whole canvas is needed per limb.
        out_img = cv2.fillConvexPoly(out_img, polygon, color)

    # Dim the limbs before drawing the keypoints so the points stand out.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        out_img = cv2.circle(out_img, (int(x), int(y)), 10, color, -1)

    return Image.fromarray(out_img.astype(np.uint8))
# --- END ADDED ---