|
|
from PIL import Image |
|
|
from transformers import BlipProcessor, BlipForConditionalGeneration |
|
|
import torch |
|
|
from config import Config |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import math |
|
|
|
|
|
|
|
|
# Lazily-initialized BLIP captioning singletons. Both stay None until the
# first call to get_caption(), which loads the processor/model once and
# reuses them for the lifetime of the process.
captioner_processor = None


captioner_model = None
|
|
|
|
|
def resize_image_to_1mp(image):
    """Resize an image to roughly one megapixel, preserving aspect ratio.

    The output dimensions are snapped down to multiples of 48 (diffusion
    pipelines typically require dimensions divisible by such a factor) and
    clamped to a minimum of 48 per side, so extreme aspect ratios can no
    longer collapse a dimension to zero.

    Args:
        image: PIL.Image.Image in any mode; it is converted to RGB first.

    Returns:
        A new RGB PIL.Image.Image of approximately 1024 * 1024 pixels.
    """
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Solve new_w * new_h == target_pixels with new_w / new_h == aspect_ratio.
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Snap down to multiples of 48, but never below 48. The previous
    # fallback (a fixed 1024x1024 square whenever a dimension floored to 0)
    # silently discarded the aspect ratio for very wide/tall inputs.
    new_w = max(48, (new_w // 48) * 48)
    new_h = max(48, (new_h // 48) * 48)

    return image.resize((new_w, new_h), Image.LANCZOS)
|
|
|
|
|
def get_caption(image):
    """Generate a text caption for *image* with a lazily-loaded BLIP model.

    On the first call, the processor and model are loaded from
    Config.CAPTIONER_REPO onto Config.DEVICE and cached in the module-level
    singletons; subsequent calls reuse them.

    Args:
        image: PIL.Image.Image to caption.

    Returns:
        str: the decoded caption with special tokens stripped.
    """
    global captioner_processor, captioner_model

    if captioner_model is None:
        print("Loading Captioner (BLIP)...")
        captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
        captioner_model = BlipForConditionalGeneration.from_pretrained(Config.CAPTIONER_REPO).to(Config.DEVICE)
        # Pure-inference model: switch off dropout/batch-norm training behavior.
        captioner_model.eval()

    inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    # no_grad: skip autograd graph construction during generation — this is
    # inference only, and tracking gradients wastes memory for no benefit.
    with torch.no_grad():
        out = captioner_model.generate(**inputs)
    caption = captioner_processor.decode(out[0], skip_special_tokens=True)
    return caption
|
|
|
|
|
|
|
|
def draw_kps(image_pil, kps, color_list=((255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255))):
    """Render five facial keypoints as a stick-figure control image.

    Draws four "limbs" (each connecting keypoint 2 to one of the other four
    points, per limbSeq) as dimmed filled ellipses on a black canvas the
    same size as *image_pil*, then overlays a solid circle per keypoint.

    Args:
        image_pil: PIL image whose size defines the output canvas.
        kps: sequence of five (x, y) keypoints; ordering must match
            limbSeq and color_list.
        color_list: one RGB tuple per keypoint. The default was previously
            a mutable list (a Python anti-pattern); it is now an immutable
            tuple with identical values, so callers are unaffected.

    Returns:
        PIL.Image.Image: the rendered keypoint map.
    """
    stickwidth = 4
    # Limb connectivity: keypoint 2 connects to each of the other four.
    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
    kps = np.array(kps)

    w, h = image_pil.size
    out_img = np.zeros([h, w, 3])

    for i in range(len(limbSeq)):
        index = limbSeq[i]
        color = color_list[index[0]]

        x = kps[index][:, 0]
        y = kps[index][:, 1]
        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
        # Approximate a thick line segment with a filled rotated ellipse.
        polygon = cv2.ellipse2Poly(
            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
        )
        # cv2.fillConvexPoly draws in place; the per-iteration .copy() the
        # original made was redundant work on a function-local buffer.
        cv2.fillConvexPoly(out_img, polygon, color)
    # Dim the limbs so the keypoint circles drawn next stand out.
    out_img = (out_img * 0.6).astype(np.uint8)

    for idx_kp, kp in enumerate(kps):
        color = color_list[idx_kp]
        x, y = kp
        # cv2.circle also draws in place; no copy needed.
        cv2.circle(out_img, (int(x), int(y)), 10, color, -1)

    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
    return out_img_pil
|
|
|