# brandably / engine.py
# Saad5151's picture
# feat: add manual logo drag-to-reposition feature
# 247d630
import cv2
import numpy as np
import os
from ultralytics import SAM
# ── Model loading ──────────────────────────────────────────────────────────
# The SAM weights are loaded once, at import time.  On HF Spaces HOME points
# at /tmp, so that is where ultralytics caches downloaded models.  Set the
# MODEL_PATH environment variable to use a different (e.g. pre-downloaded)
# weights file.
MODEL_PATH = os.getenv("MODEL_PATH", "sam_b.pt")

# `model` stays None whenever loading fails; the detection code then relies
# on its traditional (non-SAM) fallback.
model = None
print(f"Initializing SAM model from: {MODEL_PATH} ...")
try:
    model = SAM(MODEL_PATH)
    print("SAM model loaded successfully.")
except Exception as e:
    print(f"CRITICAL WARNING: SAM model failed to load ({e}). Falling back to traditional methods.")
    model = None
# ── Product detection ──────────────────────────────────────────────────────
def detect_product_center(image, prompt_point=None, alpha_mask=None):
    """
    Detects the product region in the image using SAM ViT-B with point prompting.
    Falls back to contour detection if SAM is unavailable, raises, or yields
    no usable mask.

    :param image: BGR numpy array. The contour fallback additionally accepts
        single-channel and BGRA arrays.
    :param prompt_point: (x, y) relative coordinate (0.0–1.0). When None, the
        centroid of the opaque pixels (if ``alpha_mask`` is given) or the
        image centre is used. A point that lands on a transparent pixel is
        snapped to the opaque centroid.
    :param alpha_mask: Optional [H, W] float mask from original transparency.
    :returns: (position, mask) where position is relative (cx, cy) and mask is
        an [H, W] uint8 array. When nothing is detected the result is
        ``(0.5, 0.5)`` with an all-zero mask.
    """
    h_img, w_img = image.shape[:2]

    def _to_pixel(point):
        # Clamp into the image so a relative coordinate of exactly 1.0 (or a
        # slightly out-of-range drag position) still maps to a valid pixel.
        col = min(w_img - 1, max(0, int(point[0] * w_img)))
        row = min(h_img - 1, max(0, int(point[1] * h_img)))
        return col, row

    # Transparency-aware snapping: never prompt on a transparent pixel.
    if alpha_mask is not None:
        opaque_rows, opaque_cols = np.where(alpha_mask > 0.1)
        if opaque_rows.size > 0:
            opaque_centroid = (
                float(np.mean(opaque_cols)) / w_img,
                float(np.mean(opaque_rows)) / h_img,
            )
            if prompt_point is None:
                prompt_point = opaque_centroid
            else:
                px_idx, py_idx = _to_pixel(prompt_point)
                if alpha_mask[py_idx, px_idx] < 0.1:
                    prompt_point = opaque_centroid

    if prompt_point is None:
        px, py = w_img // 2, h_img // 2
    else:
        px, py = _to_pixel(prompt_point)

    # SAM inference
    if model is not None:
        print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
        try:
            results = model.predict(
                image,
                points=[[px, py]],
                labels=[1],
                device="cpu",
                imgsz=640,
                conf=0.3,
                verbose=False,
            )
            best_mask = None
            best_centroid = None
            best_area = 0.0
            max_score = 0
            if results and results[0].masks is not None:
                for i, mask_tensor in enumerate(results[0].masks.data):
                    mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
                    if mask_np.shape != (h_img, w_img):
                        mask_np = cv2.resize(
                            mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST
                        )
                    area = float(np.sum(mask_np > 0))
                    # Skip masks that cover nearly the whole frame or are tiny.
                    if area > (h_img * w_img * 0.95) or area < 1000:
                        continue
                    M = cv2.moments(mask_np)
                    if M["m00"] == 0:
                        continue
                    # Slight preference for the first mask returned.
                    score = area * (1.1 if i == 0 else 1.0)
                    if alpha_mask is not None:
                        # Reward masks that overlap the opaque region.
                        mask_float = mask_np.astype(float) / 255.0
                        alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
                        score *= 1.0 + alignment
                    if score > max_score:
                        max_score = score
                        best_mask = mask_np
                        best_area = area
                        best_centroid = (M["m10"] / M["m00"], M["m01"] / M["m00"])
            if best_mask is not None:
                cx, cy = best_centroid
                # Report the real pixel area, not the weighted selection score.
                print(f"SAM success: isolated prompted object (Area: {best_area:.0f})")
                return (cx / w_img, cy / h_img), best_mask
        except Exception as e:
            # Best-effort: any SAM failure falls through to the contour path.
            print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")

    # Fallback: contour detection
    print("Running contour-based detection fallback...")
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    edges = cv2.Canny(blurred, 20, 150)
    combined = cv2.bitwise_or(thresh, edges)
    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best_cnt = None
    best_cnt_centroid = None
    max_c_score = 0
    target_p = np.array([px, py])
    # Normalise distances by the image diagonal so the proximity weight stays
    # within [0, 1] for any aspect ratio; dividing by the width alone drives
    # the weight negative on portrait images and discards valid contours.
    diagonal = float(np.hypot(w_img, h_img))
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 500:
            continue
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
        dist = np.linalg.norm(np.array([cx, cy]) - target_p)
        # Large contours close to the prompt point score highest.
        score = area * (1.0 - (dist / diagonal))
        if score > max_c_score:
            max_c_score = score
            best_cnt = cnt
            best_cnt_centroid = (cx, cy)

    if best_cnt is not None:
        cx, cy = best_cnt_centroid
        mock_mask = np.zeros((h_img, w_img), dtype=np.uint8)
        cv2.drawContours(mock_mask, [best_cnt], -1, 255, -1)
        return (cx / w_img, cy / h_img), mock_mask

    # Nothing detected: image centre and an empty mask.
    return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)
# ── Logo placement ──────────────────────────────────────────────────────────
def process_logo_placement(
    product_path,
    logo_path,
    scale_percent,
    output_path,
    position=None,
    mask_path=None,
    logo_width_mm=None,
    logo_height_mm=None,
    position_preset=None,
):
    """
    Places a logo on a product image with realistic texture blending.

    :param product_path: Path to the product image.
    :param logo_path: Path to the logo image.
    :param scale_percent: Base scale (0.0–1.0) if mm sizing not provided.
    :param output_path: Path to save the composited result.
    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
    :param mask_path: Optional path to save the AI debug mask.
    :param logo_width_mm: Target logo width in mm.
    :param logo_height_mm: Target logo height in mm.
    :param position_preset: 'top', 'center', or 'bottom'.
    :returns: dict with ``output_path``, the final ``position``, the effective
        ``scale_percent`` and ``logo_aspect`` (logo height divided by width).
    :raises ValueError: if either image cannot be loaded or the effective
        scale is not a positive number.
    :raises OSError: if the composited image cannot be written.
    """
    product_raw = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)
    if product_raw is None or logo is None:
        raise ValueError("Could not load one or both images.")

    # IMREAD_UNCHANGED returns a 2-D array for grayscale files; promote both
    # images to colour so the channel indexing below is always valid.
    if logo.ndim == 2:
        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
    elif logo.shape[2] == 3:
        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)
    if product_raw.ndim == 2:
        product = cv2.cvtColor(product_raw, cv2.COLOR_GRAY2BGR)
    else:
        product = product_raw.copy()

    h_prod, w_prod = product.shape[:2]
    logo_h_orig, logo_w_orig = logo.shape[:2]
    logo_aspect = logo_w_orig / logo_h_orig

    # Professional mm-based sizing overrides the plain percentage.
    if logo_width_mm or logo_height_mm:
        scale_percent = _scale_from_mm(logo_width_mm, logo_height_mm, logo_aspect)
    if not scale_percent or scale_percent <= 0:
        raise ValueError("scale_percent must be a positive number.")

    has_alpha = product.shape[2] == 4
    if has_alpha:
        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
    else:
        product_bgr = product
        product_alpha_orig = np.ones(product.shape[:2], dtype=float)

    det_pos, debug_mask = detect_product_center(
        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
    )

    # Apply position preset relative to detected mask bounds
    if position_preset and debug_mask is not None:
        print(f"Applying position preset: {position_preset}")
        y_indices, _ = np.where(debug_mask > 128)
        if len(y_indices) > 0:
            y_min, y_max = float(np.min(y_indices)), float(np.max(y_indices))
            cx = det_pos[0]
            if position_preset == "top":
                cy = (y_min + (y_max - y_min) * 0.2) / h_prod
            elif position_preset == "bottom":
                cy = (y_min + (y_max - y_min) * 0.8) / h_prod
            else:
                cy = det_pos[1]
            position = (cx, cy)
            print(f"Preset coordinate: {position}")

    if position is None:
        position = det_pos
        print(f"Auto-detected placement center: {position}")

    if mask_path and debug_mask is not None:
        # The debug mask is auxiliary output: report a failed write, don't abort.
        if cv2.imwrite(mask_path, debug_mask):
            print(f"Debug mask saved to {mask_path}")
        else:
            print(f"WARNING: could not save debug mask to {mask_path}")

    # Resize the logo; keep at least one pixel per side so cv2.resize never
    # receives an empty target size for very small scales.
    target_w = max(1, int(w_prod * scale_percent))
    aspect_ratio = logo.shape[0] / logo.shape[1]
    target_h = max(1, int(target_w * aspect_ratio))
    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    # Logo rectangle centred on the requested position, in product pixels.
    center_x = int(w_prod * position[0])
    center_y = int(h_prod * position[1])
    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
    x2, y2 = x1 + target_w, y1 + target_h

    # Clip the rectangle to the product, and crop the logo by the same amount.
    x1_c, y1_c = max(0, x1), max(0, y1)
    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)
    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
    logo_x2 = target_w - max(0, x2 - w_prod)
    logo_y2 = target_h - max(0, y2 - h_prod)

    result = {
        "output_path": output_path,
        "position": position,
        "scale_percent": scale_percent,
        "logo_aspect": aspect_ratio
    }

    if x1_c >= x2_c or y1_c >= y2_c:
        # Logo lies completely outside the product: save the product as-is.
        _write_image(output_path, product)
        return result

    logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
    logo_rgb = logo_crop[:, :, :3].astype(float)
    logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0

    # Never paint onto pixels that were transparent in the product.
    logo_alpha = logo_alpha * product_alpha_orig[y1_c:y2_c, x1_c:x2_c]
    # Clip to the detected product region. An all-zero mask means detection
    # found nothing, so skip clipping rather than erase the whole logo.
    if debug_mask is not None and np.any(debug_mask):
        logo_alpha = logo_alpha * (debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0)
    logo_alpha_3d = logo_alpha[:, :, np.newaxis]

    # Slicing :3 addresses the colour channels of both BGR and BGRA products.
    roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)
    # Mix the flat logo colour (40%) with a multiply blend (60%) so the
    # underlying product texture shows through the logo.
    multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
    refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
    blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))
    product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

    _write_image(output_path, product)
    print(f"Successfully generated preview at {output_path}")
    return result


def _scale_from_mm(logo_width_mm, logo_height_mm, logo_aspect):
    """Convert a target logo size in mm into a fraction of the product width.

    The product is assumed to be 500 mm wide. ``logo_aspect`` is the logo's
    width divided by its height. When both dimensions are given the logo is
    fitted inside the box, i.e. the smaller scale wins.
    """
    REF_W_MM = 500.0
    target_w_scale = logo_width_mm / REF_W_MM if logo_width_mm else None
    # A height constraint is expressed as the width it implies.
    target_h_scale = (logo_height_mm * logo_aspect) / REF_W_MM if logo_height_mm else None
    if target_w_scale and target_h_scale:
        scale_percent = min(target_w_scale, target_h_scale)
        print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box → Scale: {scale_percent:.2f}")
    elif target_w_scale:
        scale_percent = target_w_scale
        print(f"Sizing by width: {logo_width_mm}mm → Scale: {scale_percent:.2f}")
    else:
        scale_percent = target_h_scale
        print(f"Sizing by height: {logo_height_mm}mm → Scale: {scale_percent:.2f}")
    return scale_percent


def _write_image(path, image):
    """Write ``image`` to ``path`` with OpenCV, raising OSError if the write is rejected."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image to {path}")