# Spaces: Saad5151 / brandably (Sleeping)
#
# File size: 11,708 Bytes

import cv2
import numpy as np
import os
from ultralytics import SAM

# ── Model loading ──────────────────────────────────────────────────────────
# The weights location comes from the MODEL_PATH environment variable and
# defaults to "sam_b.pt" (on HF Spaces HOME is /tmp, so ultralytics caches
# downloaded models there). Pointing MODEL_PATH at a pre-downloaded weights
# file skips the download.
MODEL_PATH = os.getenv("MODEL_PATH", "sam_b.pt")

# `model` stays None whenever loading fails; detection code checks it and
# falls back to classical OpenCV methods.
model = None
print(f"Initializing SAM model from: {MODEL_PATH} ...")
try:
    model = SAM(MODEL_PATH)
    print("SAM model loaded successfully.")
except Exception as load_error:
    model = None
    print(f"CRITICAL WARNING: SAM model failed to load ({load_error}). Falling back to traditional methods.")


# ── Product detection ──────────────────────────────────────────────────────
def detect_product_center(image, prompt_point=None, alpha_mask=None):
    """
    Detects the product region in the image using SAM with point prompting.
    Falls back to contour detection if SAM is unavailable, fails, or yields
    no usable mask.

    :param image: BGR numpy array (grayscale / BGRA are accepted by the
        contour fallback as well).
    :param prompt_point: (x, y) relative coordinate (0.0–1.0). When omitted,
        the opaque centroid of ``alpha_mask`` is used if available, otherwise
        the image centre.
    :param alpha_mask: Optional [H, W] float mask from original transparency.
        A prompt that lands on a pixel with alpha < 0.1 is snapped to the
        opaque centroid.
    :returns: (position, mask) where position is the relative (cx, cy)
        centroid of the selected region and mask is an [H, W] uint8 array
        (0 / 255). If nothing is found, ((0.5, 0.5), all-zero mask).
    """
    h_img, w_img = image.shape[:2]

    # Transparency-aware snapping: never prompt on a transparent pixel.
    if alpha_mask is not None:
        valid_pixels = np.where(alpha_mask > 0.1)
        if len(valid_pixels[0]) > 0:
            cy_alpha = np.mean(valid_pixels[0])
            cx_alpha = np.mean(valid_pixels[1])
            opaque_centroid = (cx_alpha / w_img, cy_alpha / h_img)

            if prompt_point is None:
                prompt_point = opaque_centroid
            else:
                py_idx = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))
                px_idx = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
                if alpha_mask[py_idx, px_idx] < 0.1:
                    prompt_point = opaque_centroid

    if prompt_point is None:
        px, py = w_img // 2, h_img // 2
    else:
        # Clamp to valid pixel indices: a relative coordinate of exactly 1.0
        # (or anything out of range) would otherwise fall outside the image.
        px = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
        py = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))

    # SAM inference
    if model is not None:
        print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
        try:
            results = model.predict(
                image,
                points=[[px, py]],
                labels=[1],
                device="cpu",
                imgsz=640,
                conf=0.3,
                verbose=False,
            )

            best_mask = None
            best_area = 0.0
            best_centroid = None
            max_score = 0

            if results and results[0].masks is not None:
                for i, mask_tensor in enumerate(results[0].masks.data):
                    mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
                    if mask_np.shape != (h_img, w_img):
                        mask_np = cv2.resize(mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST)

                    # Discard masks that cover (almost) the whole frame or
                    # are too small to be a product.
                    area = float(np.sum(mask_np > 0))
                    if area > (h_img * w_img * 0.95) or area < 1000:
                        continue

                    M = cv2.moments(mask_np)
                    if M["m00"] == 0:
                        continue

                    # Larger masks win; the first mask gets a 10% bonus and
                    # overlap with the opaque region boosts the score further.
                    score = area * (1.1 if i == 0 else 1.0)
                    if alpha_mask is not None:
                        mask_float = mask_np.astype(float) / 255.0
                        alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
                        score *= 1.0 + alignment

                    if score > max_score:
                        max_score = score
                        best_mask = mask_np
                        best_area = area
                        best_centroid = (M["m10"] / M["m00"], M["m01"] / M["m00"])

            if best_mask is not None:
                cx, cy = best_centroid
                print(f"SAM success: isolated prompted object (Area: {best_area:.0f})")
                return (cx / w_img, cy / h_img), best_mask

        except Exception as e:
            # Deliberate best-effort: any SAM failure degrades to contours.
            print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")

    # Fallback: contour detection
    print("Running contour-based detection fallback...")
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    edges = cv2.Canny(blurred, 20, 150)
    combined = cv2.bitwise_or(thresh, edges)
    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    best_cnt = None
    best_cnt_centroid = None
    max_c_score = 0
    target_p = np.array([px, py])

    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 500:
            continue
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
        dist = np.linalg.norm(np.array([cx, cy]) - target_p)
        # Area weighted by closeness to the prompt. NOTE: the distance is
        # normalised by image width only, so on tall images the weight can
        # become non-positive and such contours are never selected.
        score = area * (1.0 - (dist / w_img))
        if score > max_c_score:
            max_c_score = score
            best_cnt = cnt
            best_cnt_centroid = (cx, cy)

    if best_cnt is not None:
        cx, cy = best_cnt_centroid
        mock_mask = np.zeros((h_img, w_img), dtype=np.uint8)
        cv2.drawContours(mock_mask, [best_cnt], -1, 255, -1)
        return (cx / w_img, cy / h_img), mock_mask

    return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)


# ── Logo placement ──────────────────────────────────────────────────────────
def process_logo_placement(
    product_path,
    logo_path,
    scale_percent,
    output_path,
    position=None,
    mask_path=None,
    logo_width_mm=None,
    logo_height_mm=None,
    position_preset=None,
):
    """
    Places a logo on a product image with realistic texture blending.

    The logo is resized relative to the product width, centred on the chosen
    position, clipped to the image bounds, and blended as a 40/60 mix of the
    plain logo colour and a multiply blend with the underlying product
    pixels. Logo opacity is additionally limited by the detected product
    mask and by the product's own alpha channel.

    :param product_path: Path to the product image.
    :param logo_path: Path to the logo image.
    :param scale_percent: Base scale (0.0–1.0) if mm sizing not provided.
    :param output_path: Path to save the composited result.
    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
    :param mask_path: Optional path to save the AI debug mask.
    :param logo_width_mm: Target logo width in mm.
    :param logo_height_mm: Target logo height in mm.
    :param position_preset: 'top', 'center', or 'bottom'.
    :returns: dict with ``output_path``, ``position``, ``scale_percent`` and
        ``logo_aspect`` (the logo's height / width ratio).
    :raises ValueError: if an image cannot be loaded or the effective scale
        is missing or not positive.
    :raises OSError: if the composited image cannot be written.
    """
    product = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)

    if product is None or logo is None:
        raise ValueError("Could not load one or both images.")

    # Normalise channel layouts up front: IMREAD_UNCHANGED returns 2-D arrays
    # for grayscale files, which would break the channel indexing below.
    if logo.ndim == 2:
        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
    elif logo.shape[2] == 3:
        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)
    if product.ndim == 2:
        product = cv2.cvtColor(product, cv2.COLOR_GRAY2BGR)

    h_prod, w_prod = product.shape[:2]
    logo_h_orig, logo_w_orig = logo.shape[:2]
    logo_aspect = logo_w_orig / logo_h_orig  # width / height

    # Professional mm-based sizing (reference: 500 mm product width)
    if logo_width_mm or logo_height_mm:
        REF_W_MM = 500.0
        target_w_scale = logo_width_mm / REF_W_MM if logo_width_mm else None
        # A height constraint is expressed as the equivalent width scale.
        target_h_scale = (logo_height_mm * logo_aspect) / REF_W_MM if logo_height_mm else None

        if target_w_scale and target_h_scale:
            # Both given: fit inside the box, so the tighter one wins.
            scale_percent = min(target_w_scale, target_h_scale)
            print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box → Scale: {scale_percent:.2f}")
        elif target_w_scale:
            scale_percent = target_w_scale
            print(f"Sizing by width: {logo_width_mm}mm → Scale: {scale_percent:.2f}")
        else:
            scale_percent = target_h_scale
            print(f"Sizing by height: {logo_height_mm}mm → Scale: {scale_percent:.2f}")

    if scale_percent is None or scale_percent <= 0:
        raise ValueError("scale_percent must be a positive number.")

    has_alpha = product.shape[2] == 4
    if has_alpha:
        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
    else:
        product_bgr = product
        product_alpha_orig = np.ones(product.shape[:2], dtype=float)

    det_pos, debug_mask = detect_product_center(
        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
    )

    # Apply position preset relative to detected mask bounds
    if position_preset and debug_mask is not None:
        print(f"Applying position preset: {position_preset}")
        y_indices, _ = np.where(debug_mask > 128)
        if len(y_indices) > 0:
            y_min, y_max = np.min(y_indices), np.max(y_indices)
            cx = det_pos[0]
            if position_preset == "top":
                cy = (y_min + (y_max - y_min) * 0.2) / h_prod
            elif position_preset == "bottom":
                cy = (y_min + (y_max - y_min) * 0.8) / h_prod
            else:
                cy = det_pos[1]
            position = (cx, cy)
            print(f"Preset coordinate: {position}")

    if position is None:
        position = det_pos
        print(f"Auto-detected placement center: {position}")

    # The debug mask is best-effort output: report a failed write, don't abort.
    if mask_path and debug_mask is not None:
        if cv2.imwrite(mask_path, debug_mask):
            print(f"Debug mask saved to {mask_path}")
        else:
            print(f"Warning: could not write debug mask to {mask_path}")

    # Resize the logo relative to the product width. Clamp to at least one
    # pixel so a very small scale cannot ask cv2.resize for an empty image.
    target_w = max(1, int(w_prod * scale_percent))
    aspect_ratio = logo.shape[0] / logo.shape[1]  # height / width
    target_h = max(1, int(target_w * aspect_ratio))
    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    # Logo rectangle in product coordinates (may extend past the edges).
    center_x = int(w_prod * position[0])
    center_y = int(h_prod * position[1])
    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
    x2, y2 = x1 + target_w, y1 + target_h

    # Clip to the product and compute the matching window inside the logo.
    x1_c, y1_c = max(0, x1), max(0, y1)
    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)
    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
    logo_x2 = target_w - max(0, x2 - w_prod)
    logo_y2 = target_h - max(0, y2 - h_prod)

    # If the logo lies entirely outside the image the product is saved as-is.
    overlaps = x1_c < x2_c and y1_c < y2_c
    if overlaps:
        logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
        logo_rgb = logo_crop[:, :, :3].astype(float)
        logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0
        roi_alpha_orig = product_alpha_orig[y1_c:y2_c, x1_c:x2_c]

        # Restrict the logo to the detected product and to opaque pixels.
        # NOTE: an all-zero detection mask therefore hides the logo entirely.
        if debug_mask is not None:
            mask_roi = debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0
            logo_alpha = logo_alpha * mask_roi * roi_alpha_orig
        else:
            logo_alpha = logo_alpha * roi_alpha_orig

        logo_alpha_3d = logo_alpha[:, :, np.newaxis]

        # Only the colour channels are blended; product alpha is untouched.
        roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)

        multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
        refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
        blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))

        product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

    # cv2.imwrite signals failure through its return value; don't report
    # success for a file that was never written.
    if not cv2.imwrite(output_path, product):
        raise OSError(f"Could not write output image to {output_path}")

    if overlaps:
        print(f"Successfully generated preview at {output_path}")

    return {
        "output_path": output_path,
        "position": position,
        "scale_percent": scale_percent,
        "logo_aspect": aspect_ratio,
    }