| import cv2 |
| import numpy as np |
| import os |
| from ultralytics import SAM |
|
|
| |
| |
| |
def _try_load_sam(weights):
    """Build the SAM model from ``weights``; return None if construction fails."""
    try:
        sam = SAM(weights)
        print("SAM model loaded successfully.")
        return sam
    except Exception as e:
        # Deliberate best-effort: a missing or broken checkpoint must not stop
        # the module from importing, because callers check ``model`` for None.
        print(f"CRITICAL WARNING: SAM model failed to load ({e}). Falling back to traditional methods.")
        return None


# Checkpoint to load; overridable through the MODEL_PATH environment variable.
MODEL_PATH = os.environ.get("MODEL_PATH", "sam_b.pt")

print(f"Initializing SAM model from: {MODEL_PATH} ...")
# Module-wide model handle: a SAM instance, or None when loading failed.
model = _try_load_sam(MODEL_PATH)
|
|
|
|
| |
def _resolve_prompt_pixel(prompt_point, alpha_mask, w_img, h_img):
    """Turn the relative prompt into an in-bounds pixel, steering it onto opaque pixels."""
    if alpha_mask is not None:
        valid_pixels = np.where(alpha_mask > 0.1)
        if len(valid_pixels[0]) > 0:
            # Relative (x, y) centroid of everything that is not transparent.
            opaque_centroid = (
                np.mean(valid_pixels[1]) / w_img,
                np.mean(valid_pixels[0]) / h_img,
            )
            if prompt_point is None:
                prompt_point = opaque_centroid
            else:
                row = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))
                col = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
                # A prompt that lands on a transparent pixel is replaced by
                # the opaque centroid.
                if alpha_mask[row, col] < 0.1:
                    prompt_point = opaque_centroid

    if prompt_point is None:
        return w_img // 2, h_img // 2

    # Clamp so a relative coordinate of exactly 1.0 (or anything out of range)
    # still maps to a pixel inside the image.
    px = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
    py = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))
    return px, py


def _segment_with_sam(image, px, py, alpha_mask):
    """Run point-prompted SAM; return (position, mask, area) or None if no mask qualifies."""
    h_img, w_img = image.shape[:2]
    results = model.predict(
        image,
        points=[[px, py]],
        labels=[1],
        device="cpu",
        imgsz=640,
        conf=0.3,
        verbose=False,
    )
    if not results or results[0].masks is None:
        return None

    best_mask = None
    best_score = 0.0
    best_area = 0.0
    for i, mask_tensor in enumerate(results[0].masks.data):
        mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
        if mask_np.shape != (h_img, w_img):
            mask_np = cv2.resize(mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST)

        # Reject masks covering more than 95% of the image and masks under
        # 1000 px; the latter also guarantees a non-zero moment below.
        area = float(np.count_nonzero(mask_np))
        if area > (h_img * w_img * 0.95) or area < 1000:
            continue

        # Larger masks score higher; the first returned mask gets a 10% bonus.
        score = area * (1.1 if i == 0 else 1.0)
        if alpha_mask is not None:
            # Boost masks that overlap the original opaque region.
            mask_float = mask_np.astype(float) / 255.0
            alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
            score *= 1.0 + alignment

        if score > best_score:
            best_score = score
            best_mask = mask_np
            best_area = area

    if best_mask is None:
        return None

    M = cv2.moments(best_mask)
    cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
    return (cx / w_img, cy / h_img), best_mask, best_area


def _segment_with_contours(image, px, py):
    """Pick the large contour closest to (px, py); return (position, mask) or None."""
    h_img, w_img = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    edges = cv2.Canny(blurred, 20, 150)
    combined = cv2.bitwise_or(thresh, edges)
    # [-2] selects the contour list on both OpenCV 3.x (3 return values) and
    # OpenCV 4.x (2 return values).
    contours = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Normalise distance by the image diagonal so the proximity factor stays
    # in [0, 1]. Dividing by width alone made it negative on portrait images,
    # which silently rejected valid contours.
    diagonal = float(np.hypot(w_img, h_img))

    best_cnt = None
    best_centroid = None
    best_score = 0.0
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 500:
            continue
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
        dist = float(np.hypot(cx - px, cy - py))
        score = area * (1.0 - dist / diagonal)
        if score > best_score:
            best_score = score
            best_cnt = cnt
            best_centroid = (cx, cy)

    if best_cnt is None:
        return None

    filled = np.zeros((h_img, w_img), dtype=np.uint8)
    cv2.drawContours(filled, [best_cnt], -1, 255, -1)
    return (best_centroid[0] / w_img, best_centroid[1] / h_img), filled


def detect_product_center(image, prompt_point=None, alpha_mask=None):
    """
    Detects the product region in the image using point-prompted SAM.
    Falls back to contour detection if the module-level SAM model is
    unavailable, raises during inference, or yields no usable mask.

    :param image: BGR numpy array.
    :param prompt_point: (x, y) relative coordinate (0.0–1.0). When omitted, the
        centroid of the opaque pixels is used if an alpha mask is given,
        otherwise the image centre. Out-of-range values are clamped.
    :param alpha_mask: Optional [H, W] float mask from original transparency.
    :returns: (position, mask) where position is the relative (cx, cy) centroid
        of the chosen region and mask is a uint8 [H, W] array (255 inside the
        region). If nothing is found, ((0.5, 0.5), all-zero mask) is returned.
    """
    h_img, w_img = image.shape[:2]
    px, py = _resolve_prompt_pixel(prompt_point, alpha_mask, w_img, h_img)

    if model is not None:
        print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
        try:
            sam_hit = _segment_with_sam(image, px, py, alpha_mask)
        except Exception as e:
            # Best-effort: any inference failure degrades to the contour path.
            print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")
            sam_hit = None
        if sam_hit is not None:
            position, mask, area = sam_hit
            print(f"SAM success: isolated prompted object (Area: {area:.0f})")
            return position, mask

    print("Running contour-based detection fallback...")
    contour_hit = _segment_with_contours(image, px, py)
    if contour_hit is not None:
        return contour_hit

    return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)
|
|
|
|
| |
def _scale_from_mm(logo_width_mm, logo_height_mm, logo_aspect):
    """Convert a requested logo size in mm into a fraction of the product width."""
    # Every mm size is expressed relative to this fixed reference width.
    REF_W_MM = 500.0
    target_w_scale = logo_width_mm / REF_W_MM if logo_width_mm else None
    # Height is converted to the equivalent width via the logo's w/h aspect.
    target_h_scale = (logo_height_mm * logo_aspect) / REF_W_MM if logo_height_mm else None

    if target_w_scale and target_h_scale:
        # Both given: use the smaller scale so the logo fits inside the box.
        scale = min(target_w_scale, target_h_scale)
        print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box → Scale: {scale:.2f}")
    elif target_w_scale:
        scale = target_w_scale
        print(f"Sizing by width: {logo_width_mm}mm → Scale: {scale:.2f}")
    else:
        scale = target_h_scale
        print(f"Sizing by height: {logo_height_mm}mm → Scale: {scale:.2f}")
    return scale


def _write_image_or_raise(path, image):
    """Write ``image`` to ``path`` and raise OSError when OpenCV reports failure."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image to {path}")


def process_logo_placement(
    product_path,
    logo_path,
    scale_percent,
    output_path,
    position=None,
    mask_path=None,
    logo_width_mm=None,
    logo_height_mm=None,
    position_preset=None,
):
    """
    Places a logo on a product image with realistic texture blending.

    :param product_path: Path to the product image.
    :param logo_path: Path to the logo image.
    :param scale_percent: Base scale (0.0–1.0) if mm sizing not provided.
    :param output_path: Path to save the composited result.
    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
    :param mask_path: Optional path to save the AI debug mask.
    :param logo_width_mm: Target logo width in mm.
    :param logo_height_mm: Target logo height in mm.
    :param position_preset: 'top', 'center', or 'bottom'. When set, it
        overrides ``position`` using the detected product mask.
    :returns: dict with ``output_path``, ``position``, ``scale_percent`` and
        ``logo_aspect`` (logo height divided by width).
    :raises ValueError: if either image cannot be loaded or the resolved scale
        is not positive.
    :raises OSError: if the composited image cannot be written.
    """
    product_raw = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)

    if product_raw is None or logo is None:
        raise ValueError("Could not load one or both images.")

    # IMREAD_UNCHANGED yields a 2-D array for grayscale files; normalise the
    # logo to BGRA so the channel slicing below never fails.
    if logo.ndim == 2 or logo.shape[2] == 1:
        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
    elif logo.shape[2] == 3:
        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)

    logo_h_orig, logo_w_orig = logo.shape[:2]
    logo_aspect = logo_w_orig / logo_h_orig

    if logo_width_mm or logo_height_mm:
        scale_percent = _scale_from_mm(logo_width_mm, logo_height_mm, logo_aspect)

    if scale_percent is None or scale_percent <= 0:
        raise ValueError(f"scale_percent must be positive, got {scale_percent!r}")

    product = product_raw.copy()
    if product.ndim == 2:
        # Grayscale product: expand to BGR so detection and blending see 3 channels.
        product = cv2.cvtColor(product, cv2.COLOR_GRAY2BGR)
    has_alpha = product.shape[2] == 4

    if has_alpha:
        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
    else:
        product_bgr = product
        product_alpha_orig = np.ones(product.shape[:2], dtype=float)

    h_prod, w_prod = product.shape[:2]

    det_pos, debug_mask = detect_product_center(
        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
    )

    if position_preset and debug_mask is not None:
        print(f"Applying position preset: {position_preset}")
        y_indices, _ = np.where(debug_mask > 128)
        if len(y_indices) > 0:
            y_min, y_max = np.min(y_indices), np.max(y_indices)
            cx = det_pos[0]
            # 'top' / 'bottom' sit at 20% / 80% of the mask's vertical extent.
            if position_preset == "top":
                cy = (y_min + (y_max - y_min) * 0.2) / h_prod
            elif position_preset == "bottom":
                cy = (y_min + (y_max - y_min) * 0.8) / h_prod
            else:
                cy = det_pos[1]
            position = (cx, cy)
            print(f"Preset coordinate: {position}")

    if position is None:
        position = det_pos
        print(f"Auto-detected placement center: {position}")

    if mask_path and debug_mask is not None:
        # The debug mask is optional output: warn instead of aborting.
        if cv2.imwrite(mask_path, debug_mask):
            print(f"Debug mask saved to {mask_path}")
        else:
            print(f"Warning: could not write debug mask to {mask_path}")

    # Floor both dimensions at 1 px; cv2.resize rejects an empty target size.
    target_w = max(1, int(w_prod * scale_percent))
    aspect_ratio = logo.shape[0] / logo.shape[1]
    target_h = max(1, int(target_w * aspect_ratio))
    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    center_x = int(w_prod * position[0])
    center_y = int(h_prod * position[1])
    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
    x2, y2 = x1 + target_w, y1 + target_h

    # Clip the placement rectangle to the product and compute the matching
    # window inside the resized logo.
    x1_c, y1_c = max(0, x1), max(0, y1)
    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)
    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
    logo_x2 = target_w - max(0, x2 - w_prod)
    logo_y2 = target_h - max(0, y2 - h_prod)

    result = {
        "output_path": output_path,
        "position": position,
        "scale_percent": scale_percent,
        "logo_aspect": aspect_ratio,
    }

    if x1_c >= x2_c or y1_c >= y2_c:
        # Logo lies entirely outside the product: save the product untouched.
        _write_image_or_raise(output_path, product)
        return result

    logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
    logo_rgb = logo_crop[:, :, :3].astype(float)
    logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0
    roi_alpha_orig = product_alpha_orig[y1_c:y2_c, x1_c:x2_c]

    # Restrict the logo to the detected product mask and to pixels that were
    # opaque in the original product.
    if debug_mask is not None:
        mask_roi = debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0
        logo_alpha = logo_alpha * mask_roi * roi_alpha_orig
    else:
        logo_alpha = logo_alpha * roi_alpha_orig

    logo_alpha_3d = logo_alpha[:, :, np.newaxis]

    # ``:3`` selects the colour channels for both BGR and BGRA products.
    roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)

    # 40% plain logo + 60% multiply blend lets the product texture show through.
    multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
    refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
    blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))

    product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

    _write_image_or_raise(output_path, product)
    print(f"Successfully generated preview at {output_path}")
    return result
|
|