import os

import cv2
import numpy as np
from ultralytics import SAM

# ── Model loading ──────────────────────────────────────────────────────────
# On HF Spaces, HOME is set to /tmp so ultralytics caches models there.
# MODEL_PATH env var allows overriding (e.g. a pre-downloaded weights file).
MODEL_PATH = os.environ.get("MODEL_PATH", "sam_b.pt")

# Reference product width used to convert millimetre logo sizes to a scale.
_REF_PRODUCT_WIDTH_MM = 500.0

print(f"Initializing SAM model from: {MODEL_PATH} ...")
try:
    model = SAM(MODEL_PATH)
    print("SAM model loaded successfully.")
except Exception as e:
    # Deliberate best-effort: the module stays importable and detection
    # falls back to contour analysis when the weights cannot be loaded.
    print(f"CRITICAL WARNING: SAM model failed to load ({e}). Falling back to traditional methods.")
    model = None


# ── Product detection ──────────────────────────────────────────────────────
def _rel_to_pixel(rel, size):
    """Map a relative coordinate to a pixel index clamped to [0, size - 1]."""
    return min(size - 1, max(0, int(rel * size)))


def _sam_detect(image, px, py, alpha_mask):
    """Run point-prompted SAM; return ((cx, cy), mask, area) or None."""
    h_img, w_img = image.shape[:2]
    results = model.predict(
        image,
        points=[[px, py]],
        labels=[1],
        device="cpu",
        imgsz=640,
        conf=0.3,
        verbose=False,
    )
    if not results or results[0].masks is None:
        return None

    best_mask = None
    best_area = 0.0
    max_score = 0
    for i, mask_tensor in enumerate(results[0].masks.data):
        mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
        if mask_np.shape != (h_img, w_img):
            mask_np = cv2.resize(mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST)

        # Reject masks that cover (almost) the whole frame or are tiny specks.
        area = float(np.count_nonzero(mask_np))
        if area > (h_img * w_img * 0.95) or area < 1000:
            continue

        # Slight preference for the first mask returned by the model.
        score = area * (1.1 if i == 0 else 1.0)
        if alpha_mask is not None:
            # Reward masks that overlap the opaque part of the source image.
            mask_float = mask_np.astype(float) / 255.0
            alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
            score *= 1.0 + alignment

        if score > max_score:
            max_score = score
            best_mask = mask_np
            best_area = area

    if best_mask is None:
        return None

    # The area filter above guarantees a non-empty mask, so m00 > 0 here.
    M = cv2.moments(best_mask)
    cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
    return (cx / w_img, cy / h_img), best_mask, best_area


def _contour_detect(image, px, py):
    """Pick the large contour closest to (px, py); return ((cx, cy), mask) or None."""
    h_img, w_img = image.shape[:2]
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    edges = cv2.Canny(blurred, 20, 150)
    combined = cv2.bitwise_or(thresh, edges)
    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Normalise distance by the longer image side so that portrait images do
    # not push every off-centre contour to a negative score.
    norm = float(max(w_img, h_img))
    target_p = np.array([px, py])

    best_cnt = None
    best_centroid = None
    max_c_score = 0
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 500:
            continue
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
        dist = np.linalg.norm(np.array([cx, cy]) - target_p)
        score = area * (1.0 - (dist / norm))
        if score > max_c_score:
            max_c_score = score
            best_cnt = cnt
            best_centroid = (cx, cy)

    if best_cnt is None:
        return None

    mock_mask = np.zeros((h_img, w_img), dtype=np.uint8)
    cv2.drawContours(mock_mask, [best_cnt], -1, 255, -1)
    return (best_centroid[0] / w_img, best_centroid[1] / h_img), mock_mask


def detect_product_center(image, prompt_point=None, alpha_mask=None):
    """
    Detects the product region in the image using SAM with point prompting.
    Falls back to contour detection if SAM is unavailable, fails, or yields
    no usable mask.

    :param image: BGR numpy array.
    :param prompt_point: (x, y) relative coordinate (0.0–1.0). Defaults to the
        opaque centroid of ``alpha_mask`` when given, else the image centre.
    :param alpha_mask: Optional [H, W] float mask from original transparency.
    :returns: (position, mask) where position is relative (cx, cy) and mask is
        an [H, W] uint8 array (255 inside the detected region). When nothing
        is detected, returns ((0.5, 0.5), all-zero mask).
    """
    h_img, w_img = image.shape[:2]

    # Transparency-aware snapping: if the prompt is missing or lands on a
    # transparent pixel, move it to the centroid of the opaque pixels.
    if alpha_mask is not None:
        valid_rows, valid_cols = np.where(alpha_mask > 0.1)
        if len(valid_rows) > 0:
            opaque_centroid = (np.mean(valid_cols) / w_img, np.mean(valid_rows) / h_img)
            if prompt_point is None:
                prompt_point = opaque_centroid
            else:
                py_idx = _rel_to_pixel(prompt_point[1], h_img)
                px_idx = _rel_to_pixel(prompt_point[0], w_img)
                if alpha_mask[py_idx, px_idx] < 0.1:
                    prompt_point = opaque_centroid

    if prompt_point is None:
        px, py = w_img // 2, h_img // 2
    else:
        # Clamp so a relative coordinate of 1.0 (or slightly out of range)
        # still maps to a pixel inside the image.
        px = _rel_to_pixel(prompt_point[0], w_img)
        py = _rel_to_pixel(prompt_point[1], h_img)

    # SAM inference
    if model is not None:
        print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
        try:
            detection = _sam_detect(image, px, py, alpha_mask)
            if detection is not None:
                position, best_mask, best_area = detection
                print(f"SAM success: isolated prompted object (Area: {best_area:.0f})")
                return position, best_mask
        except Exception as e:
            # Best-effort: any inference failure drops to the contour path.
            print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")

    # Fallback: contour detection
    print("Running contour-based detection fallback...")
    fallback = _contour_detect(image, px, py)
    if fallback is not None:
        return fallback

    return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)


# ── Logo placement ──────────────────────────────────────────────────────────
def _write_image(path, image):
    """Write an image with cv2.imwrite and raise OSError if it reports failure."""
    if not cv2.imwrite(path, image):
        raise OSError(f"Could not write image to {path}")


def process_logo_placement(
    product_path,
    logo_path,
    scale_percent,
    output_path,
    position=None,
    mask_path=None,
    logo_width_mm=None,
    logo_height_mm=None,
    position_preset=None,
):
    """
    Places a logo on a product image with realistic texture blending.

    :param product_path: Path to the product image.
    :param logo_path: Path to the logo image.
    :param scale_percent: Base scale (0.0–1.0) if mm sizing not provided.
    :param output_path: Path to save the composited result.
    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
    :param mask_path: Optional path to save the AI debug mask.
    :param logo_width_mm: Target logo width in mm.
    :param logo_height_mm: Target logo height in mm.
    :param position_preset: 'top', 'center', or 'bottom'. When given and a
        product mask was detected, it overrides ``position``.
    :returns: dict with ``output_path``, ``position``, ``scale_percent`` and
        ``logo_aspect`` (logo height divided by logo width).
    :raises ValueError: If an image cannot be loaded or the resolved scale is
        not positive.
    :raises OSError: If the output image cannot be written.
    """
    product_raw = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)

    if product_raw is None or logo is None:
        raise ValueError("Could not load one or both images.")

    logo_h_orig, logo_w_orig = logo.shape[:2]
    logo_aspect = logo_w_orig / logo_h_orig

    # Professional mm-based sizing (reference: 500 mm product width)
    if logo_width_mm or logo_height_mm:
        target_w_scale = logo_width_mm / _REF_PRODUCT_WIDTH_MM if logo_width_mm else None
        target_h_scale = (
            (logo_height_mm * logo_aspect) / _REF_PRODUCT_WIDTH_MM if logo_height_mm else None
        )

        if target_w_scale and target_h_scale:
            # Both given: fit inside the box without changing the aspect ratio.
            scale_percent = min(target_w_scale, target_h_scale)
            print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box → Scale: {scale_percent:.2f}")
        elif target_w_scale:
            scale_percent = target_w_scale
            print(f"Sizing by width: {logo_width_mm}mm → Scale: {scale_percent:.2f}")
        else:
            scale_percent = target_h_scale
            print(f"Sizing by height: {logo_height_mm}mm → Scale: {scale_percent:.2f}")

    if scale_percent is None or scale_percent <= 0:
        raise ValueError("scale_percent must be a positive number.")

    product = product_raw.copy()
    # IMREAD_UNCHANGED returns a 2-D array for grayscale files; promote it so
    # detection and blending can assume at least three channels.
    if product.ndim == 2:
        product = cv2.cvtColor(product, cv2.COLOR_GRAY2BGR)

    h_prod, w_prod = product.shape[:2]
    has_alpha = product.shape[2] == 4
    if has_alpha:
        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
    else:
        product_bgr = product
        product_alpha_orig = np.ones((h_prod, w_prod), dtype=float)

    det_pos, debug_mask = detect_product_center(
        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
    )

    # Apply position preset relative to detected mask bounds
    if position_preset and debug_mask is not None:
        print(f"Applying position preset: {position_preset}")
        y_indices = np.where(debug_mask > 128)[0]
        if len(y_indices) > 0:
            y_min, y_max = np.min(y_indices), np.max(y_indices)
            cx = det_pos[0]
            if position_preset == "top":
                cy = (y_min + (y_max - y_min) * 0.2) / h_prod
            elif position_preset == "bottom":
                cy = (y_min + (y_max - y_min) * 0.8) / h_prod
            else:
                cy = det_pos[1]
            position = (cx, cy)
            print(f"Preset coordinate: {position}")

    if position is None:
        position = det_pos
        print(f"Auto-detected placement center: {position}")

    if mask_path and debug_mask is not None:
        # The debug mask is auxiliary output: report a failed write, don't abort.
        if cv2.imwrite(mask_path, debug_mask):
            print(f"Debug mask saved to {mask_path}")
        else:
            print(f"Warning: could not save debug mask to {mask_path}")

    # Ensure logo has alpha channel (grayscale logos are 2-D arrays)
    if logo.ndim == 2:
        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
    elif logo.shape[2] == 3:
        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)

    # Clamp to at least one pixel: cv2.resize rejects a zero-sized target.
    aspect_ratio = logo.shape[0] / logo.shape[1]
    target_w = max(1, int(w_prod * scale_percent))
    target_h = max(1, int(target_w * aspect_ratio))
    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    result = {
        "output_path": output_path,
        "position": position,
        "scale_percent": scale_percent,
        "logo_aspect": aspect_ratio,
    }

    # Logo rectangle in product coordinates, then clipped to the image.
    center_x = int(w_prod * position[0])
    center_y = int(h_prod * position[1])
    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
    x2, y2 = x1 + target_w, y1 + target_h

    x1_c, y1_c = max(0, x1), max(0, y1)
    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)

    # Matching sub-rectangle of the resized logo.
    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
    logo_x2 = target_w - max(0, x2 - w_prod)
    logo_y2 = target_h - max(0, y2 - h_prod)

    if x1_c >= x2_c or y1_c >= y2_c:
        # Logo lies entirely outside the image: save the product unchanged.
        _write_image(output_path, product)
        return result

    logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
    logo_rgb = logo_crop[:, :, :3].astype(float)
    logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0

    # Never draw on pixels that were transparent in the product image.
    logo_alpha = logo_alpha * product_alpha_orig[y1_c:y2_c, x1_c:x2_c]

    # Clip the logo to the detected product region. An all-zero mask means
    # detection found nothing; applying it would silently erase the logo.
    if debug_mask is not None and np.any(debug_mask):
        mask_roi = debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0
        logo_alpha = logo_alpha * mask_roi

    logo_alpha_3d = logo_alpha[:, :, np.newaxis]

    roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)

    # Mix the plain logo with a multiply blend so product texture shows through.
    multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
    refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
    blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))

    # Only colour channels are written; an existing alpha channel is preserved.
    product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

    _write_image(output_path, product)
    print(f"Successfully generated preview at {output_path}")
    return result