Spaces:

Saad5151
/

brandably

Sleeping

App Files Files Community

brandably / engine.py

Saad5151

feat: add manual logo drag-to-reposition feature

247d630 3 months ago

raw

history blame contribute delete

11.7 kB

	import cv2
	import numpy as np
	import os
	from ultralytics import SAM

	# ── Model loading ──────────────────────────────────────────────────────────
	# On HF Spaces, HOME is set to /tmp so ultralytics caches models there.
	# MODEL_PATH env var allows overriding (e.g. a pre-downloaded weights file).
	MODEL_PATH = os.environ.get("MODEL_PATH", "sam_b.pt")


	def _load_sam_model(weights_path):
	    """Build the SAM model from ``weights_path``; return None if that fails.

	    Any exception raised while constructing the model is reported on stdout
	    and swallowed so that importing this module never fails because of the
	    model; callers check for ``None`` and use the non-SAM code path instead.
	    """
	    print(f"Initializing SAM model from: {weights_path} ...")
	    try:
	        loaded = SAM(weights_path)
	        print("SAM model loaded successfully.")
	    except Exception as exc:
	        print(f"CRITICAL WARNING: SAM model failed to load ({exc}). Falling back to traditional methods.")
	        return None
	    return loaded


	# Module-level singleton: either a ready SAM instance or None.
	model = _load_sam_model(MODEL_PATH)


	# ── Product detection ──────────────────────────────────────────────────────
	def detect_product_center(image, prompt_point=None, alpha_mask=None):
	    """
	    Detects the product region in the image using the module-level SAM model
	    with a single positive point prompt. Falls back to contour detection if
	    SAM is unavailable, raises, or yields no acceptable mask.

	    :param image: BGR numpy array (the contour fallback also accepts a 2-D
	        grayscale array).
	    :param prompt_point: (x, y) relative coordinate (0.0–1.0). When None, the
	        centroid of the opaque pixels is used if ``alpha_mask`` is given,
	        otherwise the image centre.
	    :param alpha_mask: Optional [H, W] float mask from original transparency.
	    :returns: (position, mask) where position is relative (cx, cy) and mask is
	        a uint8 [H, W] array that is nonzero on the detected object. When
	        nothing is detected the result is ((0.5, 0.5), all-zero mask).
	    """
	    h_img, w_img = image.shape[:2]

	    # Transparency-Aware Snapping
	    prompt_point = _snap_prompt_to_opaque(prompt_point, alpha_mask, w_img, h_img)

	    if prompt_point is None:
	        px, py = w_img // 2, h_img // 2
	    else:
	        # Clamp to the pixel grid: a relative coordinate of exactly 1.0 would
	        # otherwise map to index w_img / h_img, one past the last pixel.
	        px = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
	        py = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))

	    # SAM Inference
	    if model is not None:
	        print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
	        try:
	            sam_hit = _best_sam_mask(image, px, py, alpha_mask)
	            if sam_hit is not None:
	                cx, cy, best_mask, best_area = sam_hit
	                print(f"SAM success: isolated prompted object (Area: {best_area:.0f})")
	                return (cx / w_img, cy / h_img), best_mask
	        except Exception as e:
	            # Best-effort: any SAM failure degrades to the classical path.
	            print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")

	    # Fallback: Contour Detection
	    print("Running contour-based detection fallback...")
	    contour_hit = _best_contour_mask(image, px, py)
	    if contour_hit is not None:
	        cx, cy, contour_mask = contour_hit
	        return (cx / w_img, cy / h_img), contour_mask

	    return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)


	def _snap_prompt_to_opaque(prompt_point, alpha_mask, w_img, h_img):
	    """Return ``prompt_point``, moved to the opaque centroid when it is missing or lands on a transparent pixel."""
	    if alpha_mask is None:
	        return prompt_point

	    rows, cols = np.nonzero(alpha_mask > 0.1)
	    if rows.size == 0:
	        # Fully transparent image: nothing to snap to.
	        return prompt_point

	    opaque_centroid = (float(cols.mean()) / w_img, float(rows.mean()) / h_img)
	    if prompt_point is None:
	        return opaque_centroid

	    row = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))
	    col = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
	    if alpha_mask[row, col] < 0.1:
	        return opaque_centroid
	    return prompt_point


	def _best_sam_mask(image, px, py, alpha_mask):
	    """Run SAM at pixel (px, py) and return (cx, cy, mask, area) for the best candidate, or None."""
	    h_img, w_img = image.shape[:2]
	    results = model.predict(
	        image,
	        points=[[px, py]],
	        labels=[1],
	        device="cpu",
	        imgsz=640,
	        conf=0.3,
	        verbose=False,
	    )
	    if not results or results[0].masks is None:
	        return None

	    best_mask = None
	    best_area = 0.0
	    best_score = 0
	    for i, mask_tensor in enumerate(results[0].masks.data):
	        mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
	        if mask_np.shape != (h_img, w_img):
	            # NEAREST keeps the mask binary after rescaling.
	            mask_np = cv2.resize(mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST)

	        area = float(np.sum(mask_np > 0))
	        # Reject masks that cover nearly the whole frame or are mere specks.
	        if area > (h_img * w_img * 0.95) or area < 1000:
	            continue

	        # Larger masks win; the first candidate gets a 10 % bonus.
	        score = area * (1.1 if i == 0 else 1.0)
	        if alpha_mask is not None:
	            # Reward masks that sit on opaque pixels (alignment is in [0, 1)).
	            mask_float = mask_np.astype(float) / 255.0
	            alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
	            score *= 1.0 + alignment

	        if score > best_score:
	            best_score = score
	            best_mask = mask_np
	            best_area = area

	    if best_mask is None:
	        return None

	    # m00 cannot be zero here: the accepted mask has at least 1000 nonzero pixels.
	    M = cv2.moments(best_mask)
	    return M["m10"] / M["m00"], M["m01"] / M["m00"], best_mask, best_area


	def _best_contour_mask(image, px, py):
	    """Pick the large contour closest to (px, py); return (cx, cy, filled_mask) or None."""
	    h_img, w_img = image.shape[:2]
	    # A 2-D array is already single-channel; cvtColor(BGR2GRAY) would reject it.
	    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
	    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
	    edges = cv2.Canny(blurred, 20, 150)
	    combined = cv2.bitwise_or(thresh, edges)
	    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

	    best_cnt = None
	    best_center = None
	    best_score = 0
	    for cnt in contours:
	        area = cv2.contourArea(cnt)
	        if area < 500:
	            continue
	        M = cv2.moments(cnt)
	        if M["m00"] == 0:
	            continue
	        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
	        dist = float(np.hypot(cx - px, cy - py))
	        # NOTE(review): the weight is normalised by image width only, so it
	        # goes negative (candidate rejected) once dist exceeds w_img, which
	        # can happen on tall images. Confirm whether that cut-off is intended.
	        score = area * (1.0 - (dist / w_img))
	        if score > best_score:
	            best_score = score
	            best_cnt = cnt
	            best_center = (cx, cy)

	    if best_cnt is None:
	        return None

	    filled = np.zeros((h_img, w_img), dtype=np.uint8)
	    cv2.drawContours(filled, [best_cnt], -1, 255, -1)
	    return best_center[0], best_center[1], filled


	# ── Logo placement ──────────────────────────────────────────────────────────
	def process_logo_placement(
	    product_path,
	    logo_path,
	    scale_percent,
	    output_path,
	    position=None,
	    mask_path=None,
	    logo_width_mm=None,
	    logo_height_mm=None,
	    position_preset=None,
	):
	    """
	    Places a logo on a product image with realistic texture blending.

	    The logo is resized relative to the product width, centred on the chosen
	    position, clipped to the image bounds, limited to the detected product
	    mask and the product's own opacity, and blended as a 40 % / 60 % mix of
	    the plain logo colour and a multiply blend with the underlying pixels.
	    The blend normalises by 255, i.e. it assumes 8-bit channels.

	    :param product_path: Path to the product image.
	    :param logo_path: Path to the logo image.
	    :param scale_percent: Base scale (0.0–1.0) if mm sizing not provided.
	    :param output_path: Path to save the composited result.
	    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
	        It is also used as the prompt point for product detection.
	    :param mask_path: Optional path to save the AI debug mask.
	    :param logo_width_mm: Target logo width in mm.
	    :param logo_height_mm: Target logo height in mm.
	    :param position_preset: 'top', 'center', or 'bottom'. When given (and a
	        product mask was found) it overrides ``position``; any other value is
	        treated like 'center'.
	    :returns: dict with ``output_path``, ``position``, ``scale_percent`` and
	        ``logo_aspect`` (logo height divided by logo width).
	    :raises ValueError: if an image cannot be loaded or the effective scale is
	        missing or not positive.
	    :raises OSError: if the result image cannot be written.
	    """
	    product_raw = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
	    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)

	    if product_raw is None or logo is None:
	        raise ValueError("Could not load one or both images.")

	    logo_h_orig, logo_w_orig = logo.shape[:2]
	    logo_aspect = logo_w_orig / logo_h_orig

	    # Professional mm-based sizing (reference: 500 mm product width)
	    if logo_width_mm or logo_height_mm:
	        REF_W_MM = 500.0
	        target_w_scale = logo_width_mm / REF_W_MM if logo_width_mm else None
	        # A height limit is converted to the width it implies at the logo's aspect.
	        target_h_scale = (logo_height_mm * logo_aspect) / REF_W_MM if logo_height_mm else None

	        if target_w_scale and target_h_scale:
	            scale_percent = min(target_w_scale, target_h_scale)
	            print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box → Scale: {scale_percent:.2f}")
	        elif target_w_scale:
	            scale_percent = target_w_scale
	            print(f"Sizing by width: {logo_width_mm}mm → Scale: {scale_percent:.2f}")
	        else:
	            scale_percent = target_h_scale
	            print(f"Sizing by height: {logo_height_mm}mm → Scale: {scale_percent:.2f}")

	    # Fail early with a clear message instead of an opaque resize error.
	    if scale_percent is None or scale_percent <= 0:
	        raise ValueError(f"scale_percent must be a positive number, got {scale_percent!r}.")

	    # IMREAD_UNCHANGED returns single-channel files as 2-D arrays; promote them
	    # to colour so the 3-channel blend below works.
	    if product_raw.ndim == 2:
	        product = cv2.cvtColor(product_raw, cv2.COLOR_GRAY2BGR)
	    else:
	        product = product_raw.copy()
	    has_alpha = product.shape[2] == 4

	    if has_alpha:
	        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
	        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
	    else:
	        product_bgr = product
	        product_alpha_orig = np.ones(product.shape[:2], dtype=float)

	    h_prod, w_prod = product.shape[:2]

	    det_pos, debug_mask = detect_product_center(
	        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
	    )

	    # Apply position preset relative to detected mask bounds
	    if position_preset and debug_mask is not None:
	        print(f"Applying position preset: {position_preset}")
	        mask_rows = np.nonzero(debug_mask > 128)[0]
	        if mask_rows.size > 0:
	            y_min, y_max = mask_rows.min(), mask_rows.max()
	            if position_preset == "top":
	                cy = float(y_min + (y_max - y_min) * 0.2) / h_prod
	            elif position_preset == "bottom":
	                cy = float(y_min + (y_max - y_min) * 0.8) / h_prod
	            else:
	                cy = det_pos[1]
	            position = (det_pos[0], cy)
	            print(f"Preset coordinate: {position}")

	    if position is None:
	        position = det_pos
	        print(f"Auto-detected placement center: {position}")

	    if mask_path and debug_mask is not None:
	        # The debug mask is optional output: report a failed write, don't abort.
	        if cv2.imwrite(mask_path, debug_mask):
	            print(f"Debug mask saved to {mask_path}")
	        else:
	            print(f"Warning: could not save debug mask to {mask_path}")

	    # Ensure logo has alpha channel
	    if logo.ndim == 2:
	        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
	    elif logo.shape[2] == 3:
	        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)

	    # Keep both dimensions at least one pixel so cv2.resize never gets an
	    # empty target size for very small scales or extreme aspect ratios.
	    aspect_ratio = logo.shape[0] / logo.shape[1]
	    target_w = max(1, int(w_prod * scale_percent))
	    target_h = max(1, int(target_w * aspect_ratio))
	    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

	    result = {
	        "output_path": output_path,
	        "position": position,
	        "scale_percent": scale_percent,
	        "logo_aspect": aspect_ratio
	    }

	    # Logo rectangle in product coordinates (may extend past the image).
	    center_x = int(w_prod * position[0])
	    center_y = int(h_prod * position[1])
	    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
	    x2, y2 = x1 + target_w, y1 + target_h

	    # Visible part of that rectangle, and the matching window inside the logo.
	    x1_c, y1_c = max(0, x1), max(0, y1)
	    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)
	    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
	    logo_x2 = target_w - max(0, x2 - w_prod)
	    logo_y2 = target_h - max(0, y2 - h_prod)

	    if x1_c >= x2_c or y1_c >= y2_c:
	        # Logo lies completely outside the image: emit the product unchanged.
	        _write_image_or_raise(output_path, product)
	        return result

	    logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
	    logo_rgb = logo_crop[:, :, :3].astype(float)
	    logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0
	    roi_alpha_orig = product_alpha_orig[y1_c:y2_c, x1_c:x2_c]

	    # An all-zero mask is the detector's "nothing found" result; clipping with
	    # it would erase the logo entirely, so only clip to a non-empty mask.
	    if debug_mask is not None and np.any(debug_mask):
	        mask_roi = debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0
	        logo_alpha = logo_alpha * mask_roi * roi_alpha_orig
	    else:
	        logo_alpha = logo_alpha * roi_alpha_orig

	    logo_alpha_3d = logo_alpha[:, :, np.newaxis]

	    # ``:3`` selects the colour channels for both BGR and BGRA products; the
	    # product's own alpha channel is left untouched.
	    roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)

	    multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
	    refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
	    blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))

	    product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

	    _write_image_or_raise(output_path, product)
	    print(f"Successfully generated preview at {output_path}")
	    return result


	def _write_image_or_raise(path, image):
	    """Write ``image`` to ``path`` and raise OSError when cv2.imwrite reports failure."""
	    if not cv2.imwrite(path, image):
	        raise OSError(f"Could not write image to {path}")