# File size: 11,708 Bytes
import cv2
import numpy as np
import os
from ultralytics import SAM
# ── Model loading ───────────────────────────────────────────────────────────
# On HF Spaces, HOME is set to /tmp so ultralytics caches models there.
# MODEL_PATH env var allows overriding (e.g. a pre-downloaded weights file).
# Weights location: the MODEL_PATH environment variable wins, otherwise the
# default "sam_b.pt" checkpoint name is handed to ultralytics.
MODEL_PATH = os.environ.get("MODEL_PATH", "sam_b.pt")
print(f"Initializing SAM model from: {MODEL_PATH} ...")
try:
    model = SAM(MODEL_PATH)
except Exception as load_error:
    # Deliberate best-effort: any load failure leaves ``model`` as None so the
    # detection code can switch to its non-SAM fallback instead of crashing
    # at import time.
    print(f"CRITICAL WARNING: SAM model failed to load ({load_error}). Falling back to traditional methods.")
    model = None
else:
    print("SAM model loaded successfully.")
# ── Product detection ───────────────────────────────────────────────────────
def detect_product_center(image, prompt_point=None, alpha_mask=None):
    """
    Detects the product region in the image using the module-level SAM model
    with point prompting. Falls back to contour detection if SAM is
    unavailable, raises, or yields no acceptable mask.

    :param image: BGR numpy array.
    :param prompt_point: (x, y) relative coordinate (0.0–1.0). When None, the
        centroid of the opaque pixels is used if alpha_mask is given,
        otherwise the image centre.
    :param alpha_mask: Optional [H, W] float mask from original transparency.
    :returns: (position, mask) where position is relative (cx, cy) and mask is
        an [H, W] uint8 array (255 inside the detected region). When nothing
        is detected, position is (0.5, 0.5) and the mask is all zeros.
    """
    h_img, w_img = image.shape[:2]

    # Transparency-aware snapping of the prompt.
    prompt_point = _snap_prompt_to_opaque(prompt_point, alpha_mask, w_img, h_img)

    if prompt_point is None:
        px, py = w_img // 2, h_img // 2
    else:
        # Clamp to valid pixel indices: a relative coordinate of exactly 1.0
        # would otherwise map to one past the last column/row.
        px = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
        py = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))

    # SAM inference (skipped when the model failed to load).
    if model is not None:
        sam_result = _detect_with_sam(image, px, py, alpha_mask)
        if sam_result is not None:
            return sam_result

    # Fallback: contour detection.
    return _detect_with_contours(image, px, py)


def _snap_prompt_to_opaque(prompt_point, alpha_mask, w_img, h_img):
    """Return the prompt, replaced by the opaque centroid if it is missing or lands on a transparent pixel."""
    if alpha_mask is None:
        return prompt_point
    valid_pixels = np.where(alpha_mask > 0.1)
    if len(valid_pixels[0]) == 0:
        # Fully transparent input: nothing to snap to.
        return prompt_point
    opaque_centroid = (
        np.mean(valid_pixels[1]) / w_img,
        np.mean(valid_pixels[0]) / h_img,
    )
    if prompt_point is None:
        return opaque_centroid
    py_idx = min(h_img - 1, max(0, int(prompt_point[1] * h_img)))
    px_idx = min(w_img - 1, max(0, int(prompt_point[0] * w_img)))
    if alpha_mask[py_idx, px_idx] < 0.1:
        return opaque_centroid
    return prompt_point


def _detect_with_sam(image, px, py, alpha_mask):
    """Run point-prompted SAM at pixel (px, py); return (position, mask) or None if no mask qualifies or inference fails."""
    h_img, w_img = image.shape[:2]
    print(f"Attempting Prompted SAM segmentation at point ({px}, {py})...")
    try:
        results = model.predict(
            image,
            points=[[px, py]],
            labels=[1],
            device="cpu",
            imgsz=640,
            conf=0.3,
            verbose=False,
        )
        best_mask = None
        max_score = 0
        if results and results[0].masks is not None:
            for i, mask_tensor in enumerate(results[0].masks.data):
                mask_np = (mask_tensor.cpu().numpy() * 255).astype(np.uint8)
                if mask_np.shape != (h_img, w_img):
                    mask_np = cv2.resize(
                        mask_np, (w_img, h_img), interpolation=cv2.INTER_NEAREST
                    )
                area = float(np.count_nonzero(mask_np))
                # Reject masks that cover almost the whole frame or are tiny.
                # The lower bound also guarantees a non-zero m00 moment later.
                if area > (h_img * w_img * 0.95) or area < 1000:
                    continue
                # Larger masks win; the first returned mask gets a 10% bonus.
                score = area * (1.1 if i == 0 else 1.0)
                if alpha_mask is not None:
                    # Reward masks that overlap the opaque part of the image.
                    mask_float = mask_np.astype(float) / 255.0
                    alignment = np.sum(mask_float * alpha_mask) / (np.sum(mask_float) + 1)
                    score *= 1.0 + alignment
                if score > max_score:
                    max_score = score
                    best_mask = mask_np
        if best_mask is not None:
            M = cv2.moments(best_mask)
            cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
            print(f"SAM success: isolated prompted object (Area: {max_score:.0f})")
            return (cx / w_img, cy / h_img), best_mask
    except Exception as e:
        # Deliberate best-effort: any inference error degrades to the contour
        # fallback rather than aborting the request.
        print(f"SAM Prompted Inference failed: {e}. Falling back to contours.")
    return None


def _detect_with_contours(image, px, py):
    """Pick the large contour closest to pixel (px, py); return (position, mask), or a centred position with an empty mask."""
    h_img, w_img = image.shape[:2]
    print("Running contour-based detection fallback...")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    edges = cv2.Canny(blurred, 20, 150)
    combined = cv2.bitwise_or(thresh, edges)
    contours, _ = cv2.findContours(combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Normalise distance by the image diagonal (the largest possible distance
    # inside the frame) so the proximity factor stays within [0, 1]. Dividing
    # by the width alone made scores negative in tall images, which meant
    # distant contours could never be selected.
    diagonal = float(np.hypot(w_img, h_img))
    target_p = np.array([px, py])

    best_cnt = None
    best_centroid = None
    max_c_score = 0.0
    for cnt in contours:
        area = cv2.contourArea(cnt)
        if area < 500:
            continue
        M = cv2.moments(cnt)
        if M["m00"] == 0:
            continue
        cx, cy = M["m10"] / M["m00"], M["m01"] / M["m00"]
        dist = np.linalg.norm(np.array([cx, cy]) - target_p)
        score = area * max(0.0, 1.0 - dist / diagonal)
        if score > max_c_score:
            max_c_score = score
            best_cnt = cnt
            best_centroid = (cx, cy)

    if best_cnt is None:
        return (0.5, 0.5), np.zeros((h_img, w_img), dtype=np.uint8)

    mock_mask = np.zeros((h_img, w_img), dtype=np.uint8)
    cv2.drawContours(mock_mask, [best_cnt], -1, 255, -1)
    return (best_centroid[0] / w_img, best_centroid[1] / h_img), mock_mask
# ── Logo placement ──────────────────────────────────────────────────────────
def process_logo_placement(
    product_path,
    logo_path,
    scale_percent,
    output_path,
    position=None,
    mask_path=None,
    logo_width_mm=None,
    logo_height_mm=None,
    position_preset=None,
):
    """
    Places a logo on a product image with realistic texture blending.

    :param product_path: Path to the product image.
    :param logo_path: Path to the logo image.
    :param scale_percent: Base scale (0.0–1.0) of logo width relative to the
        product width; replaced by the mm-derived scale when mm sizing is given.
    :param output_path: Path to save the composited result.
    :param position: (x, y) relative position (0.0–1.0). Auto-detected if None.
    :param mask_path: Optional path to save the AI debug mask.
    :param logo_width_mm: Target logo width in mm.
    :param logo_height_mm: Target logo height in mm.
    :param position_preset: 'top', 'center', or 'bottom'.
    :returns: dict with keys "output_path", "position", "scale_percent" and
        "logo_aspect" (logo height divided by logo width).
    :raises ValueError: If either image cannot be loaded.
    """
    product_raw = cv2.imread(product_path, cv2.IMREAD_UNCHANGED)
    logo = cv2.imread(logo_path, cv2.IMREAD_UNCHANGED)
    if product_raw is None or logo is None:
        raise ValueError("Could not load one or both images.")

    # IMREAD_UNCHANGED keeps the file's channel layout, so single-channel
    # images arrive as 2-D arrays. Promote them up front; the blending below
    # needs a colour product and a BGRA logo.
    if logo.ndim == 2:
        logo = cv2.cvtColor(logo, cv2.COLOR_GRAY2BGRA)
    elif logo.shape[2] == 3:
        logo = cv2.cvtColor(logo, cv2.COLOR_BGR2BGRA)
    if product_raw.ndim == 2:
        product = cv2.cvtColor(product_raw, cv2.COLOR_GRAY2BGR)
    else:
        product = product_raw.copy()

    h_prod, w_prod = product.shape[:2]
    logo_h_orig, logo_w_orig = logo.shape[:2]
    logo_aspect = logo_w_orig / logo_h_orig  # width / height

    # Professional mm-based sizing (reference: 500 mm product width)
    # NOTE(review): the 'β' in the three messages below looks like a
    # mis-encoded arrow; left untouched because it is runtime output.
    if logo_width_mm or logo_height_mm:
        REF_W_MM = 500.0
        target_w_scale = logo_width_mm / REF_W_MM if logo_width_mm else None
        target_h_scale = (logo_height_mm * logo_aspect) / REF_W_MM if logo_height_mm else None
        if target_w_scale and target_h_scale:
            # Both limits given: take the smaller so the logo fits the box.
            scale_percent = min(target_w_scale, target_h_scale)
            print(f"Fitting logo to {logo_width_mm}x{logo_height_mm}mm box β Scale: {scale_percent:.2f}")
        elif target_w_scale:
            scale_percent = target_w_scale
            print(f"Sizing by width: {logo_width_mm}mm β Scale: {scale_percent:.2f}")
        else:
            scale_percent = target_h_scale
            print(f"Sizing by height: {logo_height_mm}mm β Scale: {scale_percent:.2f}")

    has_alpha = product.shape[2] == 4
    if has_alpha:
        product_bgr = cv2.cvtColor(product, cv2.COLOR_BGRA2BGR)
        product_alpha_orig = product[:, :, 3].astype(float) / 255.0
    else:
        product_bgr = product
        product_alpha_orig = np.ones(product.shape[:2], dtype=float)

    det_pos, debug_mask = detect_product_center(
        product_bgr, prompt_point=position, alpha_mask=product_alpha_orig
    )
    # An all-zero mask is what the detector returns when it finds nothing.
    # Clipping the logo to it would erase the logo entirely, so in that case
    # only the product's own transparency is used for clipping.
    mask_has_region = debug_mask is not None and bool(np.any(debug_mask))

    # Apply position preset relative to detected mask bounds
    if position_preset and debug_mask is not None:
        print(f"Applying position preset: {position_preset}")
        y_indices, _ = np.where(debug_mask > 128)
        if len(y_indices) > 0:
            y_min, y_max = np.min(y_indices), np.max(y_indices)
            cx = det_pos[0]
            if position_preset == "top":
                cy = (y_min + (y_max - y_min) * 0.2) / h_prod
            elif position_preset == "bottom":
                cy = (y_min + (y_max - y_min) * 0.8) / h_prod
            else:
                cy = det_pos[1]
            position = (cx, cy)
            print(f"Preset coordinate: {position}")

    if position is None:
        position = det_pos
        print(f"Auto-detected placement center: {position}")

    if mask_path and debug_mask is not None:
        cv2.imwrite(mask_path, debug_mask)
        print(f"Debug mask saved to {mask_path}")

    # Resize the logo; keep each side at least 1 px because cv2.resize
    # rejects an empty destination size (tiny scale or small product image).
    aspect_ratio = logo.shape[0] / logo.shape[1]  # height / width
    target_w = max(1, int(w_prod * scale_percent))
    target_h = max(1, int(target_w * aspect_ratio))
    logo_resized = cv2.resize(logo, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)

    result = {
        "output_path": output_path,
        "position": position,
        "scale_percent": scale_percent,
        "logo_aspect": aspect_ratio,
    }

    # Logo rectangle in product coordinates, then clipped to the image.
    center_x = int(w_prod * position[0])
    center_y = int(h_prod * position[1])
    x1, y1 = center_x - target_w // 2, center_y - target_h // 2
    x2, y2 = x1 + target_w, y1 + target_h
    x1_c, y1_c = max(0, x1), max(0, y1)
    x2_c, y2_c = min(w_prod, x2), min(h_prod, y2)
    # Matching crop window inside the resized logo.
    logo_x1, logo_y1 = max(0, -x1), max(0, -y1)
    logo_x2 = target_w - max(0, x2 - w_prod)
    logo_y2 = target_h - max(0, y2 - h_prod)

    if x1_c >= x2_c or y1_c >= y2_c:
        # Logo lies entirely outside the image: save the product untouched.
        cv2.imwrite(output_path, product)
        return result

    logo_crop = logo_resized[logo_y1:logo_y2, logo_x1:logo_x2]
    logo_rgb = logo_crop[:, :, :3].astype(float)
    logo_alpha = logo_crop[:, :, 3].astype(float) / 255.0

    # Clip the logo to the detected product region and the original opacity.
    roi_alpha_orig = product_alpha_orig[y1_c:y2_c, x1_c:x2_c]
    if mask_has_region:
        mask_roi = debug_mask[y1_c:y2_c, x1_c:x2_c].astype(float) / 255.0
        logo_alpha = logo_alpha * mask_roi * roi_alpha_orig
    else:
        logo_alpha = logo_alpha * roi_alpha_orig
    logo_alpha_3d = logo_alpha[:, :, np.newaxis]

    # Only the colour channels are blended; an alpha channel, if present,
    # is left as loaded.
    roi = product[y1_c:y2_c, x1_c:x2_c, :3].astype(float)

    # Mix the flat logo (40%) with a multiply blend against the product
    # surface (60%), then alpha-composite over the product.
    multiply_blend = (roi / 255.0) * (logo_rgb / 255.0) * 255.0
    refined_logo = (logo_rgb * 0.4) + (multiply_blend * 0.6)
    blended_roi = (refined_logo * logo_alpha_3d) + (roi * (1.0 - logo_alpha_3d))
    product[y1_c:y2_c, x1_c:x2_c, :3] = blended_roi.astype(np.uint8)

    cv2.imwrite(output_path, product)
    print(f"Successfully generated preview at {output_path}")
    return result
|