# GenMake-Crystal-Engine / genmake_engine.py
# (header cleaned from repository-page residue: "mhtbhatia — Update genmake_engine.py — 17d94be verified")
import os
import time
import numpy as np
from PIL import Image
import torch
from transformers import pipeline
import cv2
from rembg import remove, new_session
from photo_prep import studio_enhance_image
from glb_export import export_pointcloud_glb
from genmake_voxel_engine import build_importance_map, build_voxel_volume, sample_points_from_volume
# Catalog of engravable crystal products.
# Key: "<shape>_<nominal WxHxD>"; value "size" is the usable engraving
# volume (x, y, z) — presumably millimeters, slightly smaller than the
# nominal physical block to leave a safety margin (TODO confirm units).
PRODUCTS = {
"keychain_rectangle_20x30x15": {"size": (17.0, 25.5, 12.0)},
"keychain_square_30x30x10": {"size": (25.2, 24.48, 7.82)},
"keychain_heart_30x30x12": {"size": (25.0, 25.0, 9.0)},
"cube_50x50x50": {"size": (42.0, 40.80, 39.10)},
"cube_60x60x60": {"size": (51.0, 51.0, 48.0)},
"cube_80x80x80": {"size": (68.0, 68.0, 65.0)},
"block_50x50x80_portrait": {"size": (42.0, 68.0, 40.80)},
"block_60x60x100_portrait": {"size": (51.0, 85.0, 51.0)},
"block_80x80x120_portrait": {"size": (68.0, 102.0, 68.0)},
"block_80x50x50_landscape": {"size": (68.0, 42.0, 40.80)},
"block_100x60x60_landscape": {"size": (85.0, 51.0, 51.0)},
"plaque_100x100x20": {"size": (85.0, 85.0, 15.0)},
"plaque_150x100x30_landscape": {"size": (125.0, 85.0, 24.0)},
"plaque_100x150x30_portrait": {"size": (85.0, 125.0, 24.0)},
}
# Quality presets: "points" is the base point-cloud budget (scaled later by
# product face area in generate_crystal); "voxel" is the voxel pitch passed
# to the sampler — smaller means finer detail.
QUALITIES = {
"fast": {"points": 900000, "voxel": 0.020},
"balanced": {"points": 1500000, "voxel": 0.015},
"premium": {"points": 2200000, "voxel": 0.012},
}
# Lazily-initialized singletons: the depth-estimation pipeline and the
# rembg background-removal session are loaded once on first use.
_DEPTH_MODEL = None
_REMBG_SESSION = None
def _ts():
return time.strftime("%Y%m%d_%H%M%S")
def _ensure(p: str):
os.makedirs(p, exist_ok=True)
def _pil_to_bgr(pil_image):
    """Convert a PIL image into an OpenCV-style BGR ``uint8`` numpy array."""
    rgb_array = np.array(pil_image.convert("RGB"))
    bgr_array = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
    return bgr_array
def _resize_for_ai(pil_image: Image.Image, max_size=1024) -> Image.Image:
    """Downscale so the longest edge is at most *max_size*, keeping aspect ratio.

    Images already within the limit are returned unchanged.
    """
    width, height = pil_image.size
    longest = max(width, height)
    if longest <= max_size:
        return pil_image
    factor = max_size / float(longest)
    new_size = (int(width * factor), int(height * factor))
    return pil_image.resize(new_size, Image.Resampling.LANCZOS)
def _generate_shape_mask(w, h, shape_type, scale, offset_x_pct, offset_y_pct):
mask = np.zeros((h, w), dtype=np.float32)
cx = (w // 2) + int((offset_x_pct / 100.0) * w)
cy = (h // 2) + int((offset_y_pct / 100.0) * h)
if shape_type == "Round":
r = int(min(w, h) * 0.48 * scale)
cv2.circle(mask, (cx, cy), r, 1.0, -1)
elif shape_type == "Square":
s = int(min(w, h) * 0.95 * scale)
x1, y1 = cx - s//2, cy - s//2
x2, y2 = cx + s//2, cy + s//2
cv2.rectangle(mask, (x1, y1), (x2, y2), 1.0, -1)
elif shape_type == "Heart":
t = np.linspace(0, 2 * np.pi, 100)
x = 16 * np.sin(t)**3
y = 13 * np.cos(t) - 5 * np.cos(2*t) - 2 * np.cos(3*t) - np.cos(4*t)
x = (x / 16.0) * (min(w, h) * 0.45 * scale)
y = -(y / 16.0) * (min(w, h) * 0.45 * scale)
pts = np.vstack((x + cx, y + cy)).astype(np.int32).T
cv2.fillPoly(mask, [pts], 1.0)
elif shape_type == "Star":
pts = []
outer_r = min(w, h) * 0.48 * scale
inner_r = outer_r * 0.4
for i in range(10):
angle = i * np.pi / 5 - np.pi / 2
r = outer_r if i % 2 == 0 else inner_r
pts.append([cx + r * np.cos(angle), cy + r * np.sin(angle)])
pts = np.array(pts, dtype=np.int32)
cv2.fillPoly(mask, [pts], 1.0)
else:
mask = np.ones((h, w), dtype=np.float32)
return mask
def _make_depth01(pil_image: Image.Image, device: str, fp16: bool) -> np.ndarray:
    """Run monocular depth estimation and return a float32 map normalized to [0, 1].

    The transformers depth pipeline is loaded once and cached in the module-level
    ``_DEPTH_MODEL`` singleton. fp16 is only honored on CUDA devices. A flat
    depth output (max ~= min) yields an all-zeros map instead of dividing by ~0.
    """
    global _DEPTH_MODEL
    if _DEPTH_MODEL is None:
        print("⏳ Loading Depth AI into GPU...")
        on_cuda = device.startswith("cuda")
        dtype = torch.float16 if (fp16 and on_cuda) else torch.float32
        _DEPTH_MODEL = pipeline(
            "depth-estimation",
            model="Intel/dpt-hybrid-midas",
            device=0 if on_cuda else -1,
            torch_dtype=dtype,
        )
    print("⚙️ Running Depth Inference...")
    result = _DEPTH_MODEL(pil_image)
    depth = np.array(result["depth"]).astype(np.float32)
    lo, hi = float(depth.min()), float(depth.max())
    if hi - lo < 1e-8:
        return np.zeros_like(depth, dtype=np.float32)
    return (depth - lo) / (hi - lo)
def generate_crystal(image_path, product, face_mode, bg_mode, shape_mask, mask_scale, mask_x, mask_y, diorama_mode, quality, use_enhanced, fp16, detail_boost, depth_vol, out_root="GenMake_Designs"):
    """Full pipeline: photo -> depth map -> masked voxel volume -> engraving point cloud.

    Args:
        image_path: path to the source photograph.
        product: key into PRODUCTS; unknown values fall back to
            "block_50x50x80_portrait".
        face_mode: currently unused by this function (accepted for API
            compatibility with callers).
        bg_mode: anything other than "off" enables rembg background removal.
        shape_mask: "None"/"Rectangle" keep the full frame; other values
            ("Round", "Square", "Heart", "Star") clip the volume to that shape.
        mask_scale, mask_x, mask_y: shape size multiplier and center offsets
            (percent), forwarded to _generate_shape_mask.
        diorama_mode: when False, depth outside the foreground mask is zeroed
            and the remaining depth range is re-normalized to [0, 1].
        quality: key into QUALITIES; unknown values fall back to "balanced".
        use_enhanced: run studio_enhance_image preprocessing first.
        fp16: request half-precision depth inference (CUDA only).
        detail_boost, depth_vol: forwarded to sample_points_from_volume.
        out_root: root directory for the generated output folder.

    Returns:
        (log_text, [glb_path, depth_png, weight_png], depth_png, weight_png)
    """
    global _REMBG_SESSION
    print(f"\n🚀 STARTING GENERATION FOR: {product}")
    start_time = time.time()

    # --- Resolve product/quality presets (with safe fallbacks) ---
    if product not in PRODUCTS: product = "block_50x50x80_portrait"
    sx, sy, sz = PRODUCTS[product]["size"]
    if quality not in QUALITIES: quality = "balanced"
    base_points = QUALITIES[quality]["points"]
    voxel_mm = QUALITIES[quality]["voxel"]
    # Scale the point budget by the product's face area relative to the
    # reference 42x68 face, so small keychains don't get oversampled.
    max_area = 42.0 * 68.0
    area_ratio = (sx * sy) / max_area
    dynamic_total_points = int(base_points * area_ratio)

    print("📸 Loading Image...")
    # Context manager releases the underlying file handle (PIL opens lazily);
    # convert() forces a full decode so the data survives the close.
    with Image.open(image_path) as img:
        raw_pil = img.convert("RGB")
    if use_enhanced: raw_pil = studio_enhance_image(raw_pil)
    pil = _resize_for_ai(raw_pil, max_size=1024)
    bgr = _pil_to_bgr(pil)

    device = "cuda" if torch.cuda.is_available() else "cpu"
    depth01 = _make_depth01(pil, device=device, fp16=fp16)

    # --- UPGRADE 1: Depth Confidence Laplacian Filter ---
    # Flat regions (low Laplacian response) are low-confidence depth; damp them.
    print("🧹 Applying Depth Confidence Filter...")
    depth_grad = cv2.Laplacian(depth01, cv2.CV_32F)
    depth01[np.abs(depth_grad) < 0.002] *= 0.8

    print("✂️ Masking...")
    if bg_mode != "off":
        if _REMBG_SESSION is None: _REMBG_SESSION = new_session("u2net_human_seg")
        bg_removed = remove(pil, session=_REMBG_SESSION)
        # Alpha channel > 128 marks the kept foreground; blur softens the edge.
        fg_mask = (np.array(bg_removed)[:, :, 3] > 128).astype(np.float32)
        fg_mask = cv2.GaussianBlur(fg_mask, (5, 5), 0)
    else:
        fg_mask = np.ones_like(depth01, np.float32)
    if shape_mask not in ["None", "Rectangle"]:
        print(f"✂️ Slicing 3D Volume to Shape: {shape_mask}...")
        geom = _generate_shape_mask(pil.size[0], pil.size[1], shape_mask, mask_scale, mask_x, mask_y)
        fg_mask = fg_mask * geom
    if not diorama_mode:
        # Drop background depth, then stretch the surviving range back to [0, 1].
        depth01[fg_mask < 0.5] = 0.0
        valid_depths = depth01[fg_mask > 0.5]
        if valid_depths.size > 0:
            d_min, d_max = float(valid_depths.min()), float(valid_depths.max())
            depth01 = np.clip((depth01 - d_min) / (d_max - d_min + 1e-8), 0.0, 1.0)

    # --- Output paths (one timestamped folder per run) ---
    base = os.path.splitext(os.path.basename(image_path))[0]
    out_dir = os.path.join(out_root, f"{base}_{_ts()}", "engine")
    _ensure(out_dir)
    depth_png = os.path.join(out_dir, "GenMake_Depth.png")
    weight_png = os.path.join(out_dir, "GenMake_WeightPreview.png")
    glb_path = os.path.join(out_dir, "GenMake_EngraveReady.glb")
    Image.fromarray((depth01 * 255).astype(np.uint8)).save(depth_png)

    print("🧠 Building HDR Maps...")
    imp01 = build_importance_map(bgr, depth01, fg_mask)
    Image.fromarray((imp01 * 255).astype(np.uint8)).save(weight_png)
    V = build_voxel_volume(depth01, imp01, fg_mask, z_layers=60, thickness_layers=6)

    print("⚡ Processing 3D Shell...")
    points = sample_points_from_volume(V, dynamic_total_points, voxel_mm, sx, sy, sz, detail_boost, depth_vol, diorama_mode=diorama_mode, seed=123)

    print("💾 Exporting .GLB...")
    export_pointcloud_glb(points, glb_path)
    elapsed = time.time() - start_time
    print(f"✅ DONE in {elapsed:.2f}s.")
    log_text = f"✅ SUCCESS: {product}\nShape Output: {shape_mask}\nTotal Points: {len(points):,}\nTime: {elapsed:.2f}s"
    return log_text, [glb_path, depth_png, weight_png], depth_png, weight_png