Spaces:

Khan19970
/

Compositor

Sleeping

App Files Files Community

Compositor / main.py

Khan19970

Update main.py

ae8fd5e verified 23 days ago

raw

history blame contribute delete

18.4 kB

	import io
	import logging
	import traceback
	import numpy as np
	import cv2
	import torch
	from PIL import Image, ImageEnhance
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.responses import StreamingResponse
	from fastapi.middleware.cors import CORSMiddleware
	from skimage import color

	# ──────────────────────────────────────────────────────────────────
	# INITIALIZATION & CONFIG
	# ──────────────────────────────────────────────────────────────────

	# Timestamped, level-tagged log lines for every pipeline stage.
	logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
	logger = logging.getLogger(__name__)

	app = FastAPI(title="Automotive Compositor API - Spyne Pro Edition", version="5.0.0")
	# Wildcard CORS. An empty string matches no origin and no HTTP method, so
	# the previous [""] values rejected every cross-origin browser request.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Cache of lazily loaded model bundles, keyed by model name.
	_models: dict = {}

	# ──────────────────────────────────────────────────────────────────
	# MODEL MANAGEMENT (Optimized for HuggingFace Spaces)
	# ──────────────────────────────────────────────────────────────────

	def get_model(name: str):
	    """Return the cached bundle for model *name*, loading it on first use.

	    Supported names are ``"birefnet"``, ``"yolo_cls"`` and ``"depth"``.
	    Every bundle is a dict with a ``"model"`` entry; the ``"birefnet"`` and
	    ``"depth"`` bundles also carry ``"transform"`` and ``"device"``.
	    Heavy third-party imports are deferred until the model is requested.

	    Raises:
	        KeyError: if *name* is not one of the supported model names.
	    """
	    if name in _models:
	        return _models[name]

	    device = "cuda" if torch.cuda.is_available() else "cpu"

	    if name == "birefnet":
	        logger.info("Loading BiRefNet for Segmentation...")
	        from transformers import AutoModelForImageSegmentation
	        from torchvision import transforms

	        model = AutoModelForImageSegmentation.from_pretrained(
	            "ZhengPeng7/BiRefNet_dynamic", trust_remote_code=True
	        )
	        # After loading, move to the device and force FP32 to avoid
	        # dtype-mismatch errors at inference time.
	        model.to(device).eval().float()
	        transform = transforms.Compose([
	            transforms.Resize((1024, 1024)),
	            transforms.ToTensor(),
	            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	        ])
	        _models[name] = {"model": model, "transform": transform, "device": device}

	    elif name == "yolo_cls":
	        logger.info("Loading YOLOv8 Classification...")
	        from ultralytics import YOLO

	        # Move to the device right after loading and force FP32.
	        model = YOLO("yolov8n-cls.pt")
	        model.to(device).float()
	        _models[name] = {"model": model}

	    elif name == "depth":
	        logger.info("Loading Depth Estimator (MiDaS DPT)...")
	        # trust_repo=True skips the interactive torch.hub trust prompt.
	        midas = torch.hub.load("intel-isl/MiDaS", "MiDaS", trust_repo=True)
	        midas.to(device).eval().float()  # Force FP32
	        midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms", trust_repo=True)
	        _models[name] = {
	            "model": midas,
	            "transform": midas_transforms.default_transform,
	            "device": device,
	        }

	    else:
	        # Same exception type as the old dictionary miss, with a clear message.
	        raise KeyError(f"Unknown model name: {name!r}")

	    return _models[name]

	# ──────────────────────────────────────────────────────────────────
	# INTELLIGENCE & GEOMETRY
	# ──────────────────────────────────────────────────────────────────

	def classify_vehicle(pil_img: Image.Image) -> str:
	    """Classify the vehicle as ``"high"`` or ``"low"`` ground clearance.

	    Runs the cached YOLO classifier on *pil_img* and returns ``"high"`` when
	    the top-1 class name contains one of the high-clearance keywords,
	    otherwise ``"low"``. Any failure is logged and ``"low"`` is returned.
	    """
	    high_clearance_keywords = ('suv', 'truck', 'pickup', 'bus', 'van', 'jeep')
	    try:
	        bundle = get_model("yolo_cls")
	        bundle["model"].model.float()  # Safety cast to FP32
	        results = bundle["model"](pil_img, half=False, verbose=False)

	        top_class = results[0].probs.top1
	        class_name = results[0].names[top_class].lower()

	        if any(keyword in class_name for keyword in high_clearance_keywords):
	            return "high"
	        return "low"
	    except Exception as e:
	        # Best effort: classification must never abort the pipeline.
	        logger.warning(f"Classification failed: {e}. Defaulting to low clearance.")
	        return "low"

	def refine_mask(mask: np.ndarray) -> np.ndarray:
	    """Clean up a raw segmentation mask and return a uint8 alpha channel.

	    Steps: threshold at 128, morphological close (5x5 ellipse), morphological
	    open (3x3 ellipse), 3x3 Gaussian blur, then a 1.2 power curve.
	    """
	    # Morphology needs a hard 0/255 mask.
	    binary_mask = (mask > 128).astype(np.uint8) * 255

	    # Closing fills small gaps and pinholes inside the silhouette.
	    kernel_close = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
	    closed_mask = cv2.morphologyEx(binary_mask, cv2.MORPH_CLOSE, kernel_close)

	    # Opening removes small isolated specks outside the silhouette.
	    kernel_open = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
	    cleaned_mask = cv2.morphologyEx(closed_mask, cv2.MORPH_OPEN, kernel_open)

	    # Feather the edge slightly in normalized [0, 1] space.
	    float_mask = cleaned_mask.astype(np.float32) / 255.0
	    blurred_mask = cv2.GaussianBlur(float_mask, (3, 3), 0)

	    # An exponent above 1 lowers partial alphas, which tightens the feathered edge.
	    refined_alpha = np.power(blurred_mask, 1.2) * 255
	    return np.clip(refined_alpha, 0, 255).astype(np.uint8)

	def estimate_ground_plane(bg_pil: Image.Image) -> float:
	    """Estimate where the ground sits in the background, as a height fraction.

	    Runs the cached depth model on *bg_pil*, takes the vertical Sobel gradient
	    of the depth map in the lower half of the image, and picks the row with
	    the largest mean absolute gradient. The result is that row plus 10% of
	    the image height, divided by the height and capped at 0.90.

	    Returns 0.85 if anything in the estimation fails (the failure is logged).
	    """
	    try:
	        bundle = get_model("depth")
	        # The MiDaS hub transforms expect an RGB array. PIL already yields
	        # RGB, so no BGR conversion is applied (the old RGB->BGR swap fed
	        # the model channel-reversed input).
	        img_rgb = np.array(bg_pil.convert("RGB"))
	        input_batch = bundle["transform"](img_rgb).to(bundle["device"]).float()

	        with torch.no_grad():
	            bundle["model"].float()  # Safety cast to FP32
	            prediction = bundle["model"](input_batch)
	            # Resize the prediction back to the background's (height, width).
	            prediction = torch.nn.functional.interpolate(
	                prediction.unsqueeze(1),
	                size=bg_pil.size[::-1],
	                mode="bicubic",
	                align_corners=False,
	            ).squeeze()

	        depth_map = prediction.cpu().numpy()

	        # Only the lower half of the frame is searched for the ground transition.
	        h, w = depth_map.shape
	        half = int(h * 0.5)
	        gradient_y = cv2.Sobel(depth_map[half:, :], cv2.CV_64F, 0, 1, ksize=3)

	        # Row with the strongest average vertical depth change.
	        y_profile = np.abs(gradient_y).mean(axis=1)
	        peak_y = int(np.argmax(y_profile)) + half

	        # Place the car slightly below that transition, never lower than 90%.
	        return float(min((peak_y + int(h * 0.1)) / h, 0.90))
	    except Exception as e:
	        # Best effort: fall back to a fixed ground line rather than fail.
	        logger.warning(f"Depth estimation failed: {e}. Falling back to 0.85.")
	        return 0.85

	def apply_multiply_shadow(bg_rgb: np.ndarray, shadow_mask: np.ndarray, base_color: tuple = (20, 25, 30)) -> np.ndarray:
	    """Darken *bg_rgb* with a multiply blend driven by *shadow_mask*.

	    Args:
	        bg_rgb: background image, indexed as (row, column, channel).
	        shadow_mask: 2-D shadow strength on a 0-255 scale.
	        base_color: shadow tint on a 0-255 scale, one value per channel.

	    Returns:
	        A uint8 array with the same shape as *bg_rgb*.
	    """
	    # Work in float so the blend cannot overflow the uint8 range.
	    bg_float = bg_rgb.astype(np.float32)
	    alpha = np.clip(shadow_mask.astype(np.float32) / 255.0, 0, 1)[:, :, None]

	    # A per-channel tint broadcasts over the image; no full-size colour
	    # array is needed.
	    tint = np.asarray(base_color, dtype=np.float32) / 255.0

	    # Multiply formula: BG * ((1 - alpha) + tint * alpha).
	    # Scaling the background keeps its texture visible inside the shadow.
	    multiply_factor = (1.0 - alpha) + (tint * alpha)

	    result = bg_float * multiply_factor
	    return np.clip(result, 0, 255).astype(np.uint8)

	# ──────────────────────────────────────────────────────────────────
	# CORE PIPELINE ENGINE
	# ──────────────────────────────────────────────────────────────────

	def harmonized_color_lab(car_rgb: np.ndarray, car_mask: np.ndarray, bg_rgb: np.ndarray) -> np.ndarray:
	    """Nudge the car's colours toward the background's ambient tone in LAB space.

	    The ambient reference is the bottom 40% of *bg_rgb*. For pixels where
	    *car_mask* exceeds 0.5, the a/b channels move 30% of the way toward the
	    ambient means and L moves 15% of the way (clipped to 0-100). Pixels
	    outside the mask keep their LAB values.

	    Returns:
	        A uint8 RGB array, or *car_rgb* itself when the mask selects no pixels.
	    """
	    inside = car_mask > 0.5
	    # Nothing to harmonize: return before paying for the LAB conversions.
	    if not inside.any():
	        return car_rgb

	    # Ambient lighting is sampled from the bottom 40% of the background.
	    bg_h = bg_rgb.shape[0]
	    bg_ambient_zone = bg_rgb[int(bg_h * 0.6):, :]

	    car_lab = color.rgb2lab(car_rgb)
	    bg_lab = color.rgb2lab(bg_ambient_zone)

	    car_l_mean, car_a_mean, car_b_mean = car_lab[inside].mean(axis=0)
	    bg_l_mean, bg_a_mean, bg_b_mean = bg_lab.mean(axis=(0, 1))

	    # Gentle colour-temperature shift (30% strength) on masked pixels only.
	    car_lab[inside, 1] += (bg_a_mean - car_a_mean) * 0.3
	    car_lab[inside, 2] += (bg_b_mean - car_b_mean) * 0.3

	    # Gentle luminance shift (15% strength), clipped to the 0-100 range.
	    l_shift = (bg_l_mean - car_l_mean) * 0.15
	    car_lab[inside, 0] = np.clip(car_lab[inside, 0] + l_shift, 0, 100)

	    harmonized_rgb = color.lab2rgb(car_lab) * 255.0
	    return np.clip(harmonized_rgb, 0, 255).astype(np.uint8)

	def _shift_rows_down(mask: np.ndarray, rows: int) -> np.ndarray:
	    """Return *mask* moved down by *rows*, zero-filled at the top (no wrap-around)."""
	    shifted = np.zeros_like(mask)
	    if rows < mask.shape[0]:
	        shifted[rows:] = mask[:mask.shape[0] - rows]
	    return shifted

	def generate_dealership_shadows(bg_np: np.ndarray, car_alpha: np.ndarray, pos: tuple, v_type: str) -> np.ndarray:
	    """Render contact and ambient shadows for the car onto the background.

	    The bottom 30% of *car_alpha* is stamped onto a background-sized canvas
	    at *pos*, shifted down and blurred twice (a tight contact shadow and a
	    wide ambient pool), combined, and multiply-blended into *bg_np*.

	    Args:
	        bg_np: background image array, indexed as (row, column, channel).
	        car_alpha: 2-D car alpha channel on a 0-255 scale.
	        pos: (x, y) of the car's top-left corner on the background.
	        v_type: accepted for interface compatibility; not used here.

	    Returns:
	        The shadowed background as returned by ``apply_multiply_shadow``.
	    """
	    bg_h, bg_w = bg_np.shape[:2]
	    ch, cw = car_alpha.shape
	    px, py = pos

	    mask_canvas = np.zeros((bg_h, bg_w), dtype=np.float32)

	    # Use only the bottom 30% of the car so the roof doesn't cast a halo.
	    crop_h = int(ch * 0.30)
	    y_start = max(py + ch - crop_h, 0)
	    y_end = min(py + ch, bg_h)
	    x_start = max(px, 0)
	    x_end = min(px + cw, bg_w)

	    if y_end > y_start and x_end > x_start:
	        # Convert canvas coordinates to car-local ones by subtracting the
	        # paste offset, so the crop stays aligned when the car is clipped
	        # by any edge of the frame.
	        alpha_crop = car_alpha[y_start - py:y_end - py, x_start - px:x_end - px] / 255.0
	        mask_canvas[y_start:y_end, x_start:x_end] = alpha_crop

	    # 1. Contact shadow: small downward shift (~1.5% of car height), tight blur.
	    shift_c = max(int(ch * 0.015), 2)
	    blur_c = int(cw * 0.02) | 1  # "| 1" forces the odd kernel size OpenCV requires
	    contact = cv2.GaussianBlur(_shift_rows_down(mask_canvas, shift_c), (blur_c, blur_c), 0)

	    # 2. Ambient shadow: larger shift (~4%), wide horizontal and tight vertical blur.
	    shift_a = max(int(ch * 0.04), 5)
	    blur_ax = int(cw * 0.12) | 1
	    blur_ay = int(ch * 0.05) | 1
	    ambient = cv2.GaussianBlur(_shift_rows_down(mask_canvas, shift_a), (blur_ax, blur_ay), 0)

	    # 3. Combine both layers and multiply-blend with a cool dark-grey tint.
	    combined = (ambient * 0.5) + (contact * 0.9)
	    shadow_mask = (np.clip(combined, 0, 1) * 255).astype(np.uint8)

	    return apply_multiply_shadow(bg_np, shadow_mask, base_color=(15, 20, 25))

	def generate_showroom_reflection(bg_np: np.ndarray, car_rgba: Image.Image, pos: tuple) -> Image.Image:
	    """Build a transparent, background-sized layer holding the car's floor reflection.

	    The car is flipped vertically, squashed to 35% of its height, faded out
	    with a vertical gradient at 40% peak opacity, blurred, and pasted so its
	    top edge overlaps the car's bottom edge by 2 pixels.

	    Args:
	        bg_np: background array; only its height and width are read.
	        car_rgba: the positioned car image (converted to RGBA here).
	        pos: (x, y) of the car's top-left corner on the background.

	    Returns:
	        An RGBA image the size of the background. It is fully transparent
	        when the reflection would start below the frame or would be less
	        than one pixel tall.
	    """
	    bg_h, bg_w = bg_np.shape[:2]
	    cw, ch = car_rgba.size
	    px, py = pos

	    canvas = Image.new("RGBA", (bg_w, bg_h), (0, 0, 0, 0))

	    ref_h = int(ch * 0.35)
	    # py + ch is the car's bottom edge; overlap by 2 px to hide the seam.
	    target_y = py + ch - 2

	    # Nothing to draw: zero-height reflection (resize would raise) or the
	    # reflection starts below the visible frame.
	    if ref_h < 1 or target_y >= bg_h:
	        return canvas

	    # 1-2. Flip the car and squash it vertically.
	    car_flipped = car_rgba.convert("RGBA").transpose(Image.FLIP_TOP_BOTTOM)
	    car_flipped = car_flipped.resize((cw, ref_h), Image.LANCZOS)
	    ref_np = np.array(car_flipped)

	    # 3. Linear fade from 40% opacity at the top row to 0 at the bottom row.
	    gradient = np.linspace(1.0, 0.0, ref_h).reshape(-1, 1)
	    ref_np[..., 3] = (ref_np[..., 3] * gradient * 0.40).astype(np.uint8)

	    # 4. Anisotropic Gaussian blur: 7 px wide, ~10% of the reflection height tall.
	    k_size = int(ref_h * 0.1) | 1  # "| 1" forces the odd kernel size OpenCV requires
	    ref_rgb = cv2.GaussianBlur(ref_np[..., :3], (7, k_size), 0)
	    ref_alpha = cv2.GaussianBlur(ref_np[..., 3], (7, k_size), 0)
	    blurred_ref = np.dstack([ref_rgb, ref_alpha])

	    # 5. Anchor the reflection directly under the car.
	    canvas.paste(Image.fromarray(blurred_ref, "RGBA"), (px, target_y))
	    return canvas

	def auto_position_car(car_rgba: Image.Image, bg: Image.Image, ground_y_ratio: float) -> tuple:
	    """Scale the car for the background and compute where to paste it.

	    The car is scaled to 72% of the background width, or smaller if needed
	    so its height stays within 60% of the background height. It is centred
	    horizontally, and its bottom edge is placed at ``ground_y_ratio`` of the
	    background height before vertical clamping.

	    Returns:
	        ``(resized_car, (px, py))`` where ``(px, py)`` is the top-left paste
	        position on the background.
	    """
	    bg_w, bg_h = bg.size
	    cw, ch = car_rgba.size

	    # 1. Fixed scale: the car spans 72% of the background width...
	    target_w = int(bg_w * 0.72)
	    scale = target_w / cw

	    # ...unless that makes it taller than 60% of the frame; then fit by height.
	    if (ch * scale) > (bg_h * 0.60):
	        scale = (bg_h * 0.60) / ch
	        target_w = int(cw * scale)

	    # Keep both dimensions at least 1 px so resize() cannot be asked for an
	    # empty image on tiny or extreme-aspect inputs.
	    target_w = max(target_w, 1)
	    target_h = max(int(ch * scale), 1)
	    car_res = car_rgba.resize((target_w, target_h), Image.LANCZOS)

	    ground_y = int(bg_h * ground_y_ratio)

	    px = (bg_w - target_w) // 2
	    # The image's bottom edge is treated as the bottom of the tires (the
	    # caller crops the car tightly), so the top edge sits target_h above ground.
	    py = ground_y - target_h

	    # 2. Clamp vertically: at least 15% from the top, and the bottom edge at
	    # least 5% above the bottom of the frame.
	    py = max(int(bg_h * 0.15), min(py, bg_h - target_h - int(bg_h * 0.05)))

	    return car_res, (px, py)

	def run_pipeline(car_pil: Image.Image, bg_pil: Image.Image) -> Image.Image:
	    """Composite the car from *car_pil* onto *bg_pil* and return the final RGB image.

	    Stages: classify the vehicle, estimate the ground line, segment the car
	    with BiRefNet, refine the mask, crop to the mask's bounding box, scale
	    and position, relight in LAB space, render shadows and a floor
	    reflection, paste the car, then apply a 5% contrast boost.
	    """
	    # Work in RGB throughout: the alpha stacking below needs exactly three
	    # colour channels, which an RGBA/LA/P input would not provide.
	    car_pil = car_pil.convert("RGB")
	    bg_pil = bg_pil.convert("RGB")

	    # Standardize resolutions (car 1536 px wide, background 1920 px wide).
	    # Heights are floored at 1 px so extreme aspect ratios cannot produce
	    # an empty image.
	    car_h = max(1, int(1536 * car_pil.height / car_pil.width))
	    bg_h = max(1, int(1920 * bg_pil.height / bg_pil.width))
	    car_pil = car_pil.resize((1536, car_h), Image.LANCZOS)
	    bg_pil = bg_pil.resize((1920, bg_h), Image.LANCZOS)

	    logger.info("1. Classifying Vehicle Geometry...")
	    v_type = classify_vehicle(car_pil)

	    logger.info("2. Estimating Scene Depth & Ground Plane...")
	    ground_ratio = estimate_ground_plane(bg_pil)

	    logger.info("3. Executing BiRefNet Segmentation...")
	    bundle = get_model("birefnet")
	    inp = bundle["transform"](car_pil).unsqueeze(0).to(bundle["device"]).float()
	    with torch.no_grad():
	        bundle["model"].float()  # FP32 safety cast
	        preds = bundle["model"](inp)

	    # The last prediction is used; resize it back to the car's resolution.
	    raw_mask = torch.sigmoid(preds[-1]).squeeze().cpu().numpy()
	    raw_mask = (cv2.resize(raw_mask, car_pil.size) * 255).astype(np.uint8)

	    logger.info("4. Refining Mask & Edges...")
	    refined_alpha = refine_mask(raw_mask)

	    # Combine colour and alpha into the initial RGBA cutout.
	    car_rgba_temp = Image.fromarray(np.dstack([np.array(car_pil), refined_alpha]), "RGBA")

	    # Strict crop: remove all transparent padding so the image's bottom edge
	    # coincides with the lowest visible pixel of the car.
	    ys, xs = np.where(refined_alpha > 10)
	    if len(ys) > 0:
	        strict_bbox = (int(xs.min()), int(ys.min()), int(xs.max()) + 1, int(ys.max()) + 1)
	        car_rgba_temp = car_rgba_temp.crop(strict_bbox)
	    else:
	        logger.warning("Segmentation mask is empty; compositing the uncropped image.")

	    logger.info("5. Calculating Perspective Position...")
	    car_positioned, pos = auto_position_car(car_rgba_temp, bg_pil, ground_ratio)

	    logger.info("6. Applying LAB Ambient Relighting...")
	    c_arr = np.array(car_positioned)
	    car_rgb = c_arr[..., :3]
	    car_alpha = c_arr[..., 3]
	    bg_np = np.array(bg_pil)

	    harmonized_rgb = harmonized_color_lab(car_rgb, car_alpha / 255.0, bg_np)
	    car_final = Image.fromarray(np.dstack([harmonized_rgb, car_alpha]), "RGBA")

	    logger.info("7. Rendering Physical Shadows (Alpha-Shift)...")
	    bg_with_shadows = generate_dealership_shadows(bg_np, car_alpha, pos, v_type)
	    bg_layered = Image.fromarray(bg_with_shadows, "RGB").convert("RGBA")

	    logger.info("8. Generating Showroom Floor Reflections...")
	    reflection_layer = generate_showroom_reflection(bg_np, car_final, pos)
	    bg_layered = Image.alpha_composite(bg_layered, reflection_layer)

	    logger.info("9. Finalizing Composition...")
	    # The car goes on last, using its own alpha channel as the paste mask.
	    bg_layered.paste(car_final, pos, car_final)

	    # Final global contrast boost of 5%.
	    enhancer = ImageEnhance.Contrast(bg_layered.convert("RGB"))
	    return enhancer.enhance(1.05)

	# ──────────────────────────────────────────────────────────────────
	# API ENDPOINTS (FastAPI async structure preserved)
	# ──────────────────────────────────────────────────────────────────

	@app.post("/composite")
	async def composite(car_image: UploadFile = File(...), background_image: UploadFile = File(...)):
	    """Composite the uploaded car photo onto the uploaded background.

	    Returns the result as a streamed JPEG (quality 95, no chroma subsampling).

	    Raises:
	        HTTPException: 400 when an upload cannot be decoded as an image;
	            500 for any other failure (the traceback is logged).
	    """
	    try:
	        car_bytes = await car_image.read()
	        bg_bytes = await background_image.read()

	        try:
	            c_pil = Image.open(io.BytesIO(car_bytes)).convert("RGB")
	            b_pil = Image.open(io.BytesIO(bg_bytes)).convert("RGB")
	        except (OSError, ValueError) as e:
	            # An undecodable upload is a client error, not a server fault.
	            raise HTTPException(status_code=400, detail=f"Invalid image upload: {e}") from e

	        result = run_pipeline(c_pil, b_pil)

	        buf = io.BytesIO()
	        # High-quality JPEG output at the pipeline's resolution.
	        result.save(buf, format="JPEG", quality=95, subsampling=0)
	        buf.seek(0)

	        return StreamingResponse(buf, media_type="image/jpeg")

	    except HTTPException:
	        # Already a deliberate HTTP error; don't rewrap it as a 500.
	        raise
	    except Exception as e:
	        logger.error(f"Pipeline Failure: {traceback.format_exc()}")
	        raise HTTPException(status_code=500, detail=str(e)) from e