Spaces:
Sleeping
Sleeping
| import base64 | |
| import gc | |
| import io | |
| import uuid | |
| from pathlib import Path | |
| import gradio as gr | |
| import torch | |
| from PIL import Image | |
| from diffusers import ( | |
| ControlNetModel, | |
| StableDiffusionControlNetPipeline, | |
| UniPCMultistepScheduler, | |
| ) | |
| from transformers import pipeline as hf_pipeline | |
# Base model identifiers. These can later be swapped for your own local or
# preferred checkpoints.
BASE_SD_ID = "runwayml/stable-diffusion-v1-5"
CONTROLNET_ID = "lllyasviel/sd-controlnet-depth"
DEPTH_MODEL_ID = "Intel/dpt-hybrid-midas"

# Half precision on CUDA when it is available, full precision on CPU otherwise.
if torch.cuda.is_available():
    DEVICE = "cuda"
    DTYPE = torch.float16
else:
    DEVICE = "cpu"
    DTYPE = torch.float32

# Generated files go to <directory of this module>/data/sd_outputs; the
# directory is created at import time if it does not exist yet.
ROOT_DIR = Path(__file__).resolve().parent
DATA_DIR = ROOT_DIR / "data"
SD_OUTPUTS_DIR = DATA_DIR / "sd_outputs"
SD_OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)

# Lazily created singletons; see get_sd_pipe() and get_depth_estimator().
sd_pipe = None
depth_estimator = None
def get_depth_estimator():
    """Return the shared depth-estimation pipeline, creating it on first use.

    The pipeline is built with the ``"depth-estimation"`` task and
    ``DEPTH_MODEL_ID`` and stored in the module-level ``depth_estimator`` so
    that subsequent calls reuse the same object.
    """
    global depth_estimator
    if depth_estimator is not None:
        return depth_estimator

    # Device index handed to the pipeline: 0 when DEVICE is "cuda", else -1.
    device_index = -1 if DEVICE != "cuda" else 0
    depth_estimator = hf_pipeline(
        "depth-estimation",
        model=DEPTH_MODEL_ID,
        device=device_index,
    )
    return depth_estimator
def get_sd_pipe():
    """Return the shared ControlNet Stable Diffusion pipeline, building it lazily.

    On first use this loads the ControlNet weights (``CONTROLNET_ID``) and the
    base pipeline (``BASE_SD_ID``) in ``DTYPE``, with the safety checker
    disabled, replaces the scheduler with ``UniPCMultistepScheduler`` built
    from the pipeline's existing scheduler config, and moves the pipeline to
    ``DEVICE``. The result is cached in the module-level ``sd_pipe``.

    The global is only assigned once the pipeline is fully configured, so a
    failure part-way through (for example while moving to the device) does not
    leave a half-initialised pipeline cached for later calls.
    """
    global sd_pipe
    if sd_pipe is not None:
        return sd_pipe

    controlnet = ControlNetModel.from_pretrained(
        CONTROLNET_ID,
        torch_dtype=DTYPE,
    )
    kwargs = {
        "controlnet": controlnet,
        "torch_dtype": DTYPE,
        "safety_checker": None,
    }
    if DEVICE == "cuda":
        # Request the fp16 weight variant only when running on CUDA.
        kwargs["variant"] = "fp16"

    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        BASE_SD_ID,
        **kwargs,
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to(DEVICE)

    # Publish to the module-level cache only after every step succeeded.
    sd_pipe = pipe
    return sd_pipe
def decode_data_url_to_image(data_url: str) -> Image.Image:
    """Decode a base64 data URL into an RGBA image.

    Args:
        data_url: String of the form ``<header>,<base64 payload>``. Everything
            up to and including the first comma is discarded.

    Returns:
        The decoded image converted to ``"RGBA"`` mode.

    Raises:
        gr.Error: If ``data_url`` is empty or contains no comma, or if the
            payload is not valid base64 or cannot be read as an image.
    """
    if not data_url or "," not in data_url:
        raise gr.Error("Canvas is empty. Add assets to the scene first.")

    _, encoded = data_url.split(",", 1)
    try:
        binary = base64.b64decode(encoded)
        # binascii.Error is a ValueError; PIL's UnidentifiedImageError and
        # truncated-file errors are OSErrors. Both are reported to the user
        # as a gr.Error instead of escaping as raw exceptions.
        return Image.open(io.BytesIO(binary)).convert("RGBA")
    except (ValueError, OSError) as exc:
        raise gr.Error(
            "Canvas image could not be decoded. Try exporting the scene again."
        ) from exc
def flatten_rgba_on_white(img: Image.Image) -> Image.Image:
    """Composite *img* over an opaque white canvas and return the RGB result.

    The input is converted to ``"RGBA"`` first, so images in other modes are
    accepted as well.
    """
    foreground = img.convert("RGBA")
    white_canvas = Image.new("RGBA", img.size, (255, 255, 255, 255))
    return Image.alpha_composite(white_canvas, foreground).convert("RGB")
def resize_for_depth_and_sd(img: Image.Image, target_max_side: int = 768) -> Image.Image:
    """Fit *img* to dimensions suitable for depth estimation and SD.

    The scale factor is capped at 1.0, so the image is shrunk until its longer
    side is at most ``target_max_side`` but is not scaled up by that factor.
    Each side is then rounded to a multiple of 8 and clamped to a minimum of
    64 pixels.

    Returns:
        ``img`` itself when the computed size equals the current size,
        otherwise a LANCZOS-resampled copy at the computed size.
    """
    w, h = img.size
    longest = max(w, h)
    scale = min(target_max_side / longest, 1.0) if longest > 0 else 1.0

    def _snap(side: int) -> int:
        # Scale, round to a multiple of 8, and enforce the 64 px minimum.
        return max(64, int(round((side * scale) / 8) * 8))

    target_size = (_snap(w), _snap(h))
    if target_size == (w, h):
        return img
    return img.resize(target_size, Image.LANCZOS)
def make_depth_image(scene_image: Image.Image) -> Image.Image:
    """Run the depth estimator on *scene_image* and return an RGB depth map.

    The estimator's ``"depth"`` output is wrapped with ``Image.fromarray`` if
    it is not already a PIL image, converted to ``"RGB"``, and resized with
    LANCZOS resampling when its size differs from the input's.
    """
    prediction = get_depth_estimator()(scene_image)
    depth_map = prediction["depth"]
    if not isinstance(depth_map, Image.Image):
        depth_map = Image.fromarray(depth_map)

    depth_map = depth_map.convert("RGB")
    if depth_map.size == scene_image.size:
        return depth_map
    return depth_map.resize(scene_image.size, Image.LANCZOS)
def save_image(img: Image.Image, prefix: str) -> str:
    """Save *img* as a PNG in ``SD_OUTPUTS_DIR`` and return its path as a string.

    The file name is ``<prefix>_<8 hex chars>.png``, where the hex part is the
    first eight characters of a fresh ``uuid4``.
    """
    unique_tag = uuid.uuid4().hex[:8]
    destination = SD_OUTPUTS_DIR / f"{prefix}_{unique_tag}.png"
    img.save(destination)
    return str(destination)
def generate_with_depth_from_scene(
    scene_png_data: str,
    prompt: str,
    negative_prompt: str,
    steps: int,
    guidance_scale: float,
    controlnet_scale: float,
    seed: int,
):
    """Generate an image from a canvas scene using depth-conditioned ControlNet.

    The scene data URL is decoded, flattened onto white, resized, and turned
    into a depth map. Only the depth map is passed to the diffusion pipeline;
    the flattened scene is used for depth estimation and saved for reference.

    Args:
        scene_png_data: Data URL holding the scene image.
        prompt: Text prompt; must be non-empty after stripping whitespace.
        negative_prompt: Negative prompt; an empty value is passed as ``None``.
        steps: Number of inference steps (coerced with ``int``).
        guidance_scale: Guidance scale (coerced with ``float``).
        controlnet_scale: ControlNet conditioning scale (coerced with ``float``).
        seed: Seed for the torch generator (coerced with ``int``).

    Returns:
        A tuple ``(scene_path, depth_path, output_path)`` of saved PNG paths.

    Raises:
        gr.Error: If the prompt is empty, or if decoding the scene fails in
            ``decode_data_url_to_image``.
    """
    prompt = (prompt or "").strip()
    if not prompt:
        raise gr.Error("Prompt is empty.")

    rgba_scene = decode_data_url_to_image(scene_png_data)
    # Used only for depth estimation; this picture itself is not fed to SD.
    scene_rgb = flatten_rgba_on_white(rgba_scene)
    scene_rgb = resize_for_depth_and_sd(scene_rgb, target_max_side=768)

    try:
        depth_image = make_depth_image(scene_rgb)
        pipe = get_sd_pipe()
        generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt or None,
            image=depth_image,
            num_inference_steps=int(steps),
            guidance_scale=float(guidance_scale),
            controlnet_conditioning_scale=float(controlnet_scale),
            generator=generator,
            width=depth_image.width,
            height=depth_image.height,
        )
        output_image = result.images[0]

        scene_path = save_image(scene_rgb, "scene_for_depth")
        depth_path = save_image(depth_image, "depth")
        output_path = save_image(output_image, "sd")
        return scene_path, depth_path, output_path
    finally:
        # Run cleanup on failure as well (e.g. an out-of-memory error during
        # inference), so cached GPU memory is not held after the error.
        gc.collect()
        if DEVICE == "cuda":
            torch.cuda.empty_cache()