Spaces:
Sleeping
Sleeping
CORE-ONLY: skip detail sharpener stage (Lotus-1-style single-stage), median disabled for clean experiment
fd9fd6a verified | import spaces # must be first! | |
| import sys | |
| import os | |
| import torch | |
| from PIL import Image | |
| import gradio as gr | |
| from glob import glob | |
| from contextlib import nullcontext | |
| import numpy as np | |
| import cv2 | |
| import tempfile | |
| from pipeline import Lotus2Pipeline | |
| from diffusers import ( | |
| FlowMatchEulerDiscreteScheduler, | |
| FluxTransformer2DModel, | |
| ) | |
| from infer import ( | |
| load_all_task_weights, | |
| process_single_image, | |
| ) | |
| from huggingface_hub import login | |
# Authenticate with the Hugging Face Hub using the HF_TOKEN environment
# variable. login(token=None) falls back to an interactive prompt, which cannot
# be answered in a headless deployment, so only log in when a token is present.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)

# Shared inference pipeline; stays None until load_pipeline() has run.
pipeline = None
# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# dtype used for the transformer weights and the pipeline.
weight_dtype = torch.bfloat16
# Tasks whose weights are loaded onto the shared transformer.
TASKS = ("depth", "normal")
def load_pipeline():
    """Build the Lotus-2 pipeline and store it in the module-level ``pipeline``.

    Loads the FLUX.1-dev scheduler and transformer, freezes the transformer and
    moves it to ``device`` / ``weight_dtype``, attaches the weights for every
    task in ``TASKS``, and finally assembles the ``Lotus2Pipeline`` on
    ``device``. The per-task local continuity modules returned by
    ``load_all_task_weights`` are kept on the pipeline as ``_lcms``.
    """
    global pipeline

    base_model = 'black-forest-labs/FLUX.1-dev'

    scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
        base_model, subfolder="scheduler", num_train_timesteps=10
    )

    flux = FluxTransformer2DModel.from_pretrained(
        base_model, subfolder="transformer", revision=None, variant=None
    )
    # Inference only: no gradients needed on the backbone.
    flux.requires_grad_(False)
    flux.to(device=device, dtype=weight_dtype)

    # Both tasks' adapters (depth + normal) and both LCMs are loaded onto the
    # single shared FLUX transformer; the active set is switched per pass.
    flux, task_lcms = load_all_task_weights(flux, TASKS)

    pipeline = Lotus2Pipeline.from_pretrained(
        base_model,
        scheduler=scheduler,
        transformer=flux,
        revision=None,
        variant=None,
        torch_dtype=weight_dtype,
    )
    pipeline._lcms = task_lcms
    pipeline = pipeline.to(device)
def _save_raw_outputs(output_npy, task):
    """Write a prediction to disk as a raw float32 ``.npy`` and a 16-bit PNG.

    Depth ``.npy`` is (H, W) in [0, 1]; normal ``.npy`` is (H, W, 3) in [0, 1],
    i.e. ``(n + 1) / 2`` (directly load_normals-compatible).

    Args:
        output_npy: Prediction array; 2-D arrays are written as grayscale,
            3-D arrays are treated as RGB.
        task: Task name, used as the file name stem.

    Returns:
        Tuple ``(npy_path, png16_path)`` of the files written. Both live in a
        fresh temporary directory that is not removed here, since the paths
        are handed back to the caller.

    Raises:
        OSError: If OpenCV reports that the PNG could not be written.
    """
    out_dir = tempfile.mkdtemp(prefix="lotus2_")

    # Lossless copy of the prediction.
    npy_path = os.path.join(out_dir, f"{task}.npy")
    np.save(npy_path, output_npy.astype(np.float32))

    # Quantise [0, 1] to the full uint16 range, rounding to nearest.
    arr16 = (np.clip(output_npy, 0.0, 1.0) * 65535.0 + 0.5).astype(np.uint16)
    if arr16.ndim == 3:
        # OpenCV writes channels as BGR, so flip the RGB order; make the
        # reversed view contiguous before handing it to OpenCV.
        arr16 = np.ascontiguousarray(arr16[:, :, ::-1])

    png16_path = os.path.join(out_dir, f"{task}_16bit.png")
    # cv2.imwrite signals failure through its return value instead of raising;
    # surface that here rather than returning a path to a missing file.
    if not cv2.imwrite(png16_path, arr16):
        raise OSError(f"Failed to write 16-bit PNG: {png16_path}")
    return npy_path, png16_path
def _run_task(image_path, task, process_res):
    """Run one task on one image and persist its raw outputs.

    Args:
        image_path: Path of the input image.
        task: Task name; selects the adapter names and the entry of
            ``pipeline._lcms`` to activate.
        process_res: Processing resolution, cast to ``int`` before use.

    Returns:
        Tuple ``(output_vis, npy_path, png16_path)``: the visualisation
        returned by ``process_single_image`` plus the two file paths produced
        by ``_save_raw_outputs``.
    """
    # Switch the shared pipeline over to this task's adapters and LCM.
    pipeline._core_adapter_name = f"{task}_core_predictor"
    pipeline._sharpener_adapter_name = f"{task}_detail_sharpener"
    pipeline.local_continuity_module = pipeline._lcms[task]

    run_options = dict(
        task_name=task,
        device=device,
        num_inference_steps=10,
        process_res=int(process_res),
        # This Space is the core-only experiment: the detail sharpener is skipped.
        core_only=True,
    )
    _unused, preview, raw_prediction = process_single_image(
        image_path, pipeline, **run_options
    )

    saved_npy, saved_png16 = _save_raw_outputs(raw_prediction, task)
    return preview, saved_npy, saved_png16
def fn(image_path, process_res=1024):
    """Gradio callback: predict depth and normals for one image.

    Args:
        image_path: File path of the uploaded image. Gradio passes ``None``
            when the user submits without providing an image.
        process_res: Processing resolution. It is a CEILING: the model predicts
            at min(input long side, process_res) and the output array has the
            input size, so a larger value only helps if the input is at least
            that large (upscale it first). Falsy values fall back to 1024.

    Returns:
        A 6-tuple matching the interface outputs: ``[input, depth_vis]``,
        ``[input, normal_vis]``, the depth ``.npy`` path, the depth 16-bit PNG
        path, the normal ``.npy`` path and the normal 16-bit PNG path.

    Raises:
        gr.Error: If no image was provided.
    """
    # Fail early with a user-facing message instead of an opaque error from
    # deep inside the inference code.
    if not image_path:
        raise gr.Error("Please provide an input image.")

    pipeline.set_progress_bar_config(disable=True)
    process_res = int(process_res) if process_res else 1024

    depth_vis, depth_npy, depth_png16 = _run_task(image_path, "depth", process_res)
    normal_vis, normal_npy, normal_png16 = _run_task(image_path, "normal", process_res)

    inp = Image.open(image_path)
    return (
        [inp, depth_vis],
        [inp, normal_vis],
        depth_npy, depth_png16,
        normal_npy, normal_png16,
    )
def build_demo():
    """Create the Gradio interface for the core-only depth + normal demo.

    Inputs are an image (passed to ``fn`` as a file path) and a processing
    resolution slider. Outputs are two before/after image sliders followed by
    four downloadable files (raw ``.npy`` and 16-bit PNG for each task).
    Example rows are collected from the PNG and JPG files under
    ``assets/demo_examples/depth``.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    image_input = gr.Image(label="Image", type="filepath")
    resolution_slider = gr.Slider(
        512, 2048, value=1024, step=128,
        label="Process resolution (detail cap). Output detail = min(input long side, this). "
              "Higher = finer but much more VRAM/time; >1536 may OOM on this GPU. "
              "Feed an input at least this large (upscale first) to benefit.",
    )

    # Output order must match the tuple returned by fn.
    result_components = [
        gr.ImageSlider(label=title, type="pil", slider_position=20)
        for title in ("Depth", "Normal")
    ]
    result_components += [
        gr.File(label=caption)
        for caption in (
            "Raw float32 depth.npy (lossless, [0,1])",
            "16-bit depth PNG",
            "Raw float32 normal.npy (lossless, [0,1])",
            "16-bit normal PNG",
        )
    ]

    # One example row per demo image: [image, process_res] for the 2 inputs.
    example_rows = []
    for pattern in ("assets/demo_examples/depth/*.png",
                    "assets/demo_examples/depth/*.jpg"):
        for example_path in glob(pattern):
            example_rows.append([example_path, 1024])

    return gr.Interface(
        fn=fn,
        title="Lotus-2 Geometry CORE-ONLY (experiment): Depth + Normal, single-stage",
        description="""
<strong>Core-only variant of Lotus-2 Geometry.</strong> Skips the detail-sharpener
second stage entirely — the LCM output goes straight to the VAE decoder.
Mirrors Lotus-1's single-stage smoothness, on Lotus-2's improved base predictor.
<br><br>
No post-processing (no median de-grid) so the raw model behaviour is visible.
Returns previews + raw float32 <strong>.npy</strong> + 16-bit PNG for both depth and normal.
""",
        inputs=[image_input, resolution_slider],
        outputs=result_components,
        examples=example_rows,
        examples_per_page=10,
        cache_examples=False,
    )
def main():
    """Load the model pipeline, then build and launch the Gradio demo."""
    # The pipeline must exist before any request reaches fn.
    load_pipeline()
    app = build_demo()
    # Launched with Gradio defaults; server_name="0.0.0.0" and
    # server_port=6383 were left disabled in the original call.
    app.launch()


if __name__ == "__main__":
    main()