PSHuman

Sleeping

App Files Files Community

painter3000 committed on Mar 31

Commit

66043e5

verified ·

1 Parent(s): d233634

Update inference.py

Browse files

- New version for uploading and downloading the photo set

Files changed (1) hide show

inference.py +218 -190

inference.py CHANGED Viewed

@@ -1,24 +1,28 @@
 import argparse
-import json
 import os
 from pathlib import Path
-from typing import Dict, Optional, List
 from omegaconf import OmegaConf
 from PIL import Image
-from dataclasses import dataclass
-from collections import defaultdict
 import torch
 import torch.utils.checkpoint
 from torchvision.utils import make_grid
 from accelerate.utils import set_seed
 from tqdm.auto import tqdm
-import torch.nn.functional as F
 from einops import rearrange
 from rembg import remove, new_session
 from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
 from econdataset import SMPLDataset
 from reconstruct import ReMesh
 providers = [
     ('CUDAExecutionProvider', {
         'device_id': 0,
@@ -32,24 +36,9 @@ session = new_session(providers=providers)
 weight_dtype = torch.float16
-def convert_to_numpy(tensor):
-    return tensor.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0).to("cpu", torch.uint8).numpy()
-def convert_to_pil(tensor):
-    return Image.fromarray(convert_to_numpy(tensor))
-def save_tensor_image(tensor, fp):
-    ndarr = convert_to_numpy(tensor)
-    save_image_numpy(ndarr, fp)
-    return ndarr
-def save_image_numpy(ndarr, fp):
-    im = Image.fromarray(ndarr)
-    im.save(fp)
 @dataclass
 class TestConfig:
@@ -72,172 +61,200 @@ class TestConfig:
     num_views: int
     enable_xformers_memory_efficient_attention: bool
     with_smpl: Optional[bool]
     recon_opt: Dict
-    # new two-stage settings
-    run_mode: str = "full"               # full | generate | reconstruct
-    multiview_tmp_dir: str = ""
     prefer_edited_views: bool = True
-    save_multiview_metadata: bool = True
-def ensure_rgba(img: Image.Image) -> Image.Image:
-    return img.convert("RGBA") if img.mode != "RGBA" else img
-def get_scene_name(batch, sample_index: int) -> str:
-    return Path(batch['filename'][sample_index]).stem
-def get_scene_dir(base_dir: str, scene: str) -> Path:
-    return Path(base_dir) / scene
-def save_multiview_scene(base_dir: str, scene: str, colors: List[Image.Image], normals: List[Image.Image], meta: Optional[dict] = None):
-    scene_dir = get_scene_dir(base_dir, scene)
     raw_dir = scene_dir / "raw"
     edit_dir = scene_dir / "edit"
-    raw_dir.mkdir(parents=True, exist_ok=True)
-    edit_dir.mkdir(parents=True, exist_ok=True)
     for idx, img in enumerate(colors):
-        img = ensure_rgba(img)
-        img.save(raw_dir / f"color_{idx:02d}.png")
-        img.save(edit_dir / f"color_{idx:02d}.png")
     for idx, img in enumerate(normals):
-        img = ensure_rgba(img)
-        img.save(raw_dir / f"normal_{idx:02d}.png")
-        img.save(edit_dir / f"normal_{idx:02d}.png")
-    if meta is not None:
-        with open(scene_dir / "meta.json", "w", encoding="utf-8") as f:
-            json.dump(meta, f, indent=2)
-def load_multiview_scene(base_dir: str, scene: str, prefer_edit=True):
-    scene_dir = get_scene_dir(base_dir, scene)
-    candidate_dirs = [scene_dir / ("edit" if prefer_edit else "raw"), scene_dir / ("raw" if prefer_edit else "edit")]
-    data_dir = None
-    for cdir in candidate_dirs:
-        if cdir.exists():
-            data_dir = cdir
-            break
-    if data_dir is None:
-        raise FileNotFoundError(f"No multiview directory found for scene '{scene}' under {scene_dir}")
-    color_paths = sorted(data_dir.glob("color_*.png"))
-    normal_paths = sorted(data_dir.glob("normal_*.png"))
-    if not color_paths or not normal_paths:
-        raise FileNotFoundError(f"No color/normal images found in {data_dir}")
-    colors = [ensure_rgba(Image.open(p)) for p in color_paths]
-    normals = [ensure_rgba(Image.open(p)) for p in normal_paths]
     return colors, normals
-def prepare_scene_views(batch, imgs_in, normals_pred, images_pred, out, cfg: TestConfig, save_dir, images_cond, case_id):
-    guidance_scale = cfg.validation_guidance_scales
-    num_views = imgs_in.shape[0] // (out.shape[0] // 2 // cfg.num_views) if False else None  # unused safeguard
-    bsz = out.shape[0] // 2
-    num_views = cfg.num_views
-    scene_results = []
-    if cfg.save_mode == 'concat':
-        cur_dir = os.path.join(save_dir, f"cropsize-{cfg.validation_dataset.crop_size}-cfg{guidance_scale:.1f}-seed{cfg.seed}-smpl-{cfg.with_smpl}")
-        os.makedirs(cur_dir, exist_ok=True)
-        for i in range(bsz // num_views):
-            scene = get_scene_name(batch, i)
-            img_in_ = images_cond[i].to(out.device)
-            vis_ = [img_in_]
-            for j in range(num_views):
-                idx = i * num_views + j
-                normal = normals_pred[idx]
-                color = images_pred[idx]
-                vis_.append(color)
-                vis_.append(normal)
-            out_filename = f"{cur_dir}/{scene}.png"
-            vis_ = torch.stack(vis_, dim=0)
-            vis_ = make_grid(vis_, nrow=len(vis_), padding=0, value_range=(0, 1))
-            save_tensor_image(vis_, out_filename)
-        return scene_results
-    if cfg.save_mode != 'rgb':
-        raise ValueError(f"Unsupported save_mode for two-stage workflow: {cfg.save_mode}")
-    for i in range(bsz // num_views):
-        scene = get_scene_name(batch, i)
-        normals, colors = [], []
-        for j in range(num_views):
-            idx = i * num_views + j
-            normal = normals_pred[idx]
-            if j == 0:
-                color = imgs_in[i * num_views].to(out.device)
-            else:
-                color = images_pred[idx]
-            if j in [3, 4]:
-                normal = torch.flip(normal, dims=[2])
-                color = torch.flip(color, dims=[2])
-            colors.append(color)
-            if j == 6:
-                normal = F.interpolate(normal.unsqueeze(0), size=(256, 256), mode='bilinear', align_corners=False).squeeze(0)
-            normals.append(normal)
-        normals[0][:, :256, 256:512] = normals[-1]
-        color_pils = [ensure_rgba(remove(convert_to_pil(tensor), session=session)) for tensor in colors[:6]]
-        normal_pils = [ensure_rgba(remove(convert_to_pil(tensor), session=session)) for tensor in normals[:6]]
-        meta = None
-        if cfg.save_multiview_metadata:
-            meta = {
-                "scene": scene,
-                "case_id": case_id,
-                "num_colors": len(color_pils),
-                "num_normals": len(normal_pils),
-                "seed": cfg.seed,
-                "run_mode": cfg.run_mode,
-                "crop_size": cfg.validation_dataset.crop_size,
-                "with_smpl": cfg.with_smpl,
-            }
-        scene_results.append((scene, color_pils, normal_pils, meta))
-    return scene_results
 def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save_dir):
-    if pipeline is not None:
-        pipeline.set_progress_bar_config(disable=True)
     if cfg.seed is None:
         generator = None
     else:
-        device = pipeline.unet.device if pipeline is not None else "cuda"
-        generator = torch.Generator(device=device).manual_seed(cfg.seed)
     for case_id, batch in tqdm(enumerate(dataloader)):
         if cfg.run_mode == "reconstruct":
-            batch_size = len(batch['filename'])
-            for i in range(batch_size):
-                scene = get_scene_name(batch, i)
-                colors, normals = load_multiview_scene(
-                    cfg.multiview_tmp_dir,
-                    scene,
-                    prefer_edit=cfg.prefer_edited_views,
-                )
-                pose = econdata.__getitem__(case_id + i)
-                carving.optimize_case(scene, pose, colors, normals)
-                torch.cuda.empty_cache()
             continue
-        images_cond = batch['imgs_in'][:, 0]
         imgs_in = torch.cat([batch['imgs_in']] * 2, dim=0)
         num_views = imgs_in.shape[1]
         imgs_in = rearrange(imgs_in, "B Nv C H W -> (B Nv) C H W")
@@ -248,8 +265,7 @@ def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save
         else:
             smpl_in = None
-        normal_prompt_embeddings = batch['normal_prompt_embeddings']
-        clr_prompt_embeddings = batch['color_prompt_embeddings']
         prompt_embeddings = torch.cat([normal_prompt_embeddings, clr_prompt_embeddings], dim=0)
         prompt_embeddings = rearrange(prompt_embeddings, "B Nv N C -> (B Nv) N C")
@@ -265,54 +281,68 @@ def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save
                 guidance_scale=guidance_scale,
                 output_type='pt',
                 num_images_per_prompt=1,
-                **cfg.pipe_validation_kwargs,
             )
             out = unet_out.images
             bsz = out.shape[0] // 2
             normals_pred = out[:bsz]
             images_pred = out[bsz:]
-        scene_results = prepare_scene_views(
-            batch=batch,
-            imgs_in=imgs_in,
-            normals_pred=normals_pred,
-            images_pred=images_pred,
-            out=out,
-            cfg=cfg,
-            save_dir=save_dir,
-            images_cond=images_cond,
-            case_id=case_id,
-        )
-        if cfg.save_mode == 'concat':
-            continue
-        for i, (scene, colors, normals, meta) in enumerate(scene_results):
-            if cfg.run_mode == "generate":
-                save_multiview_scene(cfg.multiview_tmp_dir, scene, colors, normals, meta=meta)
-                print(f"[PSHuman] Saved multiview scene '{scene}' to {get_scene_dir(cfg.multiview_tmp_dir, scene)}")
                 continue
-            pose = econdata.__getitem__(case_id + i)
-            carving.optimize_case(scene, pose, colors, normals)
-            torch.cuda.empty_cache()
-def load_pshuman_pipeline(cfg):
-    pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(cfg.pretrained_model_name_or_path, torch_dtype=weight_dtype)
-    pipeline.unet.enable_xformers_memory_efficient_attention()
-    if torch.cuda.is_available():
-        pipeline.to('cuda')
-    return pipeline
 def main(cfg: TestConfig):
     if cfg.seed is not None:
         set_seed(cfg.seed)
     pipeline = None if cfg.run_mode == "reconstruct" else load_pshuman_pipeline(cfg)
     if cfg.with_smpl:
@@ -325,7 +355,7 @@ def main(cfg: TestConfig):
         validation_dataset,
         batch_size=cfg.validation_batch_size,
         shuffle=False,
-        num_workers=cfg.dataloader_num_workers,
     )
     dataset_param = {
@@ -333,14 +363,11 @@ def main(cfg: TestConfig):
         'seg_dir': None,
         'colab': False,
         'has_det': True,
-        'hps_type': 'pixie',
     }
     econdata = SMPLDataset(dataset_param, device='cuda')
     carving = ReMesh(cfg.recon_opt, econ_dataset=econdata)
-    if cfg.run_mode in {"generate", "reconstruct"} and not cfg.multiview_tmp_dir:
-        raise ValueError("multiview_tmp_dir must be provided for run_mode='generate' or 'reconstruct'.")
     run_inference(validation_dataloader, econdata, pipeline, carving, cfg, cfg.save_dir)
@@ -348,6 +375,7 @@ if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--config', type=str, required=True)
     args, extras = parser.parse_known_args()
     from utils.misc import load_config
     cfg = load_config(args.config, cli_args=extras)

 import argparse
 import os
+import shutil
 from pathlib import Path
+from typing import Dict, Optional, List, Tuple
+from collections import defaultdict
+from dataclasses import dataclass
 from omegaconf import OmegaConf
 from PIL import Image
 import torch
 import torch.utils.checkpoint
+import torch.nn.functional as F
 from torchvision.utils import make_grid
 from accelerate.utils import set_seed
 from tqdm.auto import tqdm
 from einops import rearrange
 from rembg import remove, new_session
 from mvdiffusion.pipelines.pipeline_mvdiffusion_unclip import StableUnCLIPImg2ImgPipeline
 from econdataset import SMPLDataset
 from reconstruct import ReMesh
 providers = [
     ('CUDAExecutionProvider', {
         'device_id': 0,
 weight_dtype = torch.float16
+# ============================================================
+# Config
+# ============================================================
 @dataclass
 class TestConfig:
+    # Inference settings. Only the fields visible in this hunk are annotated.
     num_views: int
     enable_xformers_memory_efficient_attention: bool
     with_smpl: Optional[bool]
     recon_opt: Dict
+    # New two-stage fields
+    # run_mode selects the workflow used by run_inference:
+    #   "full"        - generate the views and reconstruct in one pass
+    #   "generate"    - generate the views and store them with save_multiview_scene
+    #   "reconstruct" - skip diffusion and read stored views with load_multiview_scene
+    run_mode: str = "full"  # full | generate | reconstruct
+    # Root directory that holds one sub-directory of stored views per scene.
+    multiview_tmp_dir: str = "./multiview"
+    # Forwarded as prefer_edit to load_multiview_scene in reconstruct mode.
     prefer_edited_views: bool = True
+# ============================================================
+# Image helpers
+# ============================================================
+def convert_to_numpy(tensor):
+    """Return ``tensor`` as a uint8 NumPy array with its first axis moved last.
+
+    The values are multiplied by 255, offset by 0.5 and clamped to [0, 255]
+    on a fresh copy, so the input tensor itself is not modified.
+    Presumably the input is a (C, H, W) image in [0, 1] -- TODO confirm.
+    """
+    scaled = tensor.mul(255)  # out-of-place: gives us a private buffer
+    scaled.add_(0.5)
+    scaled.clamp_(0, 255)
+    channels_last = scaled.permute(1, 2, 0)
+    return channels_last.to("cpu", torch.uint8).numpy()
+def convert_to_pil(tensor):
+    """Convert ``tensor`` to a PIL image via ``convert_to_numpy``."""
+    pixels = convert_to_numpy(tensor)
+    return Image.fromarray(pixels)
+def save_image_numpy(ndarr, fp):
+    """Build a PIL image from the array ``ndarr`` and save it to ``fp``."""
+    Image.fromarray(ndarr).save(fp)
+def save_image_tensor(tensor, fp):
+    """Save ``tensor`` as an image at ``fp`` and return the uint8 array written."""
+    pixels = convert_to_numpy(tensor)
+    save_image_numpy(pixels, fp)
+    return pixels
+# ============================================================
+# Multiview storage helpers
+# ============================================================
+def ensure_dir(path: Path):
+    """Create ``path`` and any missing parents; do nothing if it already exists."""
+    path.mkdir(exist_ok=True, parents=True)
+def save_multiview_scene(multiview_root: str, scene: str, colors: List[Image.Image], normals: List[Image.Image]):
+    """Write one scene's color and normal views to disk in two identical copies.
+
+    Under ``<multiview_root>/<scene>/`` the folders ``raw/`` and ``edit/`` each
+    receive ``color_XX.png`` and ``normal_XX.png`` (XX is the zero-padded list
+    index), and ``meta.json`` records the scene name and the image counts.
+
+    Args:
+        multiview_root: Root directory holding one sub-directory per scene.
+        scene: Scene name, used as the sub-directory name.
+        colors: Color views; each one is written with its ``save`` method.
+        normals: Normal-map views; written the same way.
+
+    Note:
+        Every regular file already present in ``raw/`` and ``edit/`` is
+        deleted first, so anything previously placed in ``edit/`` is lost.
+    """
+    scene_dir = Path(multiview_root) / scene
     raw_dir = scene_dir / "raw"
     edit_dir = scene_dir / "edit"
+    ensure_dir(raw_dir)
+    ensure_dir(edit_dir)
+    # Clean previous files to avoid stale leftovers
+    for folder in (raw_dir, edit_dir):
+        for p in folder.glob("*"):
+            if p.is_file():
+                p.unlink()
+    # "edit" starts out as an exact copy of "raw".
     for idx, img in enumerate(colors):
+        raw_color = raw_dir / f"color_{idx:02d}.png"
+        edit_color = edit_dir / f"color_{idx:02d}.png"
+        img.save(raw_color)
+        img.save(edit_color)
     for idx, img in enumerate(normals):
+        raw_normal = raw_dir / f"normal_{idx:02d}.png"
+        edit_normal = edit_dir / f"normal_{idx:02d}.png"
+        img.save(raw_normal)
+        img.save(edit_normal)
+    meta = {
+        "scene": scene,
+        "num_colors": len(colors),
+        "num_normals": len(normals),
+        "source": "PSHuman two-stage inference",
+    }
+    with open(scene_dir / "meta.json", "w", encoding="utf-8") as f:
+        # Imported locally; json is not among the module-level imports shown in this diff.
+        import json
+        json.dump(meta, f, indent=2)
+def load_multiview_scene(multiview_root: str, scene: str, prefer_edit=True) -> Tuple[List[Image.Image], List[Image.Image]]:
+    """Load the stored color and normal views of one scene as RGBA images.
+
+    ``<multiview_root>/<scene>/edit`` is tried first when ``prefer_edit`` is
+    true (``raw`` first otherwise). The other folder is used only if the
+    first one does not exist.
+
+    Returns:
+        ``(colors, normals)``, each ordered by sorted file path.
+
+    Raises:
+        FileNotFoundError: Neither folder exists, or the chosen folder holds
+            no ``color_*.png`` or no ``normal_*.png`` files.
+    """
+    scene_dir = Path(multiview_root) / scene
+    first_name, second_name = ("edit", "raw") if prefer_edit else ("raw", "edit")
+    preferred = scene_dir / first_name
+    base_dir = preferred
+    if not base_dir.exists():
+        base_dir = scene_dir / second_name
+        if not base_dir.exists():
+            raise FileNotFoundError(f"Kein Multiview-Ordner für Szene '{scene}' gefunden: {preferred}")
+    color_paths = sorted(base_dir.glob("color_*.png"))
+    normal_paths = sorted(base_dir.glob("normal_*.png"))
+    if len(color_paths) == 0:
+        raise FileNotFoundError(f"Keine Color-Bilder gefunden in: {base_dir}")
+    if len(normal_paths) == 0:
+        raise FileNotFoundError(f"Keine Normalmaps gefunden in: {base_dir}")
+    colors = [Image.open(path).convert("RGBA") for path in color_paths]
+    normals = [Image.open(path).convert("RGBA") for path in normal_paths]
     return colors, normals
+# ============================================================
+# Pipeline helpers
+# ============================================================
+def load_pshuman_pipeline(cfg):
+    """Load the multi-view diffusion pipeline with ``weight_dtype`` weights.
+
+    Args:
+        cfg: Configuration providing ``pretrained_model_name_or_path`` and,
+            optionally, ``enable_xformers_memory_efficient_attention``.
+
+    Returns:
+        The pipeline, moved to CUDA when a CUDA device is available.
+    """
+    pipeline = StableUnCLIPImg2ImgPipeline.from_pretrained(
+        cfg.pretrained_model_name_or_path,
+        torch_dtype=weight_dtype,
+    )
+    # Honor the config switch instead of enabling xformers unconditionally.
+    # A missing or None value keeps the previous always-on behavior.
+    use_xformers = getattr(cfg, "enable_xformers_memory_efficient_attention", None)
+    if use_xformers is None or use_xformers:
+        pipeline.unet.enable_xformers_memory_efficient_attention()
+    if torch.cuda.is_available():
+        pipeline.to('cuda')
+    return pipeline
+def extract_scene_views_for_case(
+    batch,
+    out,
+    imgs_in,
+    i: int,
+    num_views: int,
+):
+    """Collect the color and normal views of scene ``i`` as background-removed PIL images.
+
+    Args:
+        batch: Dataloader batch; only ``batch['filename'][i]`` is read here.
+        out: Pipeline output; the first half along dim 0 is treated as normal
+            predictions and the second half as color predictions.
+        imgs_in: Flattened input images; entry ``i * num_views`` replaces the
+            predicted color of view 0.
+        i: Index of the scene inside the batch.
+        num_views: Number of views per scene.
+
+    Returns:
+        ``(scene, colors_pil, normals_pil)``, where the two lists hold at most
+        the first six views, each passed through rembg ``remove``.
+    """
+    normals_pred = out[: out.shape[0] // 2]
+    images_pred = out[out.shape[0] // 2:]
+    # Scene name is everything before the first '.' in the filename.
+    scene = batch['filename'][i].split('.')[0]
+    normals, colors = [], []
+    for j in range(num_views):
+        idx = i * num_views + j
+        normal = normals_pred[idx]
+        # Fix from original code: use scene-local first input image
+        if j == 0:
+            color = imgs_in[i * num_views].to(out.device)
+        else:
+            color = images_pred[idx]
+        # Views 3 and 4 are mirrored along dim 2 (presumably the width axis of
+        # a (C, H, W) tensor -- TODO confirm).
+        if j in [3, 4]:
+            normal = torch.flip(normal, dims=[2])
+            color = torch.flip(color, dims=[2])
+        colors.append(color)
+        # View 6 is shrunk to 256x256 so it fits the patch pasted below.
+        if j == 6:
+            normal = F.interpolate(
+                normal.unsqueeze(0),
+                size=(256, 256),
+                mode='bilinear',
+                align_corners=False
+            ).squeeze(0)
+        normals.append(normal)
+    # Preserve original PSHuman behavior
+    # The last normal is pasted into rows 0-255, columns 256-511 of view 0.
+    # normals[0] was obtained by indexing `out`, so this write presumably
+    # modifies `out` in place -- TODO confirm.
+    # NOTE(review): the last normal is only resized to 256x256 when view 6
+    # exists; with fewer than 7 views the shapes presumably do not match.
+    if len(normals) >= 2:
+        normals[0][:, :256, 256:512] = normals[-1]
+    # Original code keeps first 6 views only
+    colors_pil = [remove(convert_to_pil(tensor), session=session) for tensor in colors[:6]]
+    normals_pil = [remove(convert_to_pil(tensor), session=session) for tensor in normals[:6]]
+    return scene, colors_pil, normals_pil
+# ============================================================
+# Main inference logic
+# ============================================================
 def run_inference(dataloader, econdata, pipeline, carving, cfg: TestConfig, save_dir):
+    """Iterate over the dataloader and generate, store or reconstruct each case.
+
+    Depending on ``cfg.run_mode``, a batch is either reconstructed from views
+    stored on disk ("reconstruct"), or run through the diffusion pipeline and
+    then written as a concat grid, stored as views ("generate"), or handed
+    straight to ``carving.optimize_case`` ("full").
+    """
+    # NOTE(review): main() passes pipeline=None when cfg.run_mode is
+    # "reconstruct"; this call (and the seeded generator below) would then
+    # raise AttributeError before the reconstruct path is reached.
+    pipeline.set_progress_bar_config(disable=True)
     if cfg.seed is None:
         generator = None
     else:
+        generator = torch.Generator(device=pipeline.unet.device).manual_seed(cfg.seed)
+    # NOTE(review): images_cond gains one entry per batch and pred_cat is only
+    # appended to; in the code visible here only images_cond[-1] is ever read.
+    images_cond, pred_cat = [], defaultdict(list)
     for case_id, batch in tqdm(enumerate(dataloader)):
+        images_cond.append(batch['imgs_in'][:, 0])
+        # Reconstruct-only path: skip diffusion, load saved views instead
         if cfg.run_mode == "reconstruct":
+            # Only the first filename of the batch is used, and the pose is
+            # looked up by batch index; presumably assumes batch size 1 --
+            # TODO confirm.
+            scene = batch['filename'][0].split('.')[0]
+            colors, normals = load_multiview_scene(
+                cfg.multiview_tmp_dir,
+                scene,
+                prefer_edit=cfg.prefer_edited_views
+            )
+            pose = econdata.__getitem__(case_id)
+            carving.optimize_case(scene, pose, colors, normals)
+            torch.cuda.empty_cache()
             continue
         imgs_in = torch.cat([batch['imgs_in']] * 2, dim=0)
         num_views = imgs_in.shape[1]
         imgs_in = rearrange(imgs_in, "B Nv C H W -> (B Nv) C H W")
         else:
             smpl_in = None
+        normal_prompt_embeddings, clr_prompt_embeddings = batch['normal_prompt_embeddings'], batch['color_prompt_embeddings']
         prompt_embeddings = torch.cat([normal_prompt_embeddings, clr_prompt_embeddings], dim=0)
         prompt_embeddings = rearrange(prompt_embeddings, "B Nv N C -> (B Nv) N C")
                 guidance_scale=guidance_scale,
                 output_type='pt',
                 num_images_per_prompt=1,
+                **cfg.pipe_validation_kwargs
             )
             out = unet_out.images
             bsz = out.shape[0] // 2
             normals_pred = out[:bsz]
             images_pred = out[bsz:]
+            if cfg.save_mode == 'concat':
+                pred_cat[f"cfg{guidance_scale:.1f}"].append(torch.cat([normals_pred, images_pred], dim=-1))
+                cur_dir = os.path.join(
+                    save_dir,
+                    f"cropsize-{cfg.validation_dataset.crop_size}-cfg{guidance_scale:.1f}-seed{cfg.seed}-smpl-{cfg.with_smpl}"
+                )
+                os.makedirs(cur_dir, exist_ok=True)
+                # One grid per scene: the conditioning image followed by the
+                # (color, normal) pair of every view.
+                for i in range(bsz // num_views):
+                    scene = batch['filename'][i].split('.')[0]
+                    img_in_ = images_cond[-1][i].to(out.device)
+                    vis_ = [img_in_]
+                    for j in range(num_views):
+                        idx = i * num_views + j
+                        normal = normals_pred[idx]
+                        color = images_pred[idx]
+                        vis_.append(color)
+                        vis_.append(normal)
+                    out_filename = f"{cur_dir}/{scene}.png"
+                    vis_ = torch.stack(vis_, dim=0)
+                    vis_ = make_grid(vis_, nrow=len(vis_), padding=0, value_range=(0, 1))
+                    save_image_tensor(vis_, out_filename)
+                # concat mode is only for legacy visualization
                 continue
+            elif cfg.save_mode == 'rgb':
+                for i in range(bsz // num_views):
+                    scene, colors, normals = extract_scene_views_for_case(
+                        batch=batch,
+                        out=out,
+                        imgs_in=imgs_in,
+                        i=i,
+                        num_views=num_views,
+                    )
+                    if cfg.run_mode == "generate":
+                        save_multiview_scene(cfg.multiview_tmp_dir, scene, colors, normals)
+                        continue
+                    # full mode: original one-pass behavior
+                    # NOTE(review): the pose is looked up by batch index for
+                    # every scene i of the batch; presumably assumes batch
+                    # size 1 -- TODO confirm.
+                    pose = econdata.__getitem__(case_id)
+                    carving.optimize_case(scene, pose, colors, normals)
+                    torch.cuda.empty_cache()
 def main(cfg: TestConfig):
+    """Seed the run, build the pipeline, data and reconstruction objects, then run inference."""
     if cfg.seed is not None:
         set_seed(cfg.seed)
+    # Reconstruct mode does not need the diffusion pipeline at all
+    # NOTE(review): run_inference calls methods on `pipeline` unconditionally,
+    # so passing None here presumably fails in reconstruct mode -- verify.
     pipeline = None if cfg.run_mode == "reconstruct" else load_pshuman_pipeline(cfg)
     if cfg.with_smpl:
         validation_dataset,
         batch_size=cfg.validation_batch_size,
         shuffle=False,
+        num_workers=cfg.dataloader_num_workers
     )
     dataset_param = {
         'seg_dir': None,
         'colab': False,
         'has_det': True,
+        'hps_type': 'pixie'
     }
+    # The SMPL dataset is always created on 'cuda' and is shared with ReMesh.
     econdata = SMPLDataset(dataset_param, device='cuda')
     carving = ReMesh(cfg.recon_opt, econ_dataset=econdata)
     run_inference(validation_dataloader, econdata, pipeline, carving, cfg, cfg.save_dir)
     parser = argparse.ArgumentParser()
     parser.add_argument('--config', type=str, required=True)
     args, extras = parser.parse_known_args()
     from utils.misc import load_config
     cfg = load_config(args.config, cli_args=extras)