Spaces:

RyanHangZhou
/

PICS

Sleeping

App Files Files Community

RyanHangZhou committed on Mar 2

Commit

35c4eee

verified ·

1 Parent(s): ff8dd04

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -46

app.py CHANGED Viewed

@@ -140,67 +140,83 @@ def process_composition(item, obj_thr):
 def pics_pairwise_inference(background, img_a, mask_a, img_b, mask_b):
     device = "cuda"
     model.to(device)
-    ddim_sampler = DDIMSampler(model)
-    # Convert Gradio's PIL image to OpenCV format
     back_image = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
-    # Mimic the run_inference loop logic and assemble item_with_collage
     item_with_collage = {}
-    # Process Object 0 and Object 1
     objs = [(img_a, mask_a), (img_b, mask_b)]
     for j, (img, mask) in enumerate(objs):
-        # Save the PIL image to a temporary file to fit process_pairs_multiple (or rewrite that function to accept numpy directly)
         temp_patch = f"temp_obj_{j}.png"
         cv2.imwrite(temp_patch, cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
         tar_mask = (np.array(mask)[:, :, 0] > 128).astype(np.uint8)
-        # Call the native processing function
         item = process_pairs_multiple(tar_mask, back_image, temp_patch, counter=j)
         item_with_collage.update(item)
-    # Call the composition function
     item_with_collage = process_composition(item_with_collage, obj_thr=2)
-    # Run sampling (corresponds to the body of the inference function)
     H, W = 512, 512
-    with torch.no_grad():
-        xc = [get_input_tensor(item_with_collage, f"view{i}", device) for i in range(2)]
-        xc_mask = [get_input_tensor(item_with_collage, f"mask{i}", device) for i in range(2)]
-        c_list = [model.get_learned_conditioning(xc_i) for xc_i in xc]
-        cond_cross = {"pch_code": torch.stack(c_list).permute(1, 2, 3, 0)}
-        c_mask = torch.stack(xc_mask).permute(1, 2, 3, 4, 0).to(device) # author's note: ensures this is 5-dimensional
-        hint = item_with_collage['hint']
-        control = torch.from_numpy(hint.copy()).float().to(device).unsqueeze(0)
-        control = einops.rearrange(control, 'b h w c -> b c h w').clone()
-        cond = {"c_concat": [control], "c_crossattn": [cond_cross], "c_mask": [c_mask]}
-        uc_pch = get_unconditional_conditioning(1, 2, device)
-        un_cond = {"c_concat": [control], "c_crossattn": [uc_pch], "c_mask": [c_mask]}
-        shape = (4, H // 8, W // 8)
-        model.control_scales = [1.0] * 13
-        samples, _ = ddim_sampler.sample(50, 1, shape, cond, verbose=False,
-                                         unconditional_guidance_scale=5.0, unconditional_conditioning=un_cond)
-        x_samples = model.decode_first_stage(samples)
-        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy()
-        pred_rgb = np.clip(x_samples[0], 0, 255).astype(np.uint8)
-        pred_bgr = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
-    # Post-process the result
     side = max(back_image.shape[0], back_image.shape[1])
-    pred = cv2.resize(pred_bgr, (side, side))
-    pred = crop_back(pred, back_image, item_with_collage['extra_sizes'],
-                     item_with_collage['hint_sizes0'], item_with_collage['hint_sizes1'], is_masked=True)
-    return cv2.cvtColor(pred, cv2.COLOR_BGR2RGB)
 with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
     gr.Markdown("# 🚀 PICS: Pairwise Image Compositing (5-Input Framework)")

 def pics_pairwise_inference(background, img_a, mask_a, img_b, mask_b):
     device = "cuda"
     model.to(device)
+    # Author's note: the sampler must be re-created inside the function so that it picks up the model on cuda
+    ddim_sampler = DDIMSampler(model)
+    # 1. Convert the Gradio input to OpenCV BGR format (author's note: process_pairs uses a BGR background internally)
     back_image = cv2.cvtColor(np.array(background), cv2.COLOR_RGB2BGR)
+    # 2. Mimic the run_inference loop to build the item
     item_with_collage = {}
     objs = [(img_a, mask_a), (img_b, mask_b)]
     for j, (img, mask) in enumerate(objs):
         temp_patch = f"temp_obj_{j}.png"
         cv2.imwrite(temp_patch, cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR))
         tar_mask = (np.array(mask)[:, :, 0] > 128).astype(np.uint8)
+        # Call the known-good process function
         item = process_pairs_multiple(tar_mask, back_image, temp_patch, counter=j)
         item_with_collage.update(item)
+    # 3. Compose the hint
     item_with_collage = process_composition(item_with_collage, obj_thr=2)
+    # 4. Run the known-good inference logic. NOTE(review): unlike the removed version, this is not wrapped in torch.no_grad() - confirm that is intended
+    # --- START original logic ---
+    obj_thr = 2
+    num_samples = 1
     H, W = 512, 512
+    guidance_scale = 5.0
+    xc = []
+    xc_mask = []
+    for i in range(obj_thr):
+        xc.append(get_input(item_with_collage, f"view{i}").to(device))
+        xc_mask.append(get_input(item_with_collage, f"mask{i}"))
+    c_list = [model.get_learned_conditioning(xc_i) for xc_i in xc]
+    c_tensor = torch.stack(c_list).permute(1, 2, 3, 0)
+    cond_cross = {"pch_code": c_tensor}
+    c_mask = torch.stack(xc_mask).permute(1, 2, 3, 4, 0).to(device)
+    hint = item_with_collage['hint']
+    control = torch.from_numpy(hint.copy()).float().to(device)
+    control = torch.stack([control] * num_samples, dim=0)
+    control = einops.rearrange(control, 'b h w c -> b c h w').clone()
+    cond = {"c_concat": [control], "c_crossattn": [cond_cross], "c_mask": [c_mask]}
+    # Author's note: the unconditional-conditioning logic here is critical; it determines whether the BBox turns blue
+    uc_pch = get_unconditional_conditioning(num_samples, obj_thr)
+    # Author's note: get_unconditional_conditioning uses model.device internally, so the model must already be on cuda (TODO confirm)
+    un_cond = {"c_concat": [control], "c_crossattn": [uc_pch], "c_mask": [c_mask]}
+    shape = (4, H // 8, W // 8)
+    model.control_scales = [1.0] * 13
+    samples, _ = ddim_sampler.sample(
+        50, num_samples, shape, cond,
+        verbose=False, eta=0.0,
+        unconditional_guidance_scale=guidance_scale,
+        unconditional_conditioning=un_cond
+    )
+    x_samples = model.decode_first_stage(samples)
+    x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy()
+    pred_rgb = np.clip(x_samples[0], 0, 255).astype(np.uint8)
+    # --- END original logic ---
+    # 5. Post-processing (mind the RGB/BGR conversion needed by crop_back)
+    pred_bgr = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
     side = max(back_image.shape[0], back_image.shape[1])
+    pred_res = cv2.resize(pred_bgr, (side, side))
+    # Author's note: crop_back relies on BGR format
+    pred_final = crop_back(pred_res, back_image, item_with_collage['extra_sizes'],
+                           item_with_collage['hint_sizes0'], item_with_collage['hint_sizes1'], is_masked=True)
+    # Finally convert back to RGB for Gradio
+    return cv2.cvtColor(pred_final, cv2.COLOR_BGR2RGB)
 with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
     gr.Markdown("# 🚀 PICS: Pairwise Image Compositing (5-Input Framework)")