RyanHangZhou committed on
Commit
4d9ba5b
·
verified ·
1 Parent(s): 8ece528

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -18
app.py CHANGED
@@ -1,20 +1,51 @@
1
  import gradio as gr
2
  import numpy as np
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def pics_pairwise_inference(background, img_a, mask_a, img_b, mask_b):
5
  """
6
- Standard PICS Inference with 5 Explicit Inputs:
7
- 1. Background Scene
8
- 2. Object A Image
9
- 3. Object A Mask
10
- 4. Object B Image
11
- 5. Object B Mask
12
  """
13
- # In a real scenario, we would preprocess these 5 inputs here
14
- # e.g., result = model.forward(background, img_a, mask_a, img_b, mask_b)
15
 
16
- # Verification: Returning background to confirm the 5-input pipeline is live
17
- return background
 
 
 
 
 
18
 
19
  with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
20
  gr.Markdown("# 🚀 PICS: Pairwise Image Compositing (5-Input Framework)")
@@ -22,17 +53,14 @@ with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
22
 
23
  with gr.Row():
24
  with gr.Column(scale=2):
25
- # 1. Background Input
26
  bg_input = gr.Image(label="1. Scene Background", type="pil")
27
 
28
  with gr.Row():
29
- # 2 & 3. Object A Pair
30
  with gr.Column():
31
  gr.Markdown("### Object A")
32
  obj_a_img = gr.Image(label="Image A", type="pil")
33
  obj_a_mask = gr.Image(label="Mask A", type="pil")
34
 
35
- # 4 & 5. Object B Pair
36
  with gr.Column():
37
  gr.Markdown("### Object B")
38
  obj_b_img = gr.Image(label="Image B", type="pil")
@@ -41,16 +69,14 @@ with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
41
  run_btn = gr.Button("Execute PICS Inference ✨", variant="primary")
42
 
43
  with gr.Column(scale=1):
44
- # Result Section
45
  output_img = gr.Image(label="PICS Composite Result")
46
  gr.Markdown("""
47
  ### 🔬 Technical Requirements
48
- * **Pairwise Reasoning**: The model takes 5 distinct inputs to compute depth, occlusion, and lighting.
49
- * **Mask Alignment**: Ensure Mask A/B perfectly align with Image A/B.
50
- * **InfiniKin Powered**: Trained on high-fidelity synthetic pairs generated by the InfiniKin engine.
51
  """)
52
 
53
- # --- Linking all 5 inputs to the inference function ---
54
  run_btn.click(
55
  fn=pics_pairwise_inference,
56
  inputs=[bg_input, obj_a_img, obj_a_mask, obj_b_img, obj_b_mask],
 
1
# --- Standard library ---
import os
import sys

# --- Third-party ---
import gradio as gr
import numpy as np
import torch
from huggingface_hub import snapshot_download
from omegaconf import OmegaConf

# Pull the model assets (code + checkpoints) from the PICS model repo so the
# Space mirrors the training environment. snapshot_download returns a local
# cache directory whose exact path varies per machine.
REPO_DIR = snapshot_download(repo_id="Hang2991/PICS")
sys.path.append(REPO_DIR)

# These modules live inside the downloaded repo, so they can only be imported
# after REPO_DIR is on sys.path. Keep the paths in sync with the repo layout.
from cldm.model import create_model, load_state_dict
from cldm.ddim_hacked import DDIMSampler

# Prefer GPU when available; fall back to CPU so the demo still runs anywhere.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Resolve checkpoint and model-config paths relative to the snapshot cache.
config = OmegaConf.load(os.path.join(REPO_DIR, "configs/inference.yaml"))
model_ckpt = os.path.join(REPO_DIR, config.pretrained_model)
model_config = os.path.join(REPO_DIR, config.config_file)

# Build the network from its config, load weights onto the chosen device,
# and switch to eval mode (disables dropout / uses running BN stats).
model = create_model(model_config).to(device)
model.load_state_dict(load_state_dict(model_ckpt, location=device))
model.eval()
ddim_sampler = DDIMSampler(model)
 
34
def pics_pairwise_inference(background, img_a, mask_a, img_b, mask_b):
    """
    Run the PICS pairwise compositing model on the five UI inputs.

    Bridges the Gradio UI with the core algorithm: the five ``gr.Image``
    components are passed through to ``model.inference``.

    Parameters
    ----------
    background : PIL.Image.Image or None
        Scene background uploaded in the UI (``None`` if left empty).
    img_a, mask_a : PIL.Image.Image or None
        Object A image and its mask.
    img_b, mask_b : PIL.Image.Image or None
        Object B image and its mask.

    Returns
    -------
    The composite produced by ``model.inference`` (rendered by the
    output ``gr.Image`` component).

    Raises
    ------
    ValueError
        If any of the five inputs is missing, so the user sees a clear
        message instead of a low-level traceback from the model.
    """
    # Fail fast with a readable message when the user forgot an upload;
    # Gradio passes None for empty Image components.
    required = {
        "Scene Background": background,
        "Image A": img_a,
        "Mask A": mask_a,
        "Image B": img_b,
        "Mask B": mask_b,
    }
    missing = [label for label, value in required.items() if value is None]
    if missing:
        raise ValueError(f"Missing required input(s): {', '.join(missing)}")

    # NOTE(review): preprocessing (resizing to the model's expected
    # resolution, tensor conversion, normalization) may be required here
    # depending on what model.inference expects — confirm against the
    # PICS repo before relying on raw PIL inputs.

    # Inference only: disable autograd to cut memory use and latency.
    with torch.no_grad():
        # `model` is the module-level PICS model initialized at startup.
        result = model.inference(background, img_a, mask_a, img_b, mask_b)

    return result
49
 
50
  with gr.Blocks(title="PICS: Pairwise Spatial Compositing") as demo:
51
  gr.Markdown("# 🚀 PICS: Pairwise Image Compositing (5-Input Framework)")
 
53
 
54
  with gr.Row():
55
  with gr.Column(scale=2):
 
56
  bg_input = gr.Image(label="1. Scene Background", type="pil")
57
 
58
  with gr.Row():
 
59
  with gr.Column():
60
  gr.Markdown("### Object A")
61
  obj_a_img = gr.Image(label="Image A", type="pil")
62
  obj_a_mask = gr.Image(label="Mask A", type="pil")
63
 
 
64
  with gr.Column():
65
  gr.Markdown("### Object B")
66
  obj_b_img = gr.Image(label="Image B", type="pil")
 
69
  run_btn = gr.Button("Execute PICS Inference ✨", variant="primary")
70
 
71
  with gr.Column(scale=1):
 
72
  output_img = gr.Image(label="PICS Composite Result")
73
  gr.Markdown("""
74
  ### 🔬 Technical Requirements
75
+ * **Pairwise Reasoning**: Computing occlusion and interaction for A & B.
76
+ * **Mask Alignment**: Masks must perfectly match the objects.
 
77
  """)
78
 
79
+ # Linking the 5 inputs to the inference function
80
  run_btn.click(
81
  fn=pics_pairwise_inference,
82
  inputs=[bg_input, obj_a_img, obj_a_mask, obj_b_img, obj_b_mask],