Spaces:

aadarsh99
/

ConvSeg

Running on Zero

App Files Community

aadarsh99 committed 28 days ago

Commit

b6001f7

1 Parent(s): eb03911

update app

Browse files

Files changed (1) hide show

app.py +52 -44

app.py CHANGED Viewed

@@ -36,8 +36,11 @@ SQUARE_DIM = 1024
 logging.basicConfig(level=logging.INFO)
-# ----------------- Overlay Style Helpers -----------------
 EDGE_COLORS_HEX = ["#3A86FF", "#FF006E", "#43AA8B", "#F3722C", "#8338EC", "#90BE6D"]
 def _hex_to_rgb(h: str):
@@ -176,8 +179,6 @@ def _unpad_and_resize_pred_to_gt(logit_sq: torch.Tensor, meta: dict, out_hw: tup
     up = F.interpolate(crop, size=out_hw, mode="bilinear", align_corners=False)
     return up[0, 0]
-# ----------------- Model Logic -----------------
 def get_text_to_image_attention(decoder: MaskDecoder):
     two_way = decoder.transformer
     attn_blocks = []
@@ -198,13 +199,29 @@ def get_text_to_image_attention(decoder: MaskDecoder):
     text_attn = attn[..., n_output_tokens:, :]
     return text_attn
-def load_models_cpu():
-    print("Loading models on CPU...")
-    # 1. Base SAM2 Model (Raw Model, not Predictor)
     if not os.path.exists(BASE_CKPT_NAME):
         raise FileNotFoundError(f"{BASE_CKPT_NAME} not found")
     model = build_sam2(SAM2_CONFIG, BASE_CKPT_NAME, device="cpu")
     # 2. Fine-tuned Weights
@@ -212,9 +229,11 @@ def load_models_cpu():
         raise FileNotFoundError(f"{FINAL_CKPT_NAME} not found")
     sd = torch.load(FINAL_CKPT_NAME, map_location="cpu")
-    # Load into the model directly
     model.load_state_dict(sd.get("model", sd), strict=True)
-    model.eval()
     # 3. PLM Adapter
     C = model.sam_mask_decoder.transformer_dim
@@ -228,10 +247,9 @@ def load_models_cpu():
         lora_alpha=32,
         lora_dropout=0.05,
         dtype=torch.bfloat16,
-        device="cpu",
-    ).to("cpu")
-    plm.eval()
     if not os.path.exists(PLM_CKPT_NAME):
         raise FileNotFoundError(f"{PLM_CKPT_NAME} not found")
@@ -240,38 +258,34 @@ def load_models_cpu():
     if LORA_CKPT_NAME and os.path.exists(LORA_CKPT_NAME):
         plm.load_lora(LORA_CKPT_NAME)
-    print("Models loaded successfully (CPU).")
-    return model, plm
-# Initialize global models on CPU
-try:
-    # NOTE: We hold the raw MODEL_SAM here, not the predictor
-    MODEL_SAM, PLM = load_models_cpu()
-except Exception as e:
-    print(f"Error loading models: {e}")
-    traceback.print_exc()
-    MODEL_SAM, PLM = None, None
-@spaces.GPU(duration=60) # Ensure we have enough time (default is often 60s)
 def run_prediction(image_pil, text_prompt):
-    if MODEL_SAM is None or PLM is None:
-        return None, None, None
     if image_pil is None or not text_prompt:
         return None, None, None
     predictor = None
     try:
-        # 1. Move models to GPU
-        print("Moving models to CUDA...")
-        MODEL_SAM.to("cuda")
-        PLM.to("cuda")
-        # 2. Instantiate Predictor ON GPU (Crucial Fix)
-        # This ensures the predictor knows it's on CUDA
-        predictor = SAM2ImagePredictor(MODEL_SAM)
         # 3. Preprocess Image
         rgb_orig = np.array(image_pil.convert("RGB"))
@@ -280,7 +294,6 @@ def run_prediction(image_pil, text_prompt):
         rgb_sq = _resize_pad_square(rgb_orig, SQUARE_DIM, is_mask=False)
         # 4. SAM2 Image Encoding
-        # set_image puts features on the model's device
         predictor.set_image(rgb_sq)
         image_emb = predictor._features["image_embed"][-1].unsqueeze(0)
         hi = [lvl[-1].unsqueeze(0) for lvl in predictor._features["high_res_feats"]]
@@ -290,11 +303,9 @@ def run_prediction(image_pil, text_prompt):
         temp_path = "temp_input.jpg"
         image_pil.save(temp_path)
-        # PLM inference usually handles device mapping internally if written well,
-        # but we ensure inputs are passed cleanly.
-        sp, dp = PLM([text_prompt], H_feat, W_feat, [temp_path])
-        # 6. Prepare SAM2 Decoder inputs (ensure they are on CUDA)
         dec = predictor.model.sam_mask_decoder
         dev = next(dec.parameters()).device
         dtype = next(dec.parameters()).dtype
@@ -354,13 +365,10 @@ def run_prediction(image_pil, text_prompt):
     except Exception as e:
         print("An error occurred during inference:")
         traceback.print_exc()
-        raise e # Let Gradio show the error
     finally:
-        # Cleanup: Move models back to CPU
-        print("Moving models back to CPU...")
-        MODEL_SAM.to("cpu")
-        PLM.to("cpu")
         if predictor:
             del predictor
         torch.cuda.empty_cache()

 logging.basicConfig(level=logging.INFO)
+# ----------------- Globals (Lazy Loading) -----------------
+MODEL_SAM = None
+PLM = None
+# ----------------- Overlay Style Helpers -----------------
 EDGE_COLORS_HEX = ["#3A86FF", "#FF006E", "#43AA8B", "#F3722C", "#8338EC", "#90BE6D"]
 def _hex_to_rgb(h: str):
     up = F.interpolate(crop, size=out_hw, mode="bilinear", align_corners=False)
     return up[0, 0]
 def get_text_to_image_attention(decoder: MaskDecoder):
     two_way = decoder.transformer
     attn_blocks = []
     text_attn = attn[..., n_output_tokens:, :]
     return text_attn
+# ----------------- Model Loading -----------------
+def load_models_lazy():
+    """
+    Loads the models. This must be called INSIDE the @spaces.GPU context
+    so that devices match (everything on 'cuda' or 'zero').
+    """
+    global MODEL_SAM, PLM
+    if MODEL_SAM is not None and PLM is not None:
+        return MODEL_SAM, PLM
+    print("Lazy loading models inside GPU context...")
+    # 1. Base SAM2 Model
     if not os.path.exists(BASE_CKPT_NAME):
         raise FileNotFoundError(f"{BASE_CKPT_NAME} not found")
+    # On ZeroGPU, we can load to 'cuda' directly, or 'cpu' then move.
+    # To be safe against the deepcopy error, we load to cpu then move.
+    # If the deepcopy error persists, we might need to load directly to 'cuda'.
+    # Let's try CPU load -> move to cuda.
     model = build_sam2(SAM2_CONFIG, BASE_CKPT_NAME, device="cpu")
     # 2. Fine-tuned Weights
         raise FileNotFoundError(f"{FINAL_CKPT_NAME} not found")
     sd = torch.load(FINAL_CKPT_NAME, map_location="cpu")
     model.load_state_dict(sd.get("model", sd), strict=True)
+    # Move SAM to CUDA now
+    model.to("cuda")
+    MODEL_SAM = model
     # 3. PLM Adapter
     C = model.sam_mask_decoder.transformer_dim
         lora_alpha=32,
         lora_dropout=0.05,
         dtype=torch.bfloat16,
+        device="cpu", # Init on CPU
+    )
     if not os.path.exists(PLM_CKPT_NAME):
         raise FileNotFoundError(f"{PLM_CKPT_NAME} not found")
     if LORA_CKPT_NAME and os.path.exists(LORA_CKPT_NAME):
         plm.load_lora(LORA_CKPT_NAME)
+    # Move PLM to CUDA
+    plm.to("cuda")
+    plm.eval()
+    PLM = plm
+    print("Models loaded successfully.")
+    return MODEL_SAM, PLM
+@spaces.GPU(duration=120) # Increased duration for first-time load
 def run_prediction(image_pil, text_prompt):
     if image_pil is None or not text_prompt:
         return None, None, None
     predictor = None
     try:
+        # 1. Ensure models are loaded (Lazy Load)
+        model_sam, plm = load_models_lazy()
+        # 2. Instantiate Predictor
+        # We assume models are already on CUDA from load_models_lazy
+        # Just to be sure, we can call .to("cuda") again (cheap if already there)
+        model_sam.to("cuda")
+        plm.to("cuda")
+        predictor = SAM2ImagePredictor(model_sam)
         # 3. Preprocess Image
         rgb_orig = np.array(image_pil.convert("RGB"))
         rgb_sq = _resize_pad_square(rgb_orig, SQUARE_DIM, is_mask=False)
         # 4. SAM2 Image Encoding
         predictor.set_image(rgb_sq)
         image_emb = predictor._features["image_embed"][-1].unsqueeze(0)
         hi = [lvl[-1].unsqueeze(0) for lvl in predictor._features["high_res_feats"]]
         temp_path = "temp_input.jpg"
         image_pil.save(temp_path)
+        sp, dp = plm([text_prompt], H_feat, W_feat, [temp_path])
+        # 6. Prepare SAM2 Decoder inputs
         dec = predictor.model.sam_mask_decoder
         dev = next(dec.parameters()).device
         dtype = next(dec.parameters()).dtype
     except Exception as e:
         print("An error occurred during inference:")
         traceback.print_exc()
+        raise e
     finally:
+        # Cleanup
         if predictor:
             del predictor
         torch.cuda.empty_cache()