Spaces:

aadarsh99
/

ConverSeg

Running on Zero

App Files Files Community

aadarsh99 committed on Jan 6

Commit

9d1694a

1 Parent(s): 8f65995

update app

Browse files

Files changed (1) hide show

app.py +42 -34

app.py CHANGED Viewed

@@ -32,9 +32,11 @@ LORA_CKPT_NAME = None
 SQUARE_DIM = 1024
 logging.basicConfig(level=logging.INFO)
-# ----------------- Globals (Lazy Loading) -----------------
-MODEL_SAM = None
-PLM = None
 # ----------------- Helper: Download Logic -----------------
 def download_if_needed(filename):
@@ -46,10 +48,11 @@ def download_if_needed(filename):
         logging.info(f"Found local file: {filename}")
         return filename
-    logging.info(f"{filename} not found locally. Downloading from {HF_REPO_ID}...")
     try:
         path = hf_hub_download(repo_id=HF_REPO_ID, filename=filename)
-        logging.info(f"Downloaded to: {path}")
         return path
     except Exception as e:
         raise FileNotFoundError(f"Could not find {filename} locally or in HF repo {HF_REPO_ID}. Error: {e}")
@@ -170,23 +173,24 @@ def _unpad_and_resize_pred_to_gt(logit_sq: torch.Tensor, meta: dict, out_hw: tup
     up = F.interpolate(crop, size=out_hw, mode="bilinear", align_corners=False)
     return up[0, 0]
-# ----------------- Model Loading -----------------
-def load_models_lazy():
     """
-    Loads the models. This must be called INSIDE the @spaces.GPU context.
     """
-    global MODEL_SAM, PLM
-    if MODEL_SAM is not None and PLM is not None:
-        return MODEL_SAM, PLM
-    print("Lazy loading models inside GPU context...")
     # 1. Base SAM2 Model
     base_path = download_if_needed(BASE_CKPT_NAME)
-    # Init on CPU
     model = build_sam2(SAM2_CONFIG, base_path, device="cpu")
     # 2. Fine-tuned Weights
@@ -194,9 +198,8 @@ def load_models_lazy():
     sd = torch.load(final_path, map_location="cpu")
     model.load_state_dict(sd.get("model", sd), strict=True)
-    # Move SAM to CUDA now
-    model.to("cuda")
-    MODEL_SAM = model
     # 3. PLM Adapter
     C = model.sam_mask_decoder.transformer_dim
@@ -210,7 +213,7 @@ def load_models_lazy():
         lora_alpha=32,
         lora_dropout=0.05,
         dtype=torch.bfloat16,
-        device="cpu", # Init on CPU
     )
     plm_path = download_if_needed(PLM_CKPT_NAME)
@@ -221,31 +224,30 @@ def load_models_lazy():
         lora_path = download_if_needed(LORA_CKPT_NAME)
         plm.load_lora(lora_path)
-    # Move PLM to CUDA
-    plm.to("cuda")
     plm.eval()
-    PLM = plm
-    print("Models loaded successfully.")
-    return MODEL_SAM, PLM
-@spaces.GPU(duration=180)
 def run_prediction(image_pil, text_prompt):
     if image_pil is None or not text_prompt:
         return None, None
     predictor = None
     try:
-        # 1. Ensure models are loaded (Lazy Load)
-        model_sam, plm = load_models_lazy()
-        # 2. Instantiate Predictor
-        model_sam.to("cuda")
-        plm.to("cuda")
-        predictor = SAM2ImagePredictor(model_sam)
         # 3. Preprocess Image
         rgb_orig = np.array(image_pil.convert("RGB"))
@@ -263,7 +265,7 @@ def run_prediction(image_pil, text_prompt):
         temp_path = "temp_input.jpg"
         image_pil.save(temp_path)
-        sp, dp = plm([text_prompt], H_feat, W_feat, [temp_path])
         # 6. Prepare SAM2 Decoder inputs
         dec = predictor.model.sam_mask_decoder
@@ -306,7 +308,13 @@ def run_prediction(image_pil, text_prompt):
         raise e
     finally:
-        # Cleanup
         if predictor:
             del predictor
         torch.cuda.empty_cache()

 SQUARE_DIM = 1024
 logging.basicConfig(level=logging.INFO)
+# ----------------- Globals (RAM Cache) -----------------
+# We keep these on CPU globally so they persist between runs
+# without taking up GPU memory (which gets reset).
+MODEL_SAM_CPU = None
+PLM_CPU = None
 # ----------------- Helper: Download Logic -----------------
 def download_if_needed(filename):
         logging.info(f"Found local file: {filename}")
         return filename
+    # hf_hub_download checks the cache automatically.
+    # It won't re-download if the file is already in the HF cache.
+    logging.info(f"Checking HF Cache for {filename}...")
     try:
         path = hf_hub_download(repo_id=HF_REPO_ID, filename=filename)
         return path
     except Exception as e:
         raise FileNotFoundError(f"Could not find {filename} locally or in HF repo {HF_REPO_ID}. Error: {e}")
     up = F.interpolate(crop, size=out_hw, mode="bilinear", align_corners=False)
     return up[0, 0]
+# ----------------- Model Loading (CPU Caching) -----------------
+def ensure_models_loaded_on_cpu():
     """
+    Ensures models are loaded in Global CPU RAM.
+    This avoids re-reading from disk/cache on every run.
     """
+    global MODEL_SAM_CPU, PLM_CPU
+    if MODEL_SAM_CPU is not None and PLM_CPU is not None:
+        return # Already loaded in RAM
+    logging.info("Loading models into CPU RAM (this happens once)...")
     # 1. Base SAM2 Model
     base_path = download_if_needed(BASE_CKPT_NAME)
+    # Build on CPU
     model = build_sam2(SAM2_CONFIG, base_path, device="cpu")
     # 2. Fine-tuned Weights
     sd = torch.load(final_path, map_location="cpu")
     model.load_state_dict(sd.get("model", sd), strict=True)
+    # Save to Global (CPU)
+    MODEL_SAM_CPU = model
     # 3. PLM Adapter
     C = model.sam_mask_decoder.transformer_dim
         lora_alpha=32,
         lora_dropout=0.05,
         dtype=torch.bfloat16,
+        device="cpu",
     )
     plm_path = download_if_needed(PLM_CKPT_NAME)
         lora_path = download_if_needed(LORA_CKPT_NAME)
         plm.load_lora(lora_path)
     plm.eval()
+    PLM_CPU = plm
+    logging.info("Models successfully loaded into CPU RAM.")
+@spaces.GPU(duration=120)
 def run_prediction(image_pil, text_prompt):
     if image_pil is None or not text_prompt:
         return None, None
+    # 1. Ensure models are in RAM (Fast check)
+    ensure_models_loaded_on_cpu()
+    # 2. Move to GPU (The only 'loading' cost per run)
+    # We rely on the global variables
+    logging.info("Moving models to GPU...")
+    MODEL_SAM_CPU.to("cuda")
+    PLM_CPU.to("cuda")
     predictor = None
     try:
+        # Instantiate Predictor on GPU
+        predictor = SAM2ImagePredictor(MODEL_SAM_CPU)
         # 3. Preprocess Image
         rgb_orig = np.array(image_pil.convert("RGB"))
         temp_path = "temp_input.jpg"
         image_pil.save(temp_path)
+        sp, dp = PLM_CPU([text_prompt], H_feat, W_feat, [temp_path])
         # 6. Prepare SAM2 Decoder inputs
         dec = predictor.model.sam_mask_decoder
         raise e
     finally:
+        # CRITICAL: Move models back to CPU
+        # This preserves the Global Variable on CPU RAM for the next run.
+        # If we leave them on CUDA, they might be lost when ZeroGPU releases the device.
+        logging.info("Moving models back to CPU...")
+        MODEL_SAM_CPU.to("cpu")
+        PLM_CPU.to("cpu")
         if predictor:
             del predictor
         torch.cuda.empty_cache()