update app
Browse files
app.py
CHANGED
|
@@ -21,24 +21,34 @@ from sam2.sam2_image_predictor import SAM2ImagePredictor
|
|
| 21 |
from plm_adapter_lora_with_image_input_only_text_positions import PLMLanguageAdapter
|
| 22 |
|
| 23 |
# ----------------- Configuration -----------------
|
| 24 |
-
REPO_MAP = {
|
| 25 |
-
"Stage 1": "aadarsh99/ConvSeg-Stage1",
|
| 26 |
-
"Stage 2": "aadarsh99/ConvSeg-Stage2"
|
| 27 |
-
}
|
| 28 |
SAM2_CONFIG = "sam2_hiera_l.yaml"
|
| 29 |
-
|
| 30 |
BASE_CKPT_NAME = "sam2_hiera_large.pt"
|
| 31 |
-
FINAL_CKPT_NAME = "fine_tuned_sam2_batched_100000.torch"
|
| 32 |
-
PLM_CKPT_NAME = "fine_tuned_sam2_batched_plm_100000.torch"
|
| 33 |
|
| 34 |
SQUARE_DIM = 1024
|
| 35 |
logging.basicConfig(level=logging.INFO)
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
"Stage
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
}
|
| 41 |
|
|
|
|
|
|
|
|
|
|
| 42 |
# ----------------- Helper Functions -----------------
|
| 43 |
def download_if_needed(repo_id, filename):
|
| 44 |
try:
|
|
@@ -70,29 +80,34 @@ def make_overlay(rgb: np.ndarray, mask: np.ndarray, key: str = "mask") -> Image.
|
|
| 70 |
stroke_layer = Image.new("RGBA", base.size, color + (255,))
|
| 71 |
stroke_layer.putalpha(edges)
|
| 72 |
|
| 73 |
-
# Composite safely
|
| 74 |
out = Image.alpha_composite(base, fill_layer)
|
| 75 |
out = Image.alpha_composite(out, stroke_layer)
|
| 76 |
|
| 77 |
return out.convert("RGB")
|
| 78 |
|
| 79 |
-
def ensure_models_loaded(
|
| 80 |
global MODEL_CACHE
|
| 81 |
-
if MODEL_CACHE[
|
| 82 |
return
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
| 86 |
|
| 87 |
# SAM2
|
|
|
|
| 88 |
base_path = download_if_needed(repo_id, BASE_CKPT_NAME)
|
| 89 |
model = build_sam2(SAM2_CONFIG, base_path, device="cpu")
|
| 90 |
-
|
|
|
|
|
|
|
| 91 |
sd = torch.load(final_path, map_location="cpu")
|
| 92 |
model.load_state_dict(sd.get("model", sd), strict=True)
|
| 93 |
|
| 94 |
# PLM
|
| 95 |
-
plm_path = download_if_needed(repo_id,
|
| 96 |
plm = PLMLanguageAdapter(
|
| 97 |
model_name="Qwen/Qwen2.5-VL-3B-Instruct",
|
| 98 |
transformer_dim=model.sam_mask_decoder.transformer_dim,
|
|
@@ -104,7 +119,7 @@ def ensure_models_loaded(stage):
|
|
| 104 |
plm.load_state_dict(plm_sd["plm"], strict=True)
|
| 105 |
plm.eval()
|
| 106 |
|
| 107 |
-
MODEL_CACHE[
|
| 108 |
|
| 109 |
# ----------------- GPU Inference -----------------
|
| 110 |
|
|
@@ -205,7 +220,11 @@ with gr.Blocks(title="SAM2 + PLM Segmentation") as demo:
|
|
| 205 |
text_prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., 'the surgical forceps'")
|
| 206 |
|
| 207 |
with gr.Row():
|
| 208 |
-
stage_select = gr.Radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
threshold_slider = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Threshold")
|
| 210 |
|
| 211 |
run_btn = gr.Button("Run Inference", variant="primary")
|
|
|
|
| 21 |
from plm_adapter_lora_with_image_input_only_text_positions import PLMLanguageAdapter
|
| 22 |
|
| 23 |
# ----------------- Configuration -----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
SAM2_CONFIG = "sam2_hiera_l.yaml"
|
|
|
|
| 25 |
BASE_CKPT_NAME = "sam2_hiera_large.pt"
|
|
|
|
|
|
|
| 26 |
|
| 27 |
SQUARE_DIM = 1024
|
| 28 |
logging.basicConfig(level=logging.INFO)
|
| 29 |
|
| 30 |
+
# ----- Model registry ---------------------------------------------------
# Maps each UI stage label to its Hugging Face Hub repo and the exact
# fine-tuned checkpoint filenames: "sam_filename" is the fine-tuned SAM2
# weights, "plm_filename" is the matching PLM adapter weights.  Both
# Stage 2 variants live in the same repo and differ only in which
# training-step checkpoint they load.
MODEL_CONFIGS = {
    "Stage 1": {
        "repo_id": "aadarsh99/ConvSeg-Stage1",
        "sam_filename": "fine_tuned_sam2_batched_100000.torch",
        "plm_filename": "fine_tuned_sam2_batched_plm_100000.torch",
    },
    "Stage 2 (grad-acc: 4)": {
        "repo_id": "aadarsh99/ConvSeg-Stage2",
        "sam_filename": "fine_tuned_sam2_batched_60000.torch",
        "plm_filename": "fine_tuned_sam2_batched_plm_60000.torch",
    },
    "Stage 2 (grad-acc: 8)": {
        "repo_id": "aadarsh99/ConvSeg-Stage2",
        "sam_filename": "fine_tuned_sam2_batched_100000.torch",
        "plm_filename": "fine_tuned_sam2_batched_plm_100000.torch",
    },
}

# One lazily-filled cache slot per configured stage; the loader populates
# "sam"/"plm" on first use.  Iterating the dict directly is the idiomatic
# equivalent of MODEL_CONFIGS.keys().
MODEL_CACHE = {stage: {"sam": None, "plm": None} for stage in MODEL_CONFIGS}
|
| 51 |
+
|
| 52 |
# ----------------- Helper Functions -----------------
|
| 53 |
def download_if_needed(repo_id, filename):
|
| 54 |
try:
|
|
|
|
| 80 |
stroke_layer = Image.new("RGBA", base.size, color + (255,))
|
| 81 |
stroke_layer.putalpha(edges)
|
| 82 |
|
| 83 |
+
# Composite safely
|
| 84 |
out = Image.alpha_composite(base, fill_layer)
|
| 85 |
out = Image.alpha_composite(out, stroke_layer)
|
| 86 |
|
| 87 |
return out.convert("RGB")
|
| 88 |
|
| 89 |
+
def ensure_models_loaded(stage_key):
|
| 90 |
global MODEL_CACHE
|
| 91 |
+
if MODEL_CACHE[stage_key]["sam"] is not None:
|
| 92 |
return
|
| 93 |
+
|
| 94 |
+
config = MODEL_CONFIGS[stage_key]
|
| 95 |
+
repo_id = config["repo_id"]
|
| 96 |
+
|
| 97 |
+
logging.info(f"Loading {stage_key} models from {repo_id} into CPU RAM...")
|
| 98 |
|
| 99 |
# SAM2
|
| 100 |
+
# Base model is always the same
|
| 101 |
base_path = download_if_needed(repo_id, BASE_CKPT_NAME)
|
| 102 |
model = build_sam2(SAM2_CONFIG, base_path, device="cpu")
|
| 103 |
+
|
| 104 |
+
# Load specific fine-tuned checkpoint
|
| 105 |
+
final_path = download_if_needed(repo_id, config["sam_filename"])
|
| 106 |
sd = torch.load(final_path, map_location="cpu")
|
| 107 |
model.load_state_dict(sd.get("model", sd), strict=True)
|
| 108 |
|
| 109 |
# PLM
|
| 110 |
+
plm_path = download_if_needed(repo_id, config["plm_filename"])
|
| 111 |
plm = PLMLanguageAdapter(
|
| 112 |
model_name="Qwen/Qwen2.5-VL-3B-Instruct",
|
| 113 |
transformer_dim=model.sam_mask_decoder.transformer_dim,
|
|
|
|
| 119 |
plm.load_state_dict(plm_sd["plm"], strict=True)
|
| 120 |
plm.eval()
|
| 121 |
|
| 122 |
+
MODEL_CACHE[stage_key]["sam"], MODEL_CACHE[stage_key]["plm"] = model, plm
|
| 123 |
|
| 124 |
# ----------------- GPU Inference -----------------
|
| 125 |
|
|
|
|
| 220 |
text_prompt = gr.Textbox(label="Text Prompt", placeholder="e.g., 'the surgical forceps'")
|
| 221 |
|
| 222 |
with gr.Row():
|
| 223 |
+
# Stage picker: one radio option per entry in the model registry,
# defaulting to the 100k-step Stage 2 checkpoint.
stage_select = gr.Radio(
    label="Model Stage",
    choices=list(MODEL_CONFIGS),  # identical to list(MODEL_CONFIGS.keys())
    value="Stage 2 (grad-acc: 8)",
)
|
| 228 |
threshold_slider = gr.Slider(0.0, 1.0, value=0.5, step=0.01, label="Threshold")
|
| 229 |
|
| 230 |
run_btn = gr.Button("Run Inference", variant="primary")
|