yusufbardolia
/

phase_2b

Safetensors

Model card Files Files and versions

xet

Community

yusufbardolia committed on Jan 11

Commit

4da07fc

verified ·

1 Parent(s): 20c6fc7

Update script.py

Browse files

Files changed (1) hide show

script.py +61 -80

script.py CHANGED Viewed

@@ -1,31 +1,43 @@
 import requests
 import torch
-from PIL import Image, ImageDraw
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
 from tqdm import tqdm
 import os
 import pandas as pd
 def run_inference(image_path, model, save_path, prompt, box_threshold, text_threshold,
                   visualize_results, visualization_path, device):
-    test_images = os.listdir(image_path)
-    test_images.sort()
     bboxes = []
     category_ids = []
     test_images_names = []
     for image_name in tqdm(test_images):
         test_images_names.append(image_name)
         bbox = []
         category_id = []
-        img = Image.open(os.path.join(image_path, image_name))
         inputs = processor(images=img, text=prompt, return_tensors="pt").to(device)
@@ -40,98 +52,67 @@ def run_inference(image_path, model, save_path, prompt, box_threshold, text_thre
             target_sizes=[img.size[::-1]]
         )
-        # visualize results
         if visualize_results:
             draw = ImageDraw.Draw(img)
-            print(image_name)
-            print(results)
             for result in results:
                 boxes = result["boxes"]
-                for i, _ in enumerate(range(len(boxes))):
-                    box = boxes[i].tolist()
-                    label = result["labels"][i]
-                    draw.rectangle(box, outline="red", width=3, )
             img.save(os.path.join(visualization_path, image_name))
-        # --- REPLACEMENT BLOCK START ---
         for result in results:
             boxes = result["boxes"]
-            labels = result["labels"] # The model returns the text label here (e.g. "metal curved scissors")
-            for i, box in enumerate(boxes):
                 xmin, ymin, xmax, ymax = box.tolist()
-                width = xmax - xmin
-                height = ymax - ymin
-                bbox.append([xmin, ymin, width, height])
-                # Get the text label found by the model
-                label_text = labels[i].lower()
-                # Assign ID based on the text description
-                # WE USE 0, 1, 2 based on your successful test
-                if "needle" in label_text or "driver" in label_text:
-                    final_id = 0  # Large Needle Driver
-                elif "forceps" in label_text or "grasper" in label_text:
-                    final_id = 1  # Prograsp Forceps
-                elif "scissors" in label_text or "curved" in label_text:
-                    final_id = 2  # Monopolar Curved Scissors
-                else:
-                    final_id = 0  # Default fallback
-                category_id.append(final_id)
         bboxes.append(bbox)
         category_ids.append(category_id)
-    df_predictions = pd.DataFrame(columns=["file_name", "bbox", "category_id"])
-    for i in range(len(test_images_names)):
-        file_name = test_images_names[i]
-        new_row = pd.DataFrame({"file_name": file_name,
-                                "bbox": str(bboxes[i]),
-                                "category_id": str(category_ids[i]),
-                                }, index=[0])
-        df_predictions = pd.concat([df_predictions, new_row], ignore_index=True)
-    df_predictions.to_csv(save_path, index=False)
 if __name__ == "__main__":
-    # The following environment variables are required for offline mode during HuggingFace Submission
-    os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
-    os.environ["HF_HUB_OFFLINE"] = "1"
-    os.environ["HF_DATASETS_OFFLINE"] = "1"
-    current_directory = os.path.dirname(os.path.abspath(__file__))
-    TEST_IMAGE_PATH = "/tmp/data/test_images"
-    SUBMISSION_SAVE_PATH = os.path.join(current_directory, "submission.csv")
-    # Configure the model. More information here: https://huggingface.co/docs/transformers/model_doc/grounding-dino
-    # If you want to use another model - you need to make it avaible for offline usage. More information here: https://huggingface.co/docs/transformers/installation#offline-mode
     model_id = "IDEA-Research/grounding-dino-tiny"
-    device = "cuda"
-    processor = AutoProcessor.from_pretrained(os.path.join(current_directory, "processor"))
-    model = AutoModelForZeroShotObjectDetection.from_pretrained(os.path.join(current_directory, "model"))
     model.to(device)
-    BOX_THRESHOLD = 0.25   # Lowered from 0.4 to catch more items
-    TEXT_THRESHOLD = 0.20  # Lowered from 0.3
-    # Describing the shape helps the model find the object
-    PROMPT = "metal curved scissors . surgical grasper forceps . needle driver holder ."
-    # If you want to test out your model on training images and visualize the results, set visualize_results to True - Visualization images will be saved in the "outputs" folder
-    parent_directory = os.path.dirname(current_directory)
-    PATH_TO_TRAINING_IMAGES_FOR_FOR_VISUALIZATION = os.path.join(parent_directory, "images")
-    visualization_path = os.path.join(parent_directory, "outputs")
-    visualize_results = False
-    if visualize_results:
-        if os.path.exists(visualization_path):
-            os.system("rm -rf " + visualization_path)
-        os.makedirs(visualization_path, exist_ok=True)
-        run_inference(PATH_TO_TRAINING_IMAGES_FOR_FOR_VISUALIZATION, model, SUBMISSION_SAVE_PATH, PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, visualize_results, visualization_path, device)
-    else:
-        run_inference(TEST_IMAGE_PATH, model, SUBMISSION_SAVE_PATH, PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, visualize_results, visualization_path, device)

 import requests
 import torch
+from PIL import Image, ImageDraw, ImageFont
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
 from tqdm import tqdm
 import os
 import pandas as pd
+import shutil
+# --- CONFIGURATION FOR KAGGLE DEBUGGING ---
+# We force visualization to TRUE so you can see the images
+VISUALIZE_RESULTS = True
+# Use the path to your TRAINING images (from your sidebar)
+# Update this path if yours is different!
+PATH_TO_IMAGES = "/kaggle/input/phase2anewdata/new_data/images/Train"
+# Output folder for marked images
+OUTPUT_DIR = "/kaggle/working/debug_images"
 def run_inference(image_path, model, save_path, prompt, box_threshold, text_threshold,
                   visualize_results, visualization_path, device):
+    # Get first 5 images only for debugging
+    test_images = sorted(os.listdir(image_path))[:5]
     bboxes = []
     category_ids = []
     test_images_names = []
+    print(f"🕵️‍♂️ Debugging on {len(test_images)} images...")
     for image_name in tqdm(test_images):
         test_images_names.append(image_name)
         bbox = []
         category_id = []
+        full_img_path = os.path.join(image_path, image_name)
+        if not os.path.exists(full_img_path): continue
+        img = Image.open(full_img_path)
         inputs = processor(images=img, text=prompt, return_tensors="pt").to(device)
             target_sizes=[img.size[::-1]]
         )
+        # --- VISUALIZATION BLOCK ---
         if visualize_results:
             draw = ImageDraw.Draw(img)
+            # Try to load a font, fallback to default if fails
+            try:
+                font = ImageFont.truetype("arial.ttf", 20)
+            except:
+                font = None
             for result in results:
                 boxes = result["boxes"]
+                labels = result["labels"]
+                scores = result["scores"]
+                for i, box in enumerate(boxes):
+                    # Draw Box
+                    b = box.tolist()
+                    draw.rectangle(b, outline="red", width=4)
+                    # Draw Label
+                    label_text = f"{labels[i]} ({scores[i]:.2f})"
+                    draw.text((b[0], b[1]), label_text, fill="yellow", font=font)
             img.save(os.path.join(visualization_path, image_name))
+        # ---------------------------
+        # Simple Saving Logic (All ID=0 for now)
         for result in results:
             boxes = result["boxes"]
+            for box in boxes:
                 xmin, ymin, xmax, ymax = box.tolist()
+                bbox.append([xmin, ymin, xmax-xmin, ymax-ymin])
+                category_id.append(0)
         bboxes.append(bbox)
         category_ids.append(category_id)
+    print(f"✅ Debug images saved to: {visualization_path}")
 if __name__ == "__main__":
+    current_directory = os.path.dirname(os.path.abspath("__file__"))
+    # Model Setup
     model_id = "IDEA-Research/grounding-dino-tiny"
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    # Load model from local folders (Kaggle specific)
+    # Ensure these folders exist in your working dir!
+    processor = AutoProcessor.from_pretrained("/kaggle/working/processor")
+    model = AutoModelForZeroShotObjectDetection.from_pretrained("/kaggle/working/model")
     model.to(device)
+    # --- TUNING PARAMETERS ---
+    BOX_THRESHOLD = 0.25
+    TEXT_THRESHOLD = 0.20
+    # New Prompt Attempt: Using "robotic" to catch da Vinci tools
+    PROMPT = "robotic needle driver . robotic graspers . curved scissors ."
+    if os.path.exists(OUTPUT_DIR):
+        shutil.rmtree(OUTPUT_DIR)
+    os.makedirs(OUTPUT_DIR, exist_ok=True)
+    run_inference(PATH_TO_IMAGES, model, "dummy.csv", PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, VISUALIZE_RESULTS, OUTPUT_DIR, device)