Update script.py

script.py CHANGED
@@ -1,34 +1,42 @@
-import
+import os
 import torch
+import pandas as pd
 from PIL import Image, ImageDraw
 from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
 from tqdm import tqdm
-import os
-import pandas as pd
 
-def run_inference(image_path, model, save_path, prompt, box_threshold, text_threshold,
-                  visualize_results, visualization_path, device):
-
-    test_images = os.listdir(image_path)
-    test_images.sort()
+def run_inference(image_path, model, save_path, prompt, box_threshold, text_threshold, device):
 
+    # 1. Get list of images
+    try:
+        test_images = sorted(os.listdir(image_path))
+    except FileNotFoundError:
+        # Fallback for debugging if path is wrong
+        print(f"Error: Path {image_path} not found.")
+        return
+
     bboxes = []
     category_ids = []
     test_images_names = []
 
+    print(f"🚀 Running inference on {len(test_images)} images...")
+
     for image_name in tqdm(test_images):
         test_images_names.append(image_name)
         bbox = []
         category_id = []
 
+        # 2. Load Image safely
         try:
-
-
-
+            full_img_path = os.path.join(image_path, image_name)
+            img = Image.open(full_img_path).convert("RGB")  # Ensure RGB
+        except Exception as e:
+            print(f"⚠️ Failed to load {image_name}: {e}")
             bboxes.append([])
             category_ids.append([])
             continue
 
+        # 3. Run Model
         inputs = processor(images=img, text=prompt, return_tensors="pt").to(device)
 
         with torch.no_grad():
@@ -42,11 +50,12 @@ def run_inference(image_path, model, save_path, prompt, box_threshold, text_thre
             target_sizes=[img.size[::-1]]
         )
 
-        #
-        #
-        # We focus purely on finding the objects first.
+        # 4. Save Results (SAFE MODE: All ID=0)
+        # We stick to ID 0 to ensure we get points for detection first.
         for result in results:
             boxes = result["boxes"]
+            # labels = result["labels"] # Not using labels for ID yet
+
             for box in boxes:
                 xmin, ymin, xmax, ymax = box.tolist()
                 width = xmax - xmin
@@ -57,46 +66,50 @@ def run_inference(image_path, model, save_path, prompt, box_threshold, text_thre
         bboxes.append(bbox)
         category_ids.append(category_id)
 
+    # 5. Create Submission DataFrame
     df_predictions = pd.DataFrame(columns=["file_name", "bbox", "category_id"])
 
     for i in range(len(test_images_names)):
-
-        new_row = pd.DataFrame({
-
-
-
+        # Format explicitly as string for the CSV
+        new_row = pd.DataFrame({
+            "file_name": test_images_names[i],
+            "bbox": str(bboxes[i]),
+            "category_id": str(category_ids[i]),
+        }, index=[0])
         df_predictions = pd.concat([df_predictions, new_row], ignore_index=True)
 
     df_predictions.to_csv(save_path, index=False)
+    print("✅ Submission file generated.")
 
 
 if __name__ == "__main__":
+    # --- ENVIRONMENT SETUP ---
     os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
     os.environ["HF_HUB_OFFLINE"] = "1"
     os.environ["HF_DATASETS_OFFLINE"] = "1"
 
+    # Correct pathing for Hugging Face Repo
     current_directory = os.path.dirname(os.path.abspath(__file__))
     TEST_IMAGE_PATH = "/tmp/data/test_images"
     SUBMISSION_SAVE_PATH = os.path.join(current_directory, "submission.csv")
 
-
+    # Detect Device
    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"🔧 Using device: {device}")
+
+    # --- MODEL LOADING (RELATIVELY) ---
+    # FIX: Point to folders relative to this script, NOT /kaggle/working/
+    processor_path = os.path.join(current_directory, "processor")
+    model_path = os.path.join(current_directory, "model")
 
-    processor = AutoProcessor.from_pretrained(
-    model = AutoModelForZeroShotObjectDetection.from_pretrained(
+    processor = AutoProcessor.from_pretrained(processor_path)
+    model = AutoModelForZeroShotObjectDetection.from_pretrained(model_path)
     model.to(device)
 
-    # --- TUNING
-    # 1. Lower Threshold: Catches faint objects
+    # --- TUNING ---
     BOX_THRESHOLD = 0.20
     TEXT_THRESHOLD = 0.20
-
-    # 2. Visual Prompt: Describes SHAPE rather than just name
-    # "robotic" helps because these are da Vinci tools
-    # "wristed" describes the joint
     PROMPT = "robotic surgical tool . metal curved scissors . wristed forceps grasper . needle driver ."
 
-
-
-
-    run_inference(TEST_IMAGE_PATH, model, SUBMISSION_SAVE_PATH, PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, visualize_results, visualization_path, device)
+    # Run!
+    run_inference(TEST_IMAGE_PATH, model, SUBMISSION_SAVE_PATH, PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, device)
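
The hunks above collapse a stretch of unchanged context (old lines 35–41 and 53–56): the forward pass inside `with torch.no_grad():`, the opening of the post-processing call whose `target_sizes=[img.size[::-1]]` argument is still visible, and the tail of the box loop. A minimal sketch of what that region plausibly contains, assuming the standard transformers Grounding DINO API; variable names such as `outputs` and the exact keyword arguments are assumptions, not text read from the file:

# Hedged sketch of the collapsed context, not the verbatim file. `processor`,
# `model`, `img`, `inputs` and the thresholds are as in run_inference above.
with torch.no_grad():
    outputs = model(**inputs)  # forward pass without building gradients

results = processor.post_process_grounded_object_detection(
    outputs,
    inputs.input_ids,                 # token ids of the text prompt
    box_threshold=box_threshold,      # renamed to `threshold=` in newer transformers
    text_threshold=text_threshold,
    target_sizes=[img.size[::-1]],    # PIL gives (W, H); the model expects (H, W)
)

for result in results:
    for box in result["boxes"]:
        xmin, ymin, xmax, ymax = box.tolist()
        width = xmax - xmin
        height = ymax - ymin
        bbox.append([xmin, ymin, width, height])  # assumed [x, y, w, h] format
        category_id.append(0)  # assumption: fixed ID 0, per the "SAFE MODE" comment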
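One consequence of this commit worth noting: with `HF_HUB_OFFLINE=1` set, the two `from_pretrained` calls can only resolve local directories, so the `processor/` and `model/` folders must already sit next to `script.py` in the repo. A one-time export along these lines would create them; the checkpoint name is a placeholder, since the original `from_pretrained(` arguments are truncated on the old side of the diff:

# Run once somewhere with network access, then upload both folders with script.py.
# "IDEA-Research/grounding-dino-tiny" is an illustrative checkpoint, not
# necessarily the one this repo actually uses.
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection

checkpoint = "IDEA-Research/grounding-dino-tiny"

AutoProcessor.from_pretrained(checkpoint).save_pretrained("processor")
AutoModelForZeroShotObjectDetection.from_pretrained(checkpoint).save_pretrained("model")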