import os
import torch
import pandas as pd
from PIL import Image
from transformers import AutoProcessor, AutoModelForZeroShotObjectDetection
from tqdm import tqdm
def run_inference(image_path, model, save_path, prompt, box_threshold, text_threshold, device, processor=None):
    """Run zero-shot object detection over a directory of images and write a submission CSV.

    Parameters
    ----------
    image_path : str
        Directory containing the test images. If it does not exist, an
        empty ("dummy") submission with only the header row is written.
    model : transformers model
        A zero-shot object-detection model already moved to ``device``.
    save_path : str
        Destination path for the submission CSV.
    prompt : str
        Text prompt listing the target classes (period-separated phrases).
    box_threshold, text_threshold : float
        Score thresholds forwarded to the processor's post-processing.
    device : str
        Torch device string ("cuda" or "cpu").
    processor : optional
        The matching HF processor. Defaults to the module-level
        ``processor`` global for backward compatibility with the original
        call sites, which did not pass one.

    Output CSV columns: ``file_name``, ``bbox`` (stringified list of
    [x, y, w, h]), ``category_id`` (stringified list; always class 0).
    """
    if processor is None:
        # The original implementation read a module-level global; keep that
        # behavior when no processor is passed explicitly.
        processor = globals()["processor"]

    # 1. Gather the list of test images (sorted for a deterministic order).
    try:
        test_images = sorted(os.listdir(image_path))
    except FileNotFoundError:
        print(f"Warning: Path {image_path} not found. Creating dummy submission.")
        test_images = []

    bboxes = []
    category_ids = []
    test_images_names = []
    print(f"Running inference on {len(test_images)} images...")
    print(f"Prompt: {prompt}")

    # 2. Loop through all test images.
    for image_name in tqdm(test_images):
        test_images_names.append(image_name)
        bbox = []
        category_id = []
        try:
            full_img_path = os.path.join(image_path, image_name)
            # Load image and ensure 3-channel RGB (some inputs may be grayscale/RGBA).
            img = Image.open(full_img_path).convert("RGB")
        except Exception as e:
            # Keep going on a bad image, but record empty predictions so the
            # per-image lists stay aligned with test_images_names.
            print(f"Error loading {image_name}: {e}")
            bboxes.append([])
            category_ids.append([])
            continue

        inputs = processor(images=img, text=prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        results = processor.post_process_grounded_object_detection(
            outputs,
            inputs.input_ids,
            threshold=box_threshold,
            text_threshold=text_threshold,
            target_sizes=[img.size[::-1]],  # PIL size is (W, H); the model expects (H, W)
        )

        # 3. Process results (SAFE MODE: map every detection to class id 0).
        #    Convert xyxy corners to xywh as required by the submission format.
        for result in results:
            for box in result["boxes"]:
                xmin, ymin, xmax, ymax = box.tolist()
                bbox.append([xmin, ymin, xmax - xmin, ymax - ymin])
                category_id.append(0)
        bboxes.append(bbox)
        category_ids.append(category_id)

    # 4. Build the submission DataFrame in one shot. (The original
    #    pd.concat-ed one row per iteration, which is quadratic in the
    #    number of images.) Passing columns= explicitly keeps the header
    #    even when there are no records.
    records = [
        {"file_name": name, "bbox": str(bb), "category_id": str(cid)}
        for name, bb, cid in zip(test_images_names, bboxes, category_ids)
    ]
    df_predictions = pd.DataFrame(records, columns=["file_name", "bbox", "category_id"])
    df_predictions.to_csv(save_path, index=False)
    print("Submission file generated.")
if __name__ == "__main__":
    # --- HUGGING FACE SERVER CONFIGURATION ---
    # Route any hub traffic through a mirror and force fully offline
    # operation: the processor and model are loaded from local
    # directories below, so no network access should occur.
    os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
    os.environ["HF_HUB_OFFLINE"] = "1"
    os.environ["HF_DATASETS_OFFLINE"] = "1"

    # Resolve paths relative to this script's location.
    current_directory = os.path.dirname(os.path.abspath(__file__))
    TEST_IMAGE_PATH = "/tmp/data/test_images"
    SUBMISSION_SAVE_PATH = os.path.join(current_directory, "submission.csv")

    # --- MODEL LOADING ---
    # Processor and model weights are expected in ./processor and ./model
    # next to this script (pre-downloaded for offline use).
    device = "cuda" if torch.cuda.is_available() else "cpu"
    processor = AutoProcessor.from_pretrained(os.path.join(current_directory, "processor"))
    model = AutoModelForZeroShotObjectDetection.from_pretrained(os.path.join(current_directory, "model"))
    model.to(device)

    # ==========================================
    # REVERTED WINNING CONFIGURATION
    # ==========================================
    # 1. Prompt Strategy: "Medical Names + Synonyms"
    #    Specific instrument names are used (with generic synonyms as
    #    backup phrases) because the model recognizes them better than a
    #    generic description such as "silver metal".
    PROMPT = (
        "Monopolar Curved Scissors . surgical scissors . "
        "Prograsp Forceps . grasper jaws . "
        "Large Needle Driver . needle holder ."
    )
    # 2. Threshold Strategy: "The Sweet Spot"
    #    0.40 was too high (low recall). 0.25 was too low (high noise).
    #    0.30 balances finding the tool vs ignoring the background.
    BOX_THRESHOLD = 0.30
    TEXT_THRESHOLD = 0.25
    # ==========================================
    run_inference(TEST_IMAGE_PATH, model, SUBMISSION_SAVE_PATH, PROMPT, BOX_THRESHOLD, TEXT_THRESHOLD, device)