# File size: 2,531 Bytes

import os
import torch
import pandas as pd
from PIL import Image
from rfdetr import RFDETRBase


def _clip_box_to_xywh(box, img_w, img_h):
    """Clip an ``(xmin, ymin, xmax, ymax)`` box to the image bounds.

    Returns the clipped box as ``[xmin, ymin, width, height]``, or ``None``
    when the clipped box has no positive width or height.
    """
    xmin, ymin, xmax, ymax = map(float, box)

    xmin = max(0.0, xmin)
    ymin = max(0.0, ymin)
    xmax = min(float(img_w), xmax)
    ymax = min(float(img_h), ymax)

    width = xmax - xmin
    height = ymax - ymin
    if width <= 0 or height <= 0:
        return None
    return [xmin, ymin, width, height]


def run_inference(model, image_path, conf_threshold, save_path):
    """Run the detector over every image in a directory and save a CSV.

    Files in ``image_path`` whose names end in ``.jpg``, ``.jpeg`` or ``.png``
    (case-insensitive) are processed in sorted order. For each image the
    detections scoring at least ``conf_threshold`` are clipped to the image
    bounds, converted to ``[xmin, ymin, width, height]`` and written as one
    CSV row with the columns ``file_name``, ``bbox`` and ``category_id``.
    The ``bbox`` and ``category_id`` cells hold the ``str()`` of a Python
    list; images without any kept detection get ``[]`` in both.

    Args:
        model: Detector exposing ``predict(path, threshold=...)`` whose result
            has ``xyxy``, ``confidence`` and ``class_id`` attributes.
        image_path: Directory containing the test images.
        conf_threshold: Minimum confidence a detection needs to be kept.
        save_path: Destination path of the CSV file.
    """
    test_images = sorted(
        f for f in os.listdir(image_path)
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    )

    rows = []
    for image_name in test_images:
        image_file = os.path.join(image_path, image_name)

        # The image size is needed to clip boxes that spill over the border.
        with Image.open(image_file) as img:
            img_w, img_h = img.size

        # predict() applies its own score cut-off (0.5 by default in rfdetr);
        # without forwarding conf_threshold, any value below that default
        # would silently have no effect.
        preds = model.predict(image_file, threshold=conf_threshold)

        bbox = []
        category_id = []
        if preds is not None and preds.xyxy is not None and len(preds.xyxy) > 0:
            for box, score, label in zip(
                preds.xyxy,
                preds.confidence,
                preds.class_id,
            ):
                if float(score) < conf_threshold:
                    continue

                clipped = _clip_box_to_xywh(box, img_w, img_h)
                if clipped is None:
                    # Degenerate after clipping: nothing left inside the image.
                    continue

                bbox.append(clipped)
                category_id.append(int(label))

        rows.append({
            "file_name": image_name,
            "bbox": str(bbox),
            "category_id": str(category_id),
        })

    # Build the frame once; passing ``columns`` keeps the header even when
    # no image was found.
    df_predictions = pd.DataFrame(
        rows, columns=["file_name", "bbox", "category_id"]
    )

    df_predictions.to_csv(save_path, index=False)
    print(f"Submission saved to {save_path}")


if __name__ == "__main__":
    # Script settings: where the test images live, where the CSV is written,
    # and the minimum confidence a detection needs to be kept.
    TEST_IMAGE_PATH = "/tmp/data/test_images"
    SUBMISSION_SAVE_PATH = "submission.csv"
    CONF_THRESHOLD = 0.30  # lower to 0.15 if recall is poor

    # Use the GPU when torch reports one, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = RFDETRBase(
        checkpoint_path="checkpoint_best_total.pth",
        device=device,
    )

    run_inference(
        model=model,
        image_path=TEST_IMAGE_PATH,
        conf_threshold=CONF_THRESHOLD,
        save_path=SUBMISSION_SAVE_PATH,
    )