Siamese DINOv2 Wall Hatching Matcher

A TorchScript model for matching wall hatchings from architectural blueprints with legend patterns.

Model

Backbone: DINOv2 ViT-B/14
Architecture: Siamese network
Framework: PyTorch
Export: TorchScript
Input size: 518 × 518

Inference

import cv2
import numpy as np
import torch

from PIL import Image
from torchvision import transforms

# Example request: each legend image is scored against the same wall region.
input_data = {
    "legends": [
        "legend_correct.png",
        "legend_wrong.png",
    ],
    "plan_image": "wall.png",
    # Mask region for the wall image: four (x, y) corners, flattened as
    # x1, y1, ..., x4, y4 in the wall image's pixel coordinates.
    "plan_obb": [
        20.672607421875, 20.71624755859375,     # x1, y1
        42.37445068359375, 20.71624755859375,   # x2, y2
        42.37445068359375, 111.15782165527344,  # x3, y3
        20.672607421875, 111.15782165527344,    # x4, y4
    ],
}


# Side length of the square model input (518 x 518); do not change.
IMAGE_SIZE = 518

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# RGB inputs: resize to the model resolution, convert to a tensor, then
# normalize each channel with the mean/std listed below.
image_tf = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Masks: same resize and tensor conversion, but no normalization.
mask_tf = transforms.Compose(
    [
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
    ]
)


def create_full_mask(size):
    """Return a mask image in which every pixel is selected.

    Args:
        size: ``(width, height)`` of the mask, e.g. a PIL ``Image.size``.

    Returns:
        A mode ``"L"`` PIL image of that size filled with 255.
    """
    return Image.new(mode="L", size=size, color=255)


def create_obb_mask(size, obb):
    """Rasterize a four-corner region into a mask image.

    Args:
        size: ``(width, height)`` of the mask to create.
        obb: Eight numbers ``x1, y1, ..., x4, y4`` giving the polygon corners.

    Returns:
        A PIL image built from a uint8 array that is 255 inside the polygon
        and 0 everywhere else.
    """
    width, height = size

    # The int32 conversion turns the (possibly fractional) coordinates into
    # integers, which is what fillPoly is given here.
    corners = np.array(obb, dtype=np.int32).reshape(4, 2)

    # NumPy arrays are indexed (row, column), hence (height, width).
    canvas = np.zeros(shape=(height, width), dtype=np.uint8)
    cv2.fillPoly(canvas, [corners], 255)

    return Image.fromarray(canvas)


def prepare(image_path, obb=None):
    """Load an image and build the image and mask tensors fed to the model.

    Args:
        image_path: Path of the image file to open.
        obb: Optional eight-number polygon ``x1, y1, ..., x4, y4``. When
            omitted, the mask covers the whole image.

    Returns:
        A ``(image, mask)`` tuple of tensors, each with a leading batch
        dimension added and moved to ``device``.
    """
    rgb = Image.open(image_path).convert("RGB")

    # Without a polygon the whole picture counts as the region of interest.
    region = (
        create_full_mask(rgb.size)
        if obb is None
        else create_obb_mask(rgb.size, obb)
    )

    image_batch = image_tf(rgb).unsqueeze(0).to(device)
    mask_batch = mask_tf(region).unsqueeze(0).to(device)
    return image_batch, mask_batch

# Load the TorchScript model once: it does not depend on the legend being
# scored, so reloading it from disk for every legend is wasted work.
model = torch.jit.load(
    "dino_hatching.pt",
    map_location=device,
)
model.eval()

# The wall image and its mask are identical for every legend, so they are
# prepared a single time as well.
plan_image, plan_mask = prepare(
    input_data["plan_image"],
    input_data["plan_obb"],
)

# Score each legend pattern against the wall region.
for legend in input_data["legends"]:
    legend_image, legend_mask = prepare(legend)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        logit = model(
            legend_image,
            legend_mask,
            plan_image,
            plan_mask,
        )

    # Squash the raw logit into the 0..1 range before reporting it.
    score = torch.sigmoid(logit).item()

    print(f"{legend}: {score}")

# Example output:
# legend_correct.png: 0.9882104396820068
# legend_wrong.png: 0.0016661642584949732

`score` is the sigmoid of the model's output logit, interpreted as the probability that both hatchings belong to the same wall type.

1.0 → same wall type
0.0 → different wall type

Downloads last month: -; Downloads are not tracked for this model. How to track

Inference Providers NEW

Image Feature Extraction

This model isn't deployed by any Inference Provider. 🙋 Ask for provider support