File size: 1,692 Bytes
87a2821
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import torch
import onnxruntime as ort
from huggingface_hub import hf_hub_download
from transformers import PreTrainedModel
from .configuration_dfine import DFineConfig

class DFineModel(PreTrainedModel):
    """Expose an ONNX detection graph through the ``PreTrainedModel`` API.

    The network itself is not a torch module: ``__init__`` downloads an ONNX
    file from the Hugging Face Hub and ``forward`` runs it with ONNX Runtime
    on the CPU execution provider, then post-processes the detections with
    torch.
    """

    config_class = DFineConfig

    def __init__(self, config):
        """Download the ONNX graph and open an inference session on it.

        Args:
            config: Configuration object forwarded to ``PreTrainedModel``.
        """
        super().__init__(config)
        model_path = hf_hub_download(
            repo_id="Laudando-Associates-LLC/d-fine-medium",
            filename="model.onnx",
        )
        self.session = ort.InferenceSession(
            model_path, providers=["CPUExecutionProvider"]
        )

    def forward(self, images, orig_target_sizes, ratio, pad_w, pad_h, conf_threshold=0.5):
        """Run detection and map the kept boxes back to the original images.

        Args:
            images: Torch tensor fed to the ONNX input named ``"images"``.
            orig_target_sizes: Torch tensor fed to the ONNX input named
                ``"orig_target_sizes"``.
            ratio: Per-image resize factor, indexable by batch position; box
                coordinates are divided by it.
            pad_w: Per-image horizontal padding, indexable by batch position;
                subtracted from the even box columns (0 and 2).
            pad_h: Per-image vertical padding, indexable by batch position;
                subtracted from the odd box columns (1 and 3).
            conf_threshold: Detections whose score is not strictly greater
                than this value are dropped.

        Returns:
            A list with one dict per image holding ``"labels"``, ``"boxes"``
            and ``"scores"`` tensors for the detections that were kept. The
            tensors are built from the ONNX Runtime output and therefore live
            on the CPU.
        """
        # ONNX Runtime consumes numpy arrays. Tensor.numpy() only works for
        # CPU tensors that do not require grad, so detach and move first.
        output = self.session.run(
            output_names=None,
            input_feed={
                "images": images.detach().cpu().numpy(),
                "orig_target_sizes": orig_target_sizes.detach().cpu().numpy(),
            },
        )
        # NOTE(review): relies on the graph emitting exactly three outputs in
        # the order labels, boxes, scores - confirm against the exported model.
        labels, boxes, scores = (torch.tensor(array) for array in output)

        results = []
        for i, (img_labels, img_boxes, img_scores) in enumerate(
            zip(labels, boxes, scores)
        ):
            keep = img_scores > conf_threshold

            # Boolean-mask indexing returns a new tensor, so the in-place
            # arithmetic below never touches the raw model output.
            boxes_scaled = img_boxes[keep]

            # Undo the padding, then the resize. float() lets the per-image
            # values be Python numbers, numpy scalars or single-element
            # tensors on any device.
            # Presumably boxes are (x1, y1, x2, y2): even columns are treated
            # as horizontal and odd columns as vertical coordinates.
            boxes_scaled[:, 0::2] -= float(pad_w[i])
            boxes_scaled[:, 1::2] -= float(pad_h[i])
            boxes_scaled /= float(ratio[i])

            results.append({
                "labels": img_labels[keep],
                "boxes": boxes_scaled,
                "scores": img_scores[keep],
            })

        return results