Commit 3cce567
1 parent: 9ec0c69
initial push

Changed files:
- .idea/vcs.xml (+0, -1)
- CSAT.py (+0, -490)
- ResNet18.py (+0, -9)
- __pycache__/test_imagenet_10.cpython-311-pytest-8.4.1.pyc (+0, -0)
- convert_and_push.py (+0, -0)
- example.py (+20, -29)
- example_2.py (+0, -16)
.idea/vcs.xml
CHANGED

@@ -2,6 +2,5 @@
 <project version="4">
   <component name="VcsDirectoryMappings">
     <mapping directory="" vcs="Git" />
-    <mapping directory="$PROJECT_DIR$/CSATv2" vcs="Git" />
   </component>
 </project>
CSAT.py
DELETED

@@ -1,490 +0,0 @@
(entire file removed; its content follows)

import torch
from torch import nn
from einops.layers.torch import Rearrange
from torch.nn.functional import softmax, sigmoid

class Block(nn.Module):
    """ ConvNeXtV2 Block.

    Args:
        dim (int): Number of input channels.
        drop_path (float): Stochastic depth rate. Default: 0.0
    """

    def __init__(self, dim, drop_path=0., img_size=None):
        super().__init__()
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)  # depthwise conv
        self.norm = LayerNorm(dim, eps=1e-6)
        self.pwconv1 = nn.Linear(dim, 4 * dim)  # pointwise/1x1 convs, implemented with linear layers
        self.act = nn.GELU()
        self.grn = GRN(4 * dim)
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
        self.attention = Spatial_Attention()

    def forward(self, x):
        input = x
        x = self.dwconv(x)
        x = x.permute(0, 2, 3, 1)  # (N, C, H, W) -> (N, H, W, C)
        x = self.norm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.grn(x)
        x = self.pwconv2(x)

        x = x.permute(0, 3, 1, 2)  # (N, H, W, C) -> (N, C, H, W)
        attention = self.attention(x)
        x = x * nn.UpsamplingBilinear2d(x.shape[2:])(attention)
        x = input + self.drop_path(x)
        return x

class Spatial_Attention(nn.Module):
    def __init__(self):
        super().__init__()
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3)
        self.attention = TransformerBlock(1, 1, heads=1, dim_head=1, img_size=[7, 7])

    def forward(self, x):
        x_avg = x.mean([1]).unsqueeze(1)
        x_max = x.max(dim=1).values.unsqueeze(1)
        # x = torch.concat([x_avg,x_max],dim=1)
        x = torch.cat([x_avg, x_max], dim=1)
        x = self.avgpool(x)
        x = self.conv(x)
        x = self.attention(x)
        return x

class TransformerBlock(nn.Module):
    def __init__(self, inp, oup, heads=8, dim_head=32, img_size=None, downsample=False, dropout=0.):
        super().__init__()
        hidden_dim = int(inp * 4)

        self.downsample = downsample
        self.ih, self.iw = img_size

        if self.downsample:
            self.pool1 = nn.MaxPool2d(3, 2, 1)
            self.pool2 = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        self.attn = Attention(inp, oup, heads, dim_head, dropout)
        self.ff = FeedForward(oup, hidden_dim, dropout)

        self.attn = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(inp, self.attn, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw)
        )

        self.ff = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(oup, self.ff, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw)
        )

    def forward(self, x):
        if self.downsample:
            x = self.proj(self.pool1(x)) + self.attn(self.pool2(x))
        else:
            x = x + self.attn(x)
        x = x + self.ff(x)
        return x


class CSAT(nn.Module):
    def __init__(self,
                 img_size=384,
                 num_classes=1000,
                 drop_path_rate=0,
                 head_init_scale=1,
                 weight=None
                 ):
        super().__init__()
        dims = [32, 48, 96, 176]
        channel_order = "channels_first"
        depths = [2, 2, 6, 4]
        dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]

        self.stem = nn.Sequential(nn.Conv2d(in_channels=3, out_channels=dims[0], kernel_size=4, stride=4),
                                  LayerNorm(normalized_shape=dims[0], data_format=channel_order))

        self.stages1 = nn.Sequential(
            Block(dim=dims[0], drop_path=dp_rates[0], img_size=[int(img_size / 4), int(img_size / 4)]),
            Block(dim=dims[0], drop_path=dp_rates[1], img_size=[int(img_size / 4), int(img_size / 4)]),
            LayerNorm(dims[0], eps=1e-6, data_format=channel_order),
            nn.Conv2d(dims[0], dims[0 + 1], kernel_size=2, stride=2),
        )

        self.stages2 = nn.Sequential(
            Block(dim=dims[1], drop_path=dp_rates[0], img_size=[int(img_size / 8), int(img_size / 8)]),
            Block(dim=dims[1], drop_path=dp_rates[1], img_size=[int(img_size / 8), int(img_size / 8)]),
            LayerNorm(dims[1], eps=1e-6, data_format=channel_order),
            nn.Conv2d(dims[1], dims[1 + 1], kernel_size=2, stride=2),
        )

        self.stages3 = nn.Sequential(
            Block(dim=dims[2], drop_path=dp_rates[0], img_size=[int(img_size / 16), int(img_size / 16)]),
            Block(dim=dims[2], drop_path=dp_rates[1], img_size=[int(img_size / 16), int(img_size / 16)]),
            Block(dim=dims[2], drop_path=dp_rates[2], img_size=[int(img_size / 16), int(img_size / 16)]),
            Block(dim=dims[2], drop_path=dp_rates[3], img_size=[int(img_size / 16), int(img_size / 16)]),
            Block(dim=dims[2], drop_path=dp_rates[4], img_size=[int(img_size / 16), int(img_size / 16)]),
            Block(dim=dims[2], drop_path=dp_rates[5], img_size=[int(img_size / 16), int(img_size / 16)]),
            TransformerBlock(inp=dims[2], oup=dims[2], img_size=[int(img_size / 16), int(img_size / 16)]),
            TransformerBlock(inp=dims[2], oup=dims[2], img_size=[int(img_size / 16), int(img_size / 16)]),
            LayerNorm(dims[2], eps=1e-6, data_format=channel_order),
            nn.Conv2d(dims[2], dims[2 + 1], kernel_size=2, stride=2),
        )

        self.stages4 = nn.Sequential(
            Block(dim=dims[3], drop_path=dp_rates[0], img_size=[int(img_size / 32), int(img_size / 32)]),
            Block(dim=dims[3], drop_path=dp_rates[1], img_size=[int(img_size / 32), int(img_size / 32)]),
            Block(dim=dims[3], drop_path=dp_rates[2], img_size=[int(img_size / 32), int(img_size / 32)]),
            Block(dim=dims[3], drop_path=dp_rates[3], img_size=[int(img_size / 32), int(img_size / 32)]),
            TransformerBlock(inp=dims[3], oup=dims[3], img_size=[int(img_size / 32), int(img_size / 32)]),
            TransformerBlock(inp=dims[3], oup=dims[3], img_size=[int(img_size / 32), int(img_size / 32)]),
        )

        self.norm = nn.LayerNorm(dims[-1], eps=1e-6)  # final norm layer
        self.head = nn.Linear(dims[-1], num_classes)

        self.apply(self._init_weights)
        self.head.weight.data.mul_(head_init_scale)
        self.head.bias.data.mul_(head_init_scale)

        if weight != None:
            self.load_checkpoint(checkpoint=weight)
            self.freeze_weight()

    def _init_weights(self, m):
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            trunc_normal_(m.weight, std=.02)
            try:
                nn.init.constant_(m.bias, 0)
            except:  # transformer layers
                pass
                # print("transformer layer can't initialize")

    def freeze_weight(self):
        for name, param in self.named_parameters():
            if param.requires_grad and 'pos_embed' in name:
                param.requires_grad = False

    def load_checkpoint(self, checkpoint=None):
        state = torch.load(checkpoint, map_location='cpu')
        if 'state_dict' in state:
            state_dict = state['state_dict']
        elif 'model' in state:
            state_dict = state['model']
            for key in list(state_dict.keys()):
                state_dict[key.replace('module.', '')] = state_dict.pop(key)
        elif 'q_state_dict' in state:
            state_dict = state['q_state_dict']

        for key in list(state_dict.keys()):
            state_dict[key.replace('backbone.', '')] = state_dict.pop(key)

        model_dict = self.state_dict()
        weights = {k: v for k, v in state_dict.items() if k in model_dict}

        model_dict.update(weights)
        del model_dict['head.weight']
        del model_dict['head.bias']
        self.load_state_dict(model_dict, strict=False)

    def forward(self, x):
        outputs = self.encoder(x)
        # x, low_level, mid_level, high_level = self.seg_encoder(x)
        return outputs

    def encoder(self, x):
        x = self.stem(x)
        for _, layer in enumerate(self.stages1):
            if _ == len(self.stages1) - 1:
                x1 = x
            x = layer(x)

        for _, layer in enumerate(self.stages2):
            if _ == len(self.stages2) - 1:
                x2 = x
            x = layer(x)

        for _, layer in enumerate(self.stages3):
            if _ == len(self.stages3) - 1:
                x3 = x
            x = layer(x)

        x = self.stages4(x)
        x = self.norm(x.mean([-2, -1]))
        x = self.head(x)
        return x

    def seg_encoder(self, x):
        org_img = x
        x = self.stem(x)
        for _, layer in enumerate(self.stages1):
            if _ == len(self.stages1) - 2:
                low_level = x
            x = layer(x)

        x = self.stages2(x)

        for _, layer in enumerate(self.stages3):
            if _ == len(self.stages3) - 2:
                mid_level = x
            x = layer(x)

        for _, layer in enumerate(self.stages4):
            x = layer(x)
        high_level = x

        return org_img, low_level, mid_level, high_level

import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
import math
import warnings

class LayerNorm(nn.Module):
    """ LayerNorm that supports two data formats: channels_last (default) or channels_first.
    The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
    shape (batch_size, height, width, channels) while channels_first corresponds to inputs
    with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError
        self.normalized_shape = (normalized_shape,)

    def forward(self, x):
        if self.data_format == "channels_last":
            return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
            return x


class GRN(nn.Module):
    """ GRN (Global Response Normalization) layer
    """

    def __init__(self, dim):
        super().__init__()
        self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim))
        self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim))

    def forward(self, x):
        Gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
        Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6)
        return self.gamma * (x * Nx) + self.beta + x

def drop_path(x, drop_prob: float = 0., training: bool = False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
    changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
    'survival rate' as the argument.

    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    shape = (x.shape[0],) + (1,) * (x.ndim - 1)  # work with diff dim tensors, not just 2D ConvNets
    random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
    random_tensor.floor_()  # binarize
    output = x.div(keep_prob) * random_tensor
    return output


class DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """
    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)

class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.net(x)

class PreNorm(nn.Module):
    def __init__(self, dim, fn, norm):
        super().__init__()
        self.norm = norm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

class Attention(nn.Module):
    def __init__(self, inp, oup, heads=8, dim_head=32, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == inp)

        # self.ih, self.iw = image_size
        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(inp, inner_dim * 3, bias=False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, oup),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()
        self.pos_embed = PosCNN(in_chans=inp)

    def forward(self, x):
        x = self.pos_embed(x)
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(
            t, 'b n (h d) -> b h n d', h=self.heads), qkv)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return out

# PEG from https://arxiv.org/abs/2102.10882
class PosCNN(nn.Module):
    def __init__(self, in_chans):
        super(PosCNN, self).__init__()
        self.proj = nn.Conv2d(in_chans, in_chans, kernel_size=3, stride=1, padding=1, bias=True, groups=in_chans)

    def forward(self, x):
        B, N, C = x.shape
        feat_token = x
        H, W = int(N**0.5), int(N**0.5)
        cnn_feat = feat_token.transpose(1, 2).view(B, C, H, W)
        x = self.proj(cnn_feat) + cnn_feat
        x = x.flatten(2).transpose(1, 2)
        return x

def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
    # type: (Tensor, float, float, float, float) -> Tensor
    r"""Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.
    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    """
    return _no_grad_trunc_normal_(tensor, mean, std, a, b)

def _no_grad_trunc_normal_(tensor, mean, std, a, b):
    # Cut & paste from PyTorch official master until it's in a few official releases - RW
    # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
    def norm_cdf(x):
        # Computes standard normal cumulative distribution function
        return (1. + math.erf(x / math.sqrt(2.))) / 2.

    if (mean < a - 2 * std) or (mean > b + 2 * std):
        warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
                      "The distribution of values may be incorrect.",
                      stacklevel=2)

    with torch.no_grad():
        # Values are generated by using a truncated uniform distribution and
        # then using the inverse CDF for the normal distribution.
        # Get upper and lower cdf values
        l = norm_cdf((a - mean) / std)
        u = norm_cdf((b - mean) / std)

        # Uniformly fill tensor with values from [l, u], then translate to
        # [2l-1, 2u-1].
        tensor.uniform_(2 * l - 1, 2 * u - 1)

        # Use inverse cdf transform for normal distribution to get truncated
        # standard normal
        tensor.erfinv_()

        # Transform to proper mean, std
        tensor.mul_(std * math.sqrt(2.))
        tensor.add_(mean)

        # Clamp to ensure it's in the proper range
        tensor.clamp_(min=a, max=b)
        return tensor

class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        if not mid_channels:
            mid_channels = out_channels
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(mid_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)

class Up(nn.Module):
    """Upscaling then double conv"""

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        # if bilinear, use the normal convolutions to reduce the number of channels
        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
        else:
            self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)
        # input is CHW
        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
                        diffY // 2, diffY - diffY // 2])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)
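For reference, a minimal smoke test of the removed model; a sketch, assuming the file above is saved as CSAT.py and importable. Shapes follow the code itself: a stride-4 stem, three 2x downsampling transitions, dims[-1] = 176, then the classification head.

# Minimal smoke test for the removed CSAT model (sketch; assumes CSAT.py is importable).
import torch
from CSAT import CSAT

model = CSAT(img_size=384, num_classes=1000).eval()
x = torch.zeros(1, 3, 384, 384)   # b, c, h, w
with torch.no_grad():
    logits = model(x)             # stem -> stages1..4 -> pooled LayerNorm -> head
print(logits.shape)               # torch.Size([1, 1000])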
ResNet18.py
DELETED

@@ -1,9 +0,0 @@
(entire file removed; its content follows)

import torchvision

class ResNet18(torchvision.models.ResNet):
    def __init__(self, num_classes=1000, weight=None):
        super(ResNet18, self).__init__(block=torchvision.models.resnet.BasicBlock, layers=[2, 2, 2, 2], num_classes=num_classes)
        self.zero_init_residual = True

    def forward(self, x):
        return self._forward_impl(x)
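The old example.py (see the diff below) consumed this wrapper for feature extraction; a sketch of that usage, omitting the checkpoint loading (the original loaded weights from an unspecified path):

# Sketch: feature extraction with the removed wrapper, mirroring the old example.py.
import torch
from torch import nn
from ResNet18 import ResNet18

model = ResNet18(num_classes=1000).eval()
model.fc = nn.Identity()            # drop the classifier to expose pooled features
data = torch.zeros(1, 3, 224, 224)  # b, c, h, w
output = model(data)                # b, c = 1, 512
print(output.shape)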
__pycache__/test_imagenet_10.cpython-311-pytest-8.4.1.pyc
ADDED

Binary file (4.08 kB)
convert_and_push.py
DELETED

File without changes
example.py
CHANGED

@@ -1,33 +1,24 @@
 import torch
-from
-from
-from model.CSATv2 import CSATv2
-from torch import nn
+from datasets import load_dataset
+from transformers import AutoImageProcessor, AutoModelForImageClassification
 
-
-
-
-state = torch.load(path, map_location='cpu')
-model.load_state_dict(state)
-data = torch.zeros((1, 3, img_size, img_size)) #b, c, h, w = 1, 3, 224, 224
-model.head = nn.Identity()
-output = model(data)#b, c = 1, 176
-print(output.shape)
+# Example data: a cat image
+dataset = load_dataset("huggingface/cats-image")
+image = dataset["test"]["image"][0]
 
-
-
-state = torch.load(path, map_location='cpu')
-model.load_state_dict(state)
-data = torch.zeros((1, 3, img_size, img_size)) #b, c, h, w = 1, 3, 224, 224
-model.fc = nn.Identity()
-output = model(data)#b, c = 1, 512
-print(output.shape)
+# 👉 Swap in the CSATv2 model
+model_name = "Hyunil/CSATv2"
 
-
-
-
-
-
-
-
-
+# Load preprocessor + model
+processor = AutoImageProcessor.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForImageClassification.from_pretrained(model_name, trust_remote_code=True)
+
+# Preprocessing
+inputs = processor(image, return_tensors="pt")
+
+# Inference
+with torch.no_grad():
+    logits = model(**inputs).logits
+
+pred = logits.argmax(-1).item()
+print("Predicted label:", model.config.id2label[pred])
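The same inference can also be written with the high-level pipeline API; a sketch, assuming this repo's remote code is compatible with the image-classification pipeline (not verified here):

# Sketch: equivalent inference via the pipeline API.
from transformers import pipeline

pipe = pipeline("image-classification", model="Hyunil/CSATv2", trust_remote_code=True)
print(pipe(image))  # top predictions with scores for the same cat image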
example_2.py
DELETED

@@ -1,16 +0,0 @@
(entire file removed; its content follows)

from transformers import AutoImageProcessor, AutoModelForImageClassification
from PIL import Image
import requests

processor = AutoImageProcessor.from_pretrained("Hyunil/CSATv2", trust_remote_code=True)
model = AutoModelForImageClassification.from_pretrained("Hyunil/CSATv2", trust_remote_code=True)

url = "https://images.unsplash.com/photo-1516116216624-53e697fedbea"
image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

inputs = processor(image, return_tensors="pt")
outputs = model(**inputs)
probs = outputs.logits.softmax(dim=-1)

top_prob, top_idx = probs.topk(5)
print(top_idx, top_prob)
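The deleted script printed raw class indices; mapping them through model.config.id2label, as the new example.py does, yields readable labels. A short sketch continuing from the code above:

# Continuing from example_2.py: pretty-print the top-5 predictions.
for prob, idx in zip(top_prob[0], top_idx[0]):
    print(f"{model.config.id2label[idx.item()]}: {prob.item():.4f}")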