|
|
import torch |
|
|
import torchvision |
|
|
|
|
|
from torch import nn |
|
|
def create_vit_model(num_classes: int = 3,
                     seed: int = 42):
    """Creates a ViT-B/16 feature extractor model and transforms.

    Loads torchvision's ViT-B/16 with its default pretrained weights,
    freezes the backbone, and replaces the classification head with a
    fresh LayerNorm -> Dropout -> Linear head sized for `num_classes`.

    Args:
        num_classes (int, optional): number of target classes. Defaults to 3.
        seed (int, optional): random seed value for output layer. Defaults to 42.

    Returns:
        model (torch.nn.Module): ViT-B/16 feature extractor model.
        transforms (torchvision.transforms): ViT-B/16 image transforms.
    """
    # Pretrained weights and the preprocessing pipeline they expect.
    weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    transforms = weights.transforms()
    model = torchvision.models.vit_b_16(weights=weights)

    # Freeze the backbone so only the new head is trainable.
    for param in model.parameters():
        param.requires_grad = False

    # Seed before head init so the new layer's weights are reproducible.
    torch.manual_seed(seed)
    model.heads = nn.Sequential(
        nn.LayerNorm(768),   # ViT-B/16 hidden dimension is 768
        nn.Dropout(0.2),
        # BUG FIX: output features were hard-coded to 121, ignoring
        # the num_classes argument; now sized to num_classes.
        nn.Linear(768, num_classes)
    )

    return model, transforms
|
|
|