# AfnanSD
# update model file
# 84abcf2
import torch
import torchvision
from torch import nn
from torchvision import transforms
def create_vit_model(num_classes:int=28,
                     seed:int=42):
    """Creates a ViT-B/16 feature extractor model and its image transforms.

    The pretrained backbone is frozen and only the classifier head is
    replaced with a freshly initialised linear layer.

    Note:
        Calls ``torch.manual_seed(seed)``, which reseeds PyTorch's global
        random number generator, so that the new head is initialised
        reproducibly.

    Args:
        num_classes (int, optional): number of classes in the classifier head.
            Defaults to 28 (the number of Arabic letters).
        seed (int, optional): random seed value used when initialising the
            classifier head. Defaults to 42.

    Returns:
        model (torch.nn.Module): ViT-B/16 model with frozen base layers and a
            new trainable classifier head.
        transforms (torchvision.transforms.Compose): grayscale-to-3-channel
            conversion followed by the transforms bundled with the
            pretrained weights.
    """
    # Pretrained weights and the preprocessing transforms that ship with them
    weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    vit_transforms = weights.transforms()

    # ViT is trained on 3-channel RGB, so convert the input to a 3-channel
    # grayscale image before applying the weights' own transforms.
    transform_pipeline = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        vit_transforms,
    ])

    model = torchvision.models.vit_b_16(weights=weights)

    # Freeze all layers in the base model; the head assigned below is created
    # afterwards, so its parameters keep the default requires_grad=True.
    for param in model.parameters():
        param.requires_grad = False

    # Seed before building the head so its random initialisation is
    # reproducible across runs.
    torch.manual_seed(seed)
    model.heads = torch.nn.Sequential(
        nn.Linear(in_features=768,  # input width of the original head
                  out_features=num_classes,
                  bias=True))

    return model, transform_pipeline