# VisionBite / model.py
# amar6de2's picture
# Add working ViT food classifier Space
# 4d37213
import torch
import torchvision
from torch import nn
def create_vit_model(num_classes: int = 121,
                     seed: int = 42):
    """Creates a ViT-B/16 feature extractor model and transforms.

    The pretrained backbone is frozen (``requires_grad = False`` on every
    existing parameter) and ``model.heads`` is replaced by a freshly
    initialised LayerNorm -> Dropout -> Linear head sized for
    ``num_classes`` outputs.

    Args:
        num_classes (int, optional): number of target classes, i.e. the
            output size of the new classifier head. Defaults to 121.
        seed (int, optional): random seed value for output layer.
            Defaults to 42. Note that this is applied with
            ``torch.manual_seed``, so it reseeds the global torch RNG.

    Returns:
        model (torch.nn.Module): ViT-B/16 feature extractor model.
        transforms (torchvision.transforms): ViT-B/16 image transforms.
    """
    # Create ViT_B_16 pretrained weights, transforms and model
    weights = torchvision.models.ViT_B_16_Weights.DEFAULT
    transforms = weights.transforms()
    model = torchvision.models.vit_b_16(weights=weights)

    # Freeze all layers in model; this must happen before the new head is
    # attached so that the head below is not frozen along with the backbone.
    for param in model.parameters():
        param.requires_grad = False

    # Change classifier head to suit our needs (this will be trainable).
    # Seed first so the head's initial weights are reproducible.
    torch.manual_seed(seed)
    embed_dim = 768  # feature width fed to the head, as in the original code
    model.heads = nn.Sequential(
        nn.LayerNorm(embed_dim),
        nn.Dropout(0.2),  # Try 0.1 or 0.2
        # Honour the caller's num_classes instead of a hard-coded 121.
        nn.Linear(embed_dim, num_classes),
    )

    return model, transforms