# NOTE: residue from the hosting page, kept as comments so the file parses:
# amael-apple's picture / Initial commit / c20d7cc
"""Contains preset for ViT modules.
For licensing see accompanying LICENSE file.
Copyright (C) 2025 Apple Inc. All Rights Reserved.
"""
from __future__ import annotations
import dataclasses
from typing import Literal
# Names of the available presets; these are the keys of VIT_CONFIG_DICT.
ViTPreset = Literal["dinov2l16_384"]

# Accepted values for ViTConfig.mlp_mode.
MLPMode = Literal["vanilla", "glu"]
@dataclasses.dataclass
class ViTConfig:
    """Configuration for ViT.

    The first five fields (``in_chans``, ``embed_dim``, ``depth``,
    ``num_heads`` and ``init_values``) have no default and must be supplied
    by the caller, positionally or by keyword. Every other field is optional
    and falls back to the default declared below.
    """

    # Required fields (no defaults).
    in_chans: int
    embed_dim: int
    depth: int
    num_heads: int
    init_values: float

    # Optional fields.
    img_size: int = 384
    patch_size: int = 16
    num_classes: int = 21841
    mlp_ratio: float = 4.0
    drop_rate: float = 0.0
    attn_drop_rate: float = 0.0
    drop_path_rate: float = 0.0
    qkv_bias: bool = True
    global_pool: str = "avg"

    # Properties for timm_vit.
    mlp_mode: MLPMode = "vanilla"

    # Properties for SPN.
    intermediate_features_ids: list[int] | None = None

    def asdict(self) -> dict:
        """Convenience method to convert the class to a dict.

        Returns:
            A new dict mapping every field name to its value, in declaration
            order. Built with ``dataclasses.asdict``, so container values
            such as ``intermediate_features_ids`` are copied rather than
            shared with this instance.
        """
        return dataclasses.asdict(self)
# Registry mapping each preset name to its ViTConfig. Fields not listed for
# an entry keep the defaults declared on ViTConfig.
VIT_CONFIG_DICT: dict[ViTPreset, ViTConfig] = {
    "dinov2l16_384": ViTConfig(
        in_chans=3,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        init_values=1e-5,
        # Overrides the dataclass default of "avg" with an empty string.
        global_pool="",
    ),
}