| | |
| | |
| | |
| | |
| |
|
| | import logging |
| |
|
| | from . import vision_transformer as vits |
| |
|
| |
|
| | logger = logging.getLogger("dinov2") |
| |
|
| |
|
| | def build_model(args, only_teacher=False, img_size=224): |
| | args.arch = args.arch.removesuffix("_memeff") |
| | if "vit" in args.arch: |
| | vit_kwargs = dict( |
| | img_size=img_size, |
| | patch_size=args.patch_size, |
| | init_values=args.layerscale, |
| | ffn_layer=args.ffn_layer, |
| | block_chunks=args.block_chunks, |
| | qkv_bias=args.qkv_bias, |
| | proj_bias=args.proj_bias, |
| | ffn_bias=args.ffn_bias, |
| | num_register_tokens=args.num_register_tokens, |
| | interpolate_offset=args.interpolate_offset, |
| | interpolate_antialias=args.interpolate_antialias, |
| | ) |
| | teacher = vits.__dict__[args.arch](**vit_kwargs) |
| | if only_teacher: |
| | return teacher, teacher.embed_dim |
| | student = vits.__dict__[args.arch]( |
| | **vit_kwargs, |
| | drop_path_rate=args.drop_path_rate, |
| | drop_path_uniform=args.drop_path_uniform, |
| | ) |
| | embed_dim = student.embed_dim |
| | return student, teacher, embed_dim |
| |
|
| |
|
| | def build_model_from_cfg(cfg, only_teacher=False): |
| | return build_model(cfg.student, only_teacher=only_teacher, img_size=cfg.crops.global_crops_size) |
| |
|