zhouyik's picture
Upload folder using huggingface_hub
032e687 verified
from functools import partial
import torch.nn as nn
from detectron2.config import LazyCall as L
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from ape.modeling.backbone.vit_eva02 import SimpleFeaturePyramid, ViT
# Creates Simple Feature Pyramid from ViT backbone
backbone = L(SimpleFeaturePyramid)(
net=L(ViT)( # Single-scale ViT backbone
img_size=1024,
patch_size=16,
embed_dim=192,
depth=12,
num_heads=3,
drop_path_rate=0.8,
window_size=14,
mlp_ratio=4 * 2 / 3,
qkv_bias=True,
norm_layer=partial(nn.LayerNorm, eps=1e-6),
window_block_indexes=list(range(0, 2))
+ list(range(3, 5))
+ list(range(6, 8))
+ list(range(9, 11)),
residual_block_indexes=[],
use_rel_pos=True,
out_feature="last_feat",
use_act_checkpoint=False,
xattn=True,
subln=False,
swiglu=True,
naiveswiglu=False,
),
in_feature="${.net.out_feature}",
out_channels=256,
scale_factors=(4.0, 2.0, 1.0, 0.5),
top_block=L(LastLevelMaxPool)(),
norm="LN",
square_pad=1024,
)