# bb_mlp_224 / ds_meta.py
# Provenance: Hugging Face Hub upload by dsaint31,
# commit 2eed5eb (verified) — "Add/Update backbone checkpoints (count=6)".
from typing import Literal, Any
# ============================================================
# 1) Allowed 224px backbones (fixed whitelist)
# ============================================================
# This Literal enforces the closed set of backbone identifiers that are
# allowed in configs; any other string is rejected by static type checking.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]
# ============================================================
# 2) Backbone metadata registry (feature dim / feature rule / unfreeze rule)
# ============================================================
# This table is the single source of truth for per-backbone feature
# extraction and fine-tuning rules.
#
# The key type is fixed to BackboneID so registry keys can never drift
# away from the whitelist above.
#
# Per-entry schema:
#   "type"      - selects which loading/forward/feature-extraction pathway
#                 the model code should use for this backbone.
#   "feat_dim"  - dimension of the feature vector consumed by the MLP head.
#   "feat_rule" - how to obtain a (B, feat_dim) tensor from backbone outputs.
#   "unfreeze"  - policy for which layers to unfreeze during stage-2
#                 fine-tuning.
#   "has_bn"    - whether BatchNorm is present (requires careful handling
#                 when freezing, e.g. a freeze_bn step).
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # transformers-family backbones; they typically expose hidden states
    # and/or pooler outputs.
    "google/vit-base-patch16-224": {
        "type": "vit",
        # ViT-Base hidden size.
        "feat_dim": 768,
        # Use last_hidden_state[:, 0, :] as the CLS token embedding.
        "feat_rule": "cls",
        # Unfreeze the last n encoder blocks in stage 2.
        "unfreeze": "last_n",
        "has_bn": False,
    },
    "microsoft/swin-tiny-patch4-window7-224": {
        # Swin Transformer; whether a pooler output is provided can vary
        # by implementation, hence the fallback rule below.
        "type": "swin",
        "feat_dim": 768,
        # Prefer the pooler output if available, otherwise fall back to
        # mean pooling over tokens.
        "feat_rule": "pool_or_mean",
        # Unfreeze strategy aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },
    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # CNNs exposed via transformers; they usually produce pooled feature
    # vectors or feature maps.
    "microsoft/resnet-50": {
        # Assumes a transformers-compatible ResNet that can expose a
        # pooler output or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,
        # Use the pooler output if the model provides one, otherwise
        # apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",
        # CNN unfreezing is still expressed as "last_n", interpreted at
        # block/stage granularity by the model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },
    "google/efficientnet-b0": {
        # Assumes a transformers-compatible EfficientNet exposing pooled
        # features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },
    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # Loaded via timm using the "hf_hub:" prefix in the model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",
        # DenseNet-121's final channel dimension is 1024 in the
        # canonical architecture.
        "feat_dim": 1024,
        # timm's forward_features typically returns a feature map which
        # is then reduced with GAP to (B, C).
        "feat_rule": "timm_gap",
        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters
        # for both stage 1 and stage 2.
        "unfreeze": "last_n",
        "has_bn": True,
    },
    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # Intended for torchvision-style loading and feature extraction,
    # not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,
        # torchvision DenseNet usually exposes .features; GAP is applied
        # to obtain a (B, C) feature vector.
        "feat_rule": "torchvision_densenet_gap",
        # The unfreeze policy remains "last_n", but its interpretation
        # must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}