# NOTE(review): the three lines below are Hugging Face Hub web-page residue
# (avatar caption / commit title / commit hash) that was pasted into the
# source; commented out so the file remains valid Python.
# dsaint31's picture
# Add/Update backbone checkpoints (count=6)
# e115a15 verified
from transformers import PretrainedConfig
from typing import Literal, Any
# Public names exported by `from <module> import *`.
__all__ = ["BackboneID", "BACKBONE_META", "BackboneMLPHeadConfig"]
# ============================================================
# Backbone whitelist + meta registry
# ============================================================

# Closed set of supported backbone identifiers. Any identifier outside this
# Literal is rejected by BackboneMLPHeadConfig's whitelist validation.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]

# Single source of truth for per-backbone feature extraction and fine-tuning
# rules. The key type is pinned to BackboneID so the registry can never drift
# from the whitelist above.
#
# Per-entry schema:
#   type      -> which loading/forward/feature-extraction pathway the model
#                code should use for this backbone
#   feat_dim  -> dimension of the feature vector consumed by the MLP head
#   feat_rule -> how to reduce backbone outputs to a (B, feat_dim) tensor
#   unfreeze  -> stage-2 fine-tuning policy ("last_n" = unfreeze the last n
#                encoder blocks / CNN stages; interpretation is up to the
#                model code and must match each library's module naming)
#   has_bn    -> whether BatchNorm is present and needs special care when
#                freezing (freeze_bn handling in stage 1/2)
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # --- transformers (ViT/Swin): expose hidden states and/or pooler output ---
    "google/vit-base-patch16-224": {
        "type": "vit",
        "feat_dim": 768,
        # CLS token embedding: last_hidden_state[:, 0, :].
        "feat_rule": "cls",
        "unfreeze": "last_n",
        "has_bn": False,
    },
    "microsoft/swin-tiny-patch4-window7-224": {
        # Swin may or may not provide a pooler output depending on the
        # implementation, hence the fallback rule below.
        "type": "swin",
        "feat_dim": 768,
        # Prefer pooler output when available, else mean-pool the tokens.
        "feat_rule": "pool_or_mean",
        "unfreeze": "last_n",
        "has_bn": False,
    },
    # --- transformers (CNNs): pooled feature vectors or feature maps ---
    "microsoft/resnet-50": {
        # Assumes a transformers-compatible ResNet exposing a pooler or a
        # final feature map.
        "type": "resnet",
        "feat_dim": 2048,
        # Prefer pooler output when available, else global average pooling.
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },
    "google/efficientnet-b0": {
        # Assumes a transformers-compatible EfficientNet exposing pooled
        # features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },
    # --- timm (DenseNet via HF Hub, loaded with the "hf_hub:" prefix) ---
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",
        # Canonical DenseNet-121 final channel dimension.
        "feat_dim": 1024,
        # timm forward_features returns a feature map; GAP it to (B, C).
        "feat_rule": "timm_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },
    # --- torchvision (DenseNet loaded directly, not via transformers/timm) ---
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,
        # torchvision DenseNet exposes .features; GAP the map to (B, C).
        "feat_rule": "torchvision_densenet_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
class BackboneMLPHeadConfig(PretrainedConfig):
    """Configuration for Backbone + MLP Head models.

    Holds a whitelisted backbone identifier (validated against
    ``BACKBONE_META``), the MLP-head hyperparameters, and normalized
    label2id/id2label mappings.
    """

    # Unique string used by Hugging Face AutoConfig to identify this
    # config class.
    model_type = "backbone-mlphead-224-fixed"

    def __init__(
        self,
        backbone_name_or_path: BackboneID | None = None,
        mlp_head_bottleneck: int = 256,
        mlp_head_dropout: float = 0.2,
        label2id: dict[str, int] | None = None,
        id2label: dict[int, str] | None = None,
        **kwargs,
    ) -> None:
        """Build the config.

        Args:
            backbone_name_or_path: One of the backbone IDs registered in
                ``BACKBONE_META``, or None for the argument-less
                construction path used internally by transformers.
            mlp_head_bottleneck: Bottleneck width of the MLP head.
            mlp_head_dropout: Dropout probability of the MLP head.
            label2id: Optional label-name -> id mapping.
            id2label: Optional id -> label-name mapping.
            **kwargs: Forwarded to ``PretrainedConfig`` (may include
                ``num_labels``).

        Raises:
            ValueError: If the backbone is not whitelisted, if the two
                label mappings disagree in size, or if an explicit
                ``num_labels`` kwarg contradicts the size inferred from
                the mappings.
        """
        # ============================================================
        # 0) Guard for argument-less construction.
        # Transformers may internally construct this config without
        # arguments (e.g., during AutoConfig resolution or Hub loading).
        # In that path we must NOT validate or raise; the goal is a
        # minimal, serialization-safe config.
        # ============================================================
        if backbone_name_or_path is None:
            # num_labels may be implicitly assumed by downstream code,
            # so explicitly pin a safe default before delegating.
            if "num_labels" not in kwargs:
                kwargs["num_labels"] = 0
            super().__init__(**kwargs)
            # Backbone is intentionally left unset on this path.
            self.backbone_name_or_path = None
            # Store the MLP-head hyperparameters so the object shape
            # matches the fully-initialized case.
            self.mlp_head_bottleneck = int(mlp_head_bottleneck)
            self.mlp_head_dropout = float(mlp_head_dropout)
            # Empty label mappings keep save/load behavior stable.
            self.label2id = {}
            self.id2label = {}
            self.num_labels = int(kwargs.get("num_labels", 0))
            return
        # ============================================================
        # 1) Backbone whitelist validation.
        # Only backbones explicitly registered in BACKBONE_META are
        # allowed, preventing accidental use of unsupported or
        # inconsistent backbones.
        # ============================================================
        if backbone_name_or_path not in BACKBONE_META:
            raise ValueError(
                f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
                f"Allowed: {sorted(BACKBONE_META.keys())}"
            )
        # ============================================================
        # 2) Label mapping normalization.
        # Both mappings may be None during pure loading scenarios
        # (from_pretrained); that is allowed here to keep Hub loading
        # and verification stable. Fail-fast validation should happen
        # at the model or training level instead.
        # ============================================================
        if label2id is None and id2label is None:
            # Respect an explicit num_labels kwarg, otherwise default to 0.
            num_labels = int(kwargs.get("num_labels", 0))
            label2id_norm: dict[str, int] = {}
            id2label_norm: dict[int, str] = {}
        else:
            # If only one mapping is provided, derive the other by inversion.
            if id2label is None:
                id2label = {v: k for k, v in label2id.items()}
            if label2id is None:
                label2id = {v: k for k, v in id2label.items()}
            # Both mappings must agree in size (also catches non-bijective
            # input, since inversion collapses duplicate values).
            if len(label2id) != len(id2label):
                raise ValueError(
                    f"label2id/id2label size mismatch: "
                    f"{len(label2id)} vs {len(id2label)}"
                )
            num_labels = len(id2label)
            label2id_norm = dict(label2id)
            id2label_norm = dict(id2label)
        # ============================================================
        # 3) num_labels consistency enforcement.
        # If num_labels arrives via kwargs alongside label mappings, it
        # must match the size inferred from the mappings.
        # ============================================================
        if "num_labels" in kwargs:
            if (label2id is not None or id2label is not None) and int(kwargs["num_labels"]) != num_labels:
                raise ValueError(
                    f"kwargs['num_labels']={kwargs['num_labels']} "
                    f"!= inferred num_labels={num_labels}"
                )
        else:
            kwargs["num_labels"] = num_labels
        # ============================================================
        # 4) Parent initialization with the normalized label mappings.
        # ============================================================
        super().__init__(
            label2id=label2id_norm,
            id2label=id2label_norm,
            **kwargs,
        )
        # ============================================================
        # 5) Explicit attribute reassignment for save/load stability.
        # Critical fields are reassigned to avoid subtle serialization
        # issues.
        # NOTE(review): assigning self.num_labels goes through the
        # PretrainedConfig `num_labels` property setter — presumably a
        # no-op when it matches len(id2label); confirm against the
        # installed transformers version.
        # ============================================================
        self.backbone_name_or_path = backbone_name_or_path
        self.mlp_head_bottleneck = int(mlp_head_bottleneck)
        self.mlp_head_dropout = float(mlp_head_dropout)
        self.label2id = label2id_norm
        self.id2label = id2label_norm
        self.num_labels = int(kwargs["num_labels"])

    def to_dict(self):
        """Serialize the config, forcing ``num_labels`` to be present and
        consistent with the instance attribute."""
        # Delegate to the parent implementation first.
        output = super().to_dict()
        # Force num_labels into the payload even if the parent dropped it,
        # preferring the live attribute over the serialized value.
        output["num_labels"] = int(
            getattr(self, "num_labels", output.get("num_labels", 0))
        )
        return output
# Register this config class with the Auto* machinery so it can be
# resolved via AutoConfig when loaded from the Hub.
BackboneMLPHeadConfig.register_for_auto_class("AutoConfig")