Add/Update backbone checkpoints (count=6)
Browse files- ds_cfg.py +144 -1
- ds_model.py +66 -10
- ds_proc.py +1 -1
- manifest_20260210_163348.json +53 -0
- models/google__efficientnet-b0/config.json +3 -5
- models/google__efficientnet-b0/ds_cfg.py +144 -1
- models/google__efficientnet-b0/ds_model.py +66 -10
- models/google__efficientnet-b0/ds_proc.py +1 -1
- models/google__efficientnet-b0/model.safetensors +1 -1
- models/google__vit-base-patch16-224/config.json +3 -5
- models/google__vit-base-patch16-224/ds_cfg.py +144 -1
- models/google__vit-base-patch16-224/ds_model.py +66 -10
- models/google__vit-base-patch16-224/ds_proc.py +1 -1
- models/google__vit-base-patch16-224/model.safetensors +1 -1
- models/microsoft__resnet-50/config.json +3 -5
- models/microsoft__resnet-50/ds_cfg.py +144 -1
- models/microsoft__resnet-50/ds_model.py +66 -10
- models/microsoft__resnet-50/ds_proc.py +1 -1
- models/microsoft__resnet-50/model.safetensors +1 -1
- models/microsoft__swin-tiny-patch4-window7-224/config.json +3 -5
- models/microsoft__swin-tiny-patch4-window7-224/ds_cfg.py +144 -1
- models/microsoft__swin-tiny-patch4-window7-224/ds_model.py +66 -10
- models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py +1 -1
- models/microsoft__swin-tiny-patch4-window7-224/model.safetensors +1 -1
- models/timm__densenet121.tv_in1k/config.json +3 -5
- models/timm__densenet121.tv_in1k/ds_cfg.py +144 -1
- models/timm__densenet121.tv_in1k/ds_model.py +66 -10
- models/timm__densenet121.tv_in1k/ds_proc.py +1 -1
- models/timm__densenet121.tv_in1k/model.safetensors +1 -1
- models/torchvision__densenet121/config.json +3 -5
- models/torchvision__densenet121/ds_cfg.py +144 -1
- models/torchvision__densenet121/ds_model.py +66 -10
- models/torchvision__densenet121/ds_proc.py +1 -1
- models/torchvision__densenet121/model.safetensors +1 -1
ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
BackboneID = Literal[
|
| 9 |
+
"google/vit-base-patch16-224",
|
| 10 |
+
"microsoft/swin-tiny-patch4-window7-224",
|
| 11 |
+
"microsoft/resnet-50",
|
| 12 |
+
"google/efficientnet-b0",
|
| 13 |
+
"timm/densenet121.tv_in1k",
|
| 14 |
+
"torchvision/densenet121",
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
# ============================================================
|
| 18 |
+
# 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
|
| 19 |
+
# 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
|
| 20 |
+
# ============================================================
|
| 21 |
+
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
|
| 22 |
+
# ์ด ํ
์ด๋ธ์ backbone๋ณ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค(source of truth)์
๋๋ค.
|
| 23 |
+
#
|
| 24 |
+
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
|
| 25 |
+
# ํค ํ์
์ BackboneID๋ก ๊ณ ์ ํ์ฌ ๋ฉํ ํค๊ฐ ํ์ดํธ๋ฆฌ์คํธ์ ์ด๊ธ๋์ง ์๊ฒ ํฉ๋๋ค.
|
| 26 |
+
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
|
| 27 |
+
# -------------------------
|
| 28 |
+
# Transformers (ViT/Swin)
|
| 29 |
+
# -------------------------
|
| 30 |
+
# These backbones come from transformers and typically output hidden states and/or pooler outputs.
|
| 31 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers ๊ณ์ด์ด๋ฉฐ hidden states์ pooler ์ถ๋ ฅ ๋ฑ์ ์ ๊ณตํฉ๋๋ค.
|
| 32 |
+
|
| 33 |
+
"google/vit-base-patch16-224": {
|
| 34 |
+
# type indicates which loading/forward/extraction pathway the model code should use.
|
| 35 |
+
# type์ ๋ชจ๋ธ ์ฝ๋๊ฐ ์ด๋ค ๋ก๋ฉ/forward/feature ์ถ์ถ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 36 |
+
"type": "vit",
|
| 37 |
+
|
| 38 |
+
# feat_dim is the feature vector dimension consumed by the MLP head.
|
| 39 |
+
# feat_dim์ MLP head๊ฐ ์
๋ ฅ์ผ๋ก ๋ฐ๋ feature ๋ฒกํฐ ์ฐจ์์
๋๋ค.
|
| 40 |
+
"feat_dim": 768,
|
| 41 |
+
|
| 42 |
+
# feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
|
| 43 |
+
# feat_rule์ backbone ์ถ๋ ฅ์์ (B, feat_dim) ํ
์๋ฅผ ์ป๋ ๊ท์น์ ์ ์ํฉ๋๋ค.
|
| 44 |
+
"feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
|
| 45 |
+
# last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
|
| 46 |
+
|
| 47 |
+
# unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
|
| 48 |
+
# unfreeze๋ stage2 ๋ฏธ์ธ์กฐ์ ์์ ์ด๋ค ๋ ์ด์ด๋ฅผ ํ์ง ์ ์ฑ
์ ์ ์ํฉ๋๋ค.
|
| 49 |
+
"unfreeze": "last_n", # Unfreeze the last n encoder blocks.
|
| 50 |
+
# encoder 블록의 마지막 n개를 unfreeze 합니다.
|
| 51 |
+
|
| 52 |
+
# has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
|
| 53 |
+
# has_bn์ BatchNorm ์กด์ฌ ์ฌ๋ถ์ด๋ฉฐ freeze ์ ํน๋ณ ์ทจ๊ธ์ด ํ์ํ์ง ํ๋จ์ ์ฌ์ฉํฉ๋๋ค.
|
| 54 |
+
"has_bn": False,
|
| 55 |
+
},
|
| 56 |
+
|
| 57 |
+
"microsoft/swin-tiny-patch4-window7-224": {
|
| 58 |
+
# This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
|
| 59 |
+
# ์ด ๋ฐฑ๋ณธ์ Swin Transformer์ด๋ฉฐ ๊ตฌํ์ ๋ฐ๋ผ pooler output ์ ๊ณต ์ฌ๋ถ๊ฐ ๋ฌ๋ผ์ง ์ ์์ต๋๋ค.
|
| 60 |
+
"type": "swin",
|
| 61 |
+
"feat_dim": 768,
|
| 62 |
+
|
| 63 |
+
# Prefer pooler output if available, otherwise fall back to mean pooling.
|
| 64 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฐ์ ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด mean pooling์ผ๋ก ๋์ฒดํฉ๋๋ค.
|
| 65 |
+
"feat_rule": "pool_or_mean",
|
| 66 |
+
|
| 67 |
+
# Unfreeze strategy is aligned with transformer-style encoder blocks.
|
| 68 |
+
# unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
|
| 69 |
+
"unfreeze": "last_n",
|
| 70 |
+
"has_bn": False,
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# -------------------------
|
| 74 |
+
# Transformers (CNNs)
|
| 75 |
+
# -------------------------
|
| 76 |
+
# These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
|
| 77 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers๋ก ๋
ธ์ถ๋ CNN์ด๋ฉฐ pooled feature ๋๋ feature map์ ์ ๊ณตํฉ๋๋ค.
|
| 78 |
+
|
| 79 |
+
"microsoft/resnet-50": {
|
| 80 |
+
# This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
|
| 81 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ ResNet์ด pooler ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ ์ ์๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 82 |
+
"type": "resnet",
|
| 83 |
+
"feat_dim": 2048,
|
| 84 |
+
|
| 85 |
+
# Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
|
| 86 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด global average pooling(GAP)์ ์ ์ฉํฉ๋๋ค.
|
| 87 |
+
"feat_rule": "pool_or_gap",
|
| 88 |
+
|
| 89 |
+
# CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
|
| 90 |
+
# CNN๋ ๋ชจ๋ธ ์ฝ๋์์ block/stage ๋จ์๋ก last_n ์ ์ฑ
์ ์ ์ฉํ ์ ์์ต๋๋ค.
|
| 91 |
+
"unfreeze": "last_n",
|
| 92 |
+
"has_bn": True,
|
| 93 |
+
},
|
| 94 |
+
|
| 95 |
+
"google/efficientnet-b0": {
|
| 96 |
+
# This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
|
| 97 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ EfficientNet์ด pooled feature ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 98 |
+
"type": "efficientnet",
|
| 99 |
+
"feat_dim": 1280,
|
| 100 |
+
"feat_rule": "pool_or_gap",
|
| 101 |
+
"unfreeze": "last_n",
|
| 102 |
+
"has_bn": True,
|
| 103 |
+
},
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# timm (DenseNet via HF Hub)
|
| 107 |
+
# -------------------------
|
| 108 |
+
# This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
|
| 109 |
+
# ์ด ๋ฐฑ๋ณธ์ ๋ชจ๋ธ ๋ก๋์์ timm์ "hf_hub:" ํ๋ฆฌํฝ์ค๋ฅผ ์ฌ์ฉํด ๋ก๋ํฉ๋๋ค.
|
| 110 |
+
"timm/densenet121.tv_in1k": {
|
| 111 |
+
"type": "timm_densenet",
|
| 112 |
+
|
| 113 |
+
# DenseNet-121 final channel dimension is 1024 for the canonical architecture.
|
| 114 |
+
# DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
|
| 115 |
+
"feat_dim": 1024,
|
| 116 |
+
|
| 117 |
+
# timm forward_features typically returns a feature map that you then GAP to (B, C).
|
| 118 |
+
# timm์ forward_features๋ ๋ณดํต feature map์ ๋ฐํํ๊ณ ์ดํ GAP์ผ๋ก (B, C)๋ฅผ ๋ง๋ญ๋๋ค.
|
| 119 |
+
"feat_rule": "timm_gap",
|
| 120 |
+
|
| 121 |
+
# DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
|
| 122 |
+
# DenseNet์ BatchNorm ์ฌ์ฉ์ด ๋ง์ stage1/stage2์์ freeze_bn ์ฒ๋ฆฌ๊ฐ ์ค์ํฉ๋๋ค.
|
| 123 |
+
"unfreeze": "last_n",
|
| 124 |
+
"has_bn": True,
|
| 125 |
+
},
|
| 126 |
+
|
| 127 |
+
# -------------------------
|
| 128 |
+
# torchvision (DenseNet direct)
|
| 129 |
+
# -------------------------
|
| 130 |
+
# This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
|
| 131 |
+
# ์ด ๋ฐฑ๋ณธ์ transformers/timm์ด ์๋๋ผ torchvision ์คํ์ผ ๋ก๋ฉ ๋ฐ feature ์ถ์ถ์ ๋์์ผ๋ก ํฉ๋๋ค.
|
| 132 |
+
"torchvision/densenet121": {
|
| 133 |
+
"type": "torchvision_densenet",
|
| 134 |
+
"feat_dim": 1024,
|
| 135 |
+
|
| 136 |
+
# torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
|
| 137 |
+
# torchvision DenseNet์ ๋ณดํต .features๋ฅผ ๋
ธ์ถํ๋ฉฐ GAP์ผ๋ก (B, C)๋ฅผ ์ป์ต๋๋ค.
|
| 138 |
+
"feat_rule": "torchvision_densenet_gap",
|
| 139 |
+
|
| 140 |
+
# Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
|
| 141 |
+
# unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
|
| 142 |
+
"unfreeze": "last_n",
|
| 143 |
+
"has_bn": True,
|
| 144 |
+
},
|
| 145 |
+
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() ์ดํ ํธ์ถํ๋ฉด ์ฒดํฌํฌ์ธํธ ๊ฐ์ค์น๋ฅผ ๋ฎ์ด์ฐ๋ฏ๋ก ์ ๋ ํธ์ถํ๋ฉด ์ ๋ฉ๋๋ค.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
|
| 24 |
+
"""
|
| 25 |
+
간단한 2-layer MLP head.
|
| 26 |
+
|
| 27 |
+
Parameters
|
| 28 |
+
----------
|
| 29 |
+
in_dim : int
|
| 30 |
+
backbone feature dim
|
| 31 |
+
num_labels : int
|
| 32 |
+
class count
|
| 33 |
+
bottleneck : int
|
| 34 |
+
hidden dim
|
| 35 |
+
p : float
|
| 36 |
+
dropout prob
|
| 37 |
+
"""
|
| 38 |
+
def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
|
| 39 |
+
super().__init__()
|
| 40 |
+
self.fc1 = nn.Linear(in_dim, bottleneck)
|
| 41 |
+
self.act = nn.GELU()
|
| 42 |
+
self.drop = nn.Dropout(p)
|
| 43 |
+
self.fc2 = nn.Linear(bottleneck, num_labels)
|
| 44 |
+
|
| 45 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 46 |
+
return self.fc2(self.drop(self.act(self.fc1(x))))
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() ์ดํ ํธ์ถํ๋ฉด ์ฒดํฌํฌ์ธํธ ๊ฐ์ค์น๋ฅผ ๋ฎ์ด์ฐ๋ฏ๋ก ์ ๋ ํธ์ถํ๋ฉด ์ ๋ฉ๋๋ค.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
manifest_20260210_163348.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20260210_163348",
|
| 3 |
+
"repo_id": "dsaint31/bb_mlp_224",
|
| 4 |
+
"revision": "main",
|
| 5 |
+
"tag": null,
|
| 6 |
+
"num_labels": 3,
|
| 7 |
+
"build_device": "mps",
|
| 8 |
+
"count": 6,
|
| 9 |
+
"items": [
|
| 10 |
+
{
|
| 11 |
+
"backbone": "google/vit-base-patch16-224",
|
| 12 |
+
"subdir": "models/google__vit-base-patch16-224",
|
| 13 |
+
"dirname": "google__vit-base-patch16-224"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"backbone": "microsoft/swin-tiny-patch4-window7-224",
|
| 17 |
+
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 18 |
+
"dirname": "microsoft__swin-tiny-patch4-window7-224"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"backbone": "microsoft/resnet-50",
|
| 22 |
+
"subdir": "models/microsoft__resnet-50",
|
| 23 |
+
"dirname": "microsoft__resnet-50"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"backbone": "google/efficientnet-b0",
|
| 27 |
+
"subdir": "models/google__efficientnet-b0",
|
| 28 |
+
"dirname": "google__efficientnet-b0"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"backbone": "timm/densenet121.tv_in1k",
|
| 32 |
+
"subdir": "models/timm__densenet121.tv_in1k",
|
| 33 |
+
"dirname": "timm__densenet121.tv_in1k"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"backbone": "torchvision/densenet121",
|
| 37 |
+
"subdir": "models/torchvision__densenet121",
|
| 38 |
+
"dirname": "torchvision__densenet121"
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
"root_code_included": true,
|
| 42 |
+
"root_code_files": [
|
| 43 |
+
"ds_proc.py",
|
| 44 |
+
"ds_model.py",
|
| 45 |
+
"ds_cfg.py"
|
| 46 |
+
],
|
| 47 |
+
"subfolder_code_included": true,
|
| 48 |
+
"subfolder_code_files": [
|
| 49 |
+
"ds_proc.py",
|
| 50 |
+
"ds_model.py",
|
| 51 |
+
"ds_cfg.py"
|
| 52 |
+
]
|
| 53 |
+
}
|
models/google__efficientnet-b0/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__efficientnet-b0",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": true
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__efficientnet-b0",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": true
|
| 52 |
}
|
models/google__efficientnet-b0/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
BackboneID = Literal[
|
| 9 |
+
"google/vit-base-patch16-224",
|
| 10 |
+
"microsoft/swin-tiny-patch4-window7-224",
|
| 11 |
+
"microsoft/resnet-50",
|
| 12 |
+
"google/efficientnet-b0",
|
| 13 |
+
"timm/densenet121.tv_in1k",
|
| 14 |
+
"torchvision/densenet121",
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
# ============================================================
|
| 18 |
+
# 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
|
| 19 |
+
# 2) ๋ฐฑ๋ณธ ๋ฉํ ๋ ์ง์คํธ๋ฆฌ (feature dim/rule/unfreeze rule ๊ณ ์ )
|
| 20 |
+
# ============================================================
|
| 21 |
+
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
|
| 22 |
+
# ์ด ํ
์ด๋ธ์ backbone๋ณ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค(source of truth)์
๋๋ค.
|
| 23 |
+
#
|
| 24 |
+
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
|
| 25 |
+
# ํค ํ์
์ BackboneID๋ก ๊ณ ์ ํ์ฌ ๋ฉํ ํค๊ฐ ํ์ดํธ๋ฆฌ์คํธ์ ์ด๊ธ๋์ง ์๊ฒ ํฉ๋๋ค.
|
| 26 |
+
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
|
| 27 |
+
# -------------------------
|
| 28 |
+
# Transformers (ViT/Swin)
|
| 29 |
+
# -------------------------
|
| 30 |
+
# These backbones come from transformers and typically output hidden states and/or pooler outputs.
|
| 31 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers ๊ณ์ด์ด๋ฉฐ hidden states์ pooler ์ถ๋ ฅ ๋ฑ์ ์ ๊ณตํฉ๋๋ค.
|
| 32 |
+
|
| 33 |
+
"google/vit-base-patch16-224": {
|
| 34 |
+
# type indicates which loading/forward/extraction pathway the model code should use.
|
| 35 |
+
# type์ ๋ชจ๋ธ ์ฝ๋๊ฐ ์ด๋ค ๋ก๋ฉ/forward/feature ์ถ์ถ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 36 |
+
"type": "vit",
|
| 37 |
+
|
| 38 |
+
# feat_dim is the feature vector dimension consumed by the MLP head.
|
| 39 |
+
# feat_dim์ MLP head๊ฐ ์
๋ ฅ์ผ๋ก ๋ฐ๋ feature ๋ฒกํฐ ์ฐจ์์
๋๋ค.
|
| 40 |
+
"feat_dim": 768,
|
| 41 |
+
|
| 42 |
+
# feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
|
| 43 |
+
# feat_rule์ backbone ์ถ๋ ฅ์์ (B, feat_dim) ํ
์๋ฅผ ์ป๋ ๊ท์น์ ์ ์ํฉ๋๋ค.
|
| 44 |
+
"feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
|
| 45 |
+
# last_hidden_state[:, 0, :]๋ฅผ CLS ํ ํฐ ์๋ฒ ๋ฉ์ผ๋ก ์ฌ์ฉํฉ๋๋ค.
|
| 46 |
+
|
| 47 |
+
# unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
|
| 48 |
+
# unfreeze๋ stage2 ๋ฏธ์ธ์กฐ์ ์์ ์ด๋ค ๋ ์ด์ด๋ฅผ ํ์ง ์ ์ฑ
์ ์ ์ํฉ๋๋ค.
|
| 49 |
+
"unfreeze": "last_n", # Unfreeze the last n encoder blocks.
|
| 50 |
+
# encoder ๋ธ๋ก์ ๋ง์ง๋ง n๊ฐ๋ฅผ unfreeze ํฉ๋๋ค.
|
| 51 |
+
|
| 52 |
+
# has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
|
| 53 |
+
# has_bn์ BatchNorm ์กด์ฌ ์ฌ๋ถ์ด๋ฉฐ freeze ์ ํน๋ณ ์ทจ๊ธ์ด ํ์ํ์ง ํ๋จ์ ์ฌ์ฉํฉ๋๋ค.
|
| 54 |
+
"has_bn": False,
|
| 55 |
+
},
|
| 56 |
+
|
| 57 |
+
"microsoft/swin-tiny-patch4-window7-224": {
|
| 58 |
+
# This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
|
| 59 |
+
# ์ด ๋ฐฑ๋ณธ์ Swin Transformer์ด๋ฉฐ ๊ตฌํ์ ๋ฐ๋ผ pooler output ์ ๊ณต ์ฌ๋ถ๊ฐ ๋ฌ๋ผ์ง ์ ์์ต๋๋ค.
|
| 60 |
+
"type": "swin",
|
| 61 |
+
"feat_dim": 768,
|
| 62 |
+
|
| 63 |
+
# Prefer pooler output if available, otherwise fall back to mean pooling.
|
| 64 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฐ์ ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด mean pooling์ผ๋ก ๋์ฒดํฉ๋๋ค.
|
| 65 |
+
"feat_rule": "pool_or_mean",
|
| 66 |
+
|
| 67 |
+
# Unfreeze strategy is aligned with transformer-style encoder blocks.
|
| 68 |
+
# unfreeze ์ ๋ต์ transformer ๊ณ์ด encoder ๋ธ๋ก ๊ธฐ์ค์ผ๋ก ๋ง์ถฅ๋๋ค.
|
| 69 |
+
"unfreeze": "last_n",
|
| 70 |
+
"has_bn": False,
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# -------------------------
|
| 74 |
+
# Transformers (CNNs)
|
| 75 |
+
# -------------------------
|
| 76 |
+
# These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
|
| 77 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers๋ก ๋
ธ์ถ๋ CNN์ด๋ฉฐ pooled feature ๋๋ feature map์ ์ ๊ณตํฉ๋๋ค.
|
| 78 |
+
|
| 79 |
+
"microsoft/resnet-50": {
|
| 80 |
+
# This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
|
| 81 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ ResNet์ด pooler ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ ์ ์๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 82 |
+
"type": "resnet",
|
| 83 |
+
"feat_dim": 2048,
|
| 84 |
+
|
| 85 |
+
# Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
|
| 86 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด global average pooling(GAP)์ ์ ์ฉํฉ๋๋ค.
|
| 87 |
+
"feat_rule": "pool_or_gap",
|
| 88 |
+
|
| 89 |
+
# CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
|
| 90 |
+
# CNN๋ ๋ชจ๋ธ ์ฝ๋์์ block/stage ๋จ์๋ก last_n ์ ์ฑ
์ ์ ์ฉํ ์ ์์ต๋๋ค.
|
| 91 |
+
"unfreeze": "last_n",
|
| 92 |
+
"has_bn": True,
|
| 93 |
+
},
|
| 94 |
+
|
| 95 |
+
"google/efficientnet-b0": {
|
| 96 |
+
# This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
|
| 97 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ EfficientNet์ด pooled feature ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 98 |
+
"type": "efficientnet",
|
| 99 |
+
"feat_dim": 1280,
|
| 100 |
+
"feat_rule": "pool_or_gap",
|
| 101 |
+
"unfreeze": "last_n",
|
| 102 |
+
"has_bn": True,
|
| 103 |
+
},
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# timm (DenseNet via HF Hub)
|
| 107 |
+
# -------------------------
|
| 108 |
+
# This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
|
| 109 |
+
# ์ด ๋ฐฑ๋ณธ์ ๋ชจ๋ธ ๋ก๋์์ timm์ "hf_hub:" ํ๋ฆฌํฝ์ค๋ฅผ ์ฌ์ฉํด ๋ก๋ํฉ๋๋ค.
|
| 110 |
+
"timm/densenet121.tv_in1k": {
|
| 111 |
+
"type": "timm_densenet",
|
| 112 |
+
|
| 113 |
+
# DenseNet-121 final channel dimension is 1024 for the canonical architecture.
|
| 114 |
+
# DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
|
| 115 |
+
"feat_dim": 1024,
|
| 116 |
+
|
| 117 |
+
# timm forward_features typically returns a feature map that you then GAP to (B, C).
|
| 118 |
+
# timm์ forward_features๋ ๋ณดํต feature map์ ๋ฐํํ๊ณ ์ดํ GAP์ผ๋ก (B, C)๋ฅผ ๋ง๋ญ๋๋ค.
|
| 119 |
+
"feat_rule": "timm_gap",
|
| 120 |
+
|
| 121 |
+
# DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
|
| 122 |
+
# DenseNet์ BatchNorm ์ฌ์ฉ์ด ๋ง์ stage1/stage2์์ freeze_bn ์ฒ๋ฆฌ๊ฐ ์ค์ํฉ๋๋ค.
|
| 123 |
+
"unfreeze": "last_n",
|
| 124 |
+
"has_bn": True,
|
| 125 |
+
},
|
| 126 |
+
|
| 127 |
+
# -------------------------
|
| 128 |
+
# torchvision (DenseNet direct)
|
| 129 |
+
# -------------------------
|
| 130 |
+
# This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
|
| 131 |
+
# ์ด ๋ฐฑ๋ณธ์ transformers/timm์ด ์๋๋ผ torchvision ์คํ์ผ ๋ก๋ฉ ๋ฐ feature ์ถ์ถ์ ๋์์ผ๋ก ํฉ๋๋ค.
|
| 132 |
+
"torchvision/densenet121": {
|
| 133 |
+
"type": "torchvision_densenet",
|
| 134 |
+
"feat_dim": 1024,
|
| 135 |
+
|
| 136 |
+
# torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
|
| 137 |
+
# torchvision DenseNet์ ๋ณดํต .features๋ฅผ ๋
ธ์ถํ๋ฉฐ GAP์ผ๋ก (B, C)๋ฅผ ์ป์ต๋๋ค.
|
| 138 |
+
"feat_rule": "torchvision_densenet_gap",
|
| 139 |
+
|
| 140 |
+
# Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
|
| 141 |
+
# unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
|
| 142 |
+
"unfreeze": "last_n",
|
| 143 |
+
"has_bn": True,
|
| 144 |
+
},
|
| 145 |
+
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/google__efficientnet-b0/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
    """Two-layer MLP classification head: Linear -> GELU -> Dropout -> Linear.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (size of the last axis of the input).
    num_labels : int
        Number of classes, i.e. the size of the output logits.
    bottleneck : int
        Hidden layer width.
    p : float
        Dropout probability applied after the activation.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names are part of the checkpoint contract: they become the
        # state_dict key prefixes (fc1.*, fc2.*), so they must not be renamed.
        self.fc1 = nn.Linear(in_dim, bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p)
        self.fc2 = nn.Linear(bottleneck, num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map features to logits through the hidden bottleneck layer."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/google__efficientnet-b0/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/google__efficientnet-b0/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 17558436
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05324e85fb965b74d443308262fc7c776bcc001035e3d66bf63a52b0ba4ce300
|
| 3 |
size 17558436
|
models/google__vit-base-patch16-224/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__vit-base-patch16-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": true
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__vit-base-patch16-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": true
|
| 52 |
}
|
models/google__vit-base-patch16-224/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
BackboneID = Literal[
    # Closed whitelist of supported backbone identifiers.
    # BACKBONE_META below is annotated with this type as its key type.
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]
|
| 16 |
+
|
| 17 |
+
# ============================================================
|
| 18 |
+
# 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
|
| 19 |
+
# 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
|
| 20 |
+
# ============================================================
|
| 21 |
+
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
|
| 22 |
+
# ์ด ํ
์ด๋ธ์ backbone๋ณ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค(source of truth)์
๋๋ค.
|
| 23 |
+
#
|
| 24 |
+
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
|
| 25 |
+
# ํค ํ์
์ BackboneID๋ก ๊ณ ์ ํ์ฌ ๋ฉํ ํค๊ฐ ํ์ดํธ๋ฆฌ์คํธ์ ์ด๊ธ๋์ง ์๊ฒ ํฉ๋๋ค.
|
| 26 |
+
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # Per-backbone registry. Every entry carries the same five keys:
    #   type      - which loading/forward/extraction pathway the model code should use
    #   feat_dim  - feature vector dimension consumed by the MLP head
    #   feat_rule - how to get a (B, feat_dim) tensor from backbone outputs
    #   unfreeze  - policy used to unfreeze layers during stage2 fine-tuning
    #   has_bn    - whether BatchNorm exists and needs care when freezing

    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # These backbones come from transformers and typically output hidden states and/or pooler outputs.

    "google/vit-base-patch16-224": {
        # type indicates which loading/forward/extraction pathway the model code should use.
        "type": "vit",

        # feat_dim is the feature vector dimension consumed by the MLP head.
        "feat_dim": 768,

        # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
        "feat_rule": "cls",  # Use last_hidden_state[:, 0, :] as CLS token embedding.

        # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
        "unfreeze": "last_n",  # Unfreeze the last n encoder blocks.

        # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
        "has_bn": False,
    },

    "microsoft/swin-tiny-patch4-window7-224": {
        # This backbone is a Swin Transformer, which may or may not provide a pooler output
        # depending on implementation.
        "type": "swin",
        "feat_dim": 768,

        # Prefer pooler output if available, otherwise fall back to mean pooling.
        "feat_rule": "pool_or_mean",

        # Unfreeze strategy is aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },

    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors
    # or feature maps.

    "microsoft/resnet-50": {
        # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,

        # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",

        # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity
        # in the model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    "google/efficientnet-b0": {
        # This entry assumes a transformers-compatible EfficientNet that exposes pooled features
        # or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # This backbone is loaded via timm using the "hf_hub:" prefix in the model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",

        # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
        "feat_dim": 1024,

        # timm forward_features typically returns a feature map that you then GAP to (B, C).
        "feat_rule": "timm_gap",

        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # This backbone is intended for torchvision-style loading and feature extraction,
    # not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,

        # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
        "feat_rule": "torchvision_densenet_gap",

        # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/google__vit-base-patch16-224/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
    """Two-layer MLP classification head: Linear -> GELU -> Dropout -> Linear.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (size of the last axis of the input).
    num_labels : int
        Number of classes, i.e. the size of the output logits.
    bottleneck : int
        Hidden layer width.
    p : float
        Dropout probability applied after the activation.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names are part of the checkpoint contract: they become the
        # state_dict key prefixes (fc1.*, fc2.*), so they must not be renamed.
        self.fc1 = nn.Linear(in_dim, bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p)
        self.fc2 = nn.Linear(bottleneck, num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map features to logits through the hidden bottleneck layer."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/google__vit-base-patch16-224/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/google__vit-base-patch16-224/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 346372132
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:152041a83fb83b3877e72054e3d74e4542e1b28dba1ca7a70682b03efd68bae4
|
| 3 |
size 346372132
|
models/microsoft__resnet-50/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__resnet-50",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": true
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__resnet-50",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": true
|
| 52 |
}
|
models/microsoft__resnet-50/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
# Whitelist of supported backbone identifiers; also used as the key type of BACKBONE_META below.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]

# ============================================================
# 2) Backbone metadata registry (feature dim/rule/unfreeze rule)
# ============================================================
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
#
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
#
# Every entry carries the same five keys:
#   type      - which loading/forward/extraction pathway the model code should use
#   feat_dim  - feature vector dimension consumed by the MLP head
#   feat_rule - how to get a (B, feat_dim) tensor from backbone outputs
#   unfreeze  - policy used to unfreeze layers during stage2 fine-tuning
#   has_bn    - whether BatchNorm exists and needs careful handling when freezing
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # These backbones come from transformers and typically output hidden states and/or pooler outputs.

    "google/vit-base-patch16-224": {
        # type indicates which loading/forward/extraction pathway the model code should use.
        "type": "vit",

        # feat_dim is the feature vector dimension consumed by the MLP head.
        "feat_dim": 768,

        # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
        "feat_rule": "cls",  # Use last_hidden_state[:, 0, :] as CLS token embedding.

        # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
        "unfreeze": "last_n",  # Unfreeze the last n encoder blocks.

        # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
        "has_bn": False,
    },

    "microsoft/swin-tiny-patch4-window7-224": {
        # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
        "type": "swin",
        "feat_dim": 768,

        # Prefer pooler output if available, otherwise fall back to mean pooling.
        "feat_rule": "pool_or_mean",

        # Unfreeze strategy is aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },

    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.

    "microsoft/resnet-50": {
        # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,

        # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",

        # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    "google/efficientnet-b0": {
        # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",

        # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
        "feat_dim": 1024,

        # timm forward_features typically returns a feature map that you then GAP to (B, C).
        "feat_rule": "timm_gap",

        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,

        # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
        "feat_rule": "torchvision_densenet_gap",

        # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/microsoft__resnet-50/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
    """Simple two-layer MLP classification head.

    Applies ``Linear -> GELU -> Dropout -> Linear`` to the input tensor.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (input size of the first linear layer).
    num_labels : int
        Number of classes (output size of the second linear layer).
    bottleneck : int
        Hidden dimension between the two linear layers.
    p : float
        Dropout probability.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names and creation order are kept unchanged so that
        # state_dict keys (fc1.*, fc2.*) and parameter initialization order stay the same.
        self.fc1 = nn.Linear(in_features=in_dim, out_features=bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p=p)
        self.fc2 = nn.Linear(in_features=bottleneck, out_features=num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project features to the bottleneck, activate, drop out, and map to logits."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
    """
    Pick the backbone meta dict to use at runtime.

    Lookup order:
      1) ``config.backbone_meta`` when it is a non-empty dict
         (preferred; required for Hub runtime determinism).
      2) ``fallback_table[config.backbone_name_or_path]`` when a table is given
         and contains that key (backward compatibility for local/dev).

    The selected dict is returned as-is: it is neither copied nor validated here.
    Callers expect at least the keys type, feat_rule, feat_dim
    (and optionally has_bn/unfreeze).

    Raises
    ------
    ValueError
        If neither the config nor the fallback table provides a meta dict.
    """
    configured = getattr(config, "backbone_meta", None)
    if isinstance(configured, dict) and configured:
        return configured

    backbone_name = getattr(config, "backbone_name_or_path", None)
    if fallback_table is None or backbone_name not in fallback_table:
        raise ValueError(
            "config.backbone_meta is missing/empty and no fallback meta is available. "
            "Populate config.backbone_meta when saving to the Hub (single source of truth)."
        )

    return fallback_table[backbone_name]
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/microsoft__resnet-50/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/microsoft__resnet-50/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 96388660
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32610cc9f181dd7de16c79732221106bf7431ae82693471b9410446efe103482
|
| 3 |
size 96388660
|
models/microsoft__swin-tiny-patch4-window7-224/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": true
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": true
|
| 52 |
}
|
models/microsoft__swin-tiny-patch4-window7-224/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
# Whitelist of supported backbone identifiers; also used as the key type of BACKBONE_META below.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]

# ============================================================
# 2) Backbone metadata registry (feature dim/rule/unfreeze rule)
# ============================================================
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
#
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
#
# Every entry carries the same five keys:
#   type      - which loading/forward/extraction pathway the model code should use
#   feat_dim  - feature vector dimension consumed by the MLP head
#   feat_rule - how to get a (B, feat_dim) tensor from backbone outputs
#   unfreeze  - policy used to unfreeze layers during stage2 fine-tuning
#   has_bn    - whether BatchNorm exists and needs careful handling when freezing
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # These backbones come from transformers and typically output hidden states and/or pooler outputs.

    "google/vit-base-patch16-224": {
        # type indicates which loading/forward/extraction pathway the model code should use.
        "type": "vit",

        # feat_dim is the feature vector dimension consumed by the MLP head.
        "feat_dim": 768,

        # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
        "feat_rule": "cls",  # Use last_hidden_state[:, 0, :] as CLS token embedding.

        # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
        "unfreeze": "last_n",  # Unfreeze the last n encoder blocks.

        # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
        "has_bn": False,
    },

    "microsoft/swin-tiny-patch4-window7-224": {
        # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
        "type": "swin",
        "feat_dim": 768,

        # Prefer pooler output if available, otherwise fall back to mean pooling.
        "feat_rule": "pool_or_mean",

        # Unfreeze strategy is aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },

    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.

    "microsoft/resnet-50": {
        # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,

        # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",

        # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    "google/efficientnet-b0": {
        # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",

        # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
        "feat_dim": 1024,

        # timm forward_features typically returns a feature map that you then GAP to (B, C).
        "feat_rule": "timm_gap",

        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,

        # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
        "feat_rule": "torchvision_densenet_gap",

        # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/microsoft__swin-tiny-patch4-window7-224/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
    """Simple two-layer MLP classification head.

    Applies ``Linear -> GELU -> Dropout -> Linear`` to the input tensor.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (input size of the first linear layer).
    num_labels : int
        Number of classes (output size of the second linear layer).
    bottleneck : int
        Hidden dimension between the two linear layers.
    p : float
        Dropout probability.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names and creation order are kept unchanged so that
        # state_dict keys (fc1.*, fc2.*) and parameter initialization order stay the same.
        self.fc1 = nn.Linear(in_features=in_dim, out_features=bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p=p)
        self.fc2 = nn.Linear(in_features=bottleneck, out_features=num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Project features to the bottleneck, activate, drop out, and map to logits."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() ์ดํ ํธ์ถํ๋ฉด ์ฒดํฌํฌ์ธํธ ๊ฐ์ค์น๋ฅผ ๋ฎ์ด์ฐ๋ฏ๋ก ์ ๋ ํธ์ถํ๋ฉด ์ ๋ฉ๋๋ค.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/microsoft__swin-tiny-patch4-window7-224/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 111128348
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29c4e394ff3e53d723a82b9911acfd072ca4259b7ac667e87eadee77b645ee84
|
| 3 |
size 111128348
|
models/timm__densenet121.tv_in1k/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/timm__densenet121.tv_in1k",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": false
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/timm__densenet121.tv_in1k",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": false
|
| 52 |
}
|
models/timm__densenet121.tv_in1k/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
BackboneID = Literal[
|
| 9 |
+
"google/vit-base-patch16-224",
|
| 10 |
+
"microsoft/swin-tiny-patch4-window7-224",
|
| 11 |
+
"microsoft/resnet-50",
|
| 12 |
+
"google/efficientnet-b0",
|
| 13 |
+
"timm/densenet121.tv_in1k",
|
| 14 |
+
"torchvision/densenet121",
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
# ============================================================
|
| 18 |
+
# 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
|
| 19 |
+
# 2) ๋ฐฑ๋ณธ ๋ฉํ ๋ ์ง์คํธ๋ฆฌ (feature dim/rule/unfreeze rule ๊ณ ์ )
|
| 20 |
+
# ============================================================
|
| 21 |
+
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
|
| 22 |
+
# ์ด ํ
์ด๋ธ์ backbone๋ณ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค(source of truth)์
๋๋ค.
|
| 23 |
+
#
|
| 24 |
+
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
|
| 25 |
+
# ํค ํ์
์ BackboneID๋ก ๊ณ ์ ํ์ฌ ๋ฉํ ํค๊ฐ ํ์ดํธ๋ฆฌ์คํธ์ ์ด๊ธ๋์ง ์๊ฒ ํฉ๋๋ค.
|
| 26 |
+
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
|
| 27 |
+
# -------------------------
|
| 28 |
+
# Transformers (ViT/Swin)
|
| 29 |
+
# -------------------------
|
| 30 |
+
# These backbones come from transformers and typically output hidden states and/or pooler outputs.
|
| 31 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers ๊ณ์ด์ด๋ฉฐ hidden states์ pooler ์ถ๋ ฅ ๋ฑ์ ์ ๊ณตํฉ๋๋ค.
|
| 32 |
+
|
| 33 |
+
"google/vit-base-patch16-224": {
|
| 34 |
+
# type indicates which loading/forward/extraction pathway the model code should use.
|
| 35 |
+
# type์ ๋ชจ๋ธ ์ฝ๋๊ฐ ์ด๋ค ๋ก๋ฉ/forward/feature ์ถ์ถ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 36 |
+
"type": "vit",
|
| 37 |
+
|
| 38 |
+
# feat_dim is the feature vector dimension consumed by the MLP head.
|
| 39 |
+
# feat_dim์ MLP head๊ฐ ์
๋ ฅ์ผ๋ก ๋ฐ๋ feature ๋ฒกํฐ ์ฐจ์์
๋๋ค.
|
| 40 |
+
"feat_dim": 768,
|
| 41 |
+
|
| 42 |
+
# feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
|
| 43 |
+
# feat_rule์ backbone ์ถ๋ ฅ์์ (B, feat_dim) ํ
์๋ฅผ ์ป๋ ๊ท์น์ ์ ์ํฉ๋๋ค.
|
| 44 |
+
"feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
|
| 45 |
+
# last_hidden_state[:, 0, :]๋ฅผ CLS ํ ํฐ ์๋ฒ ๋ฉ์ผ๋ก ์ฌ์ฉํฉ๋๋ค.
|
| 46 |
+
|
| 47 |
+
# unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
|
| 48 |
+
# unfreeze๋ stage2 ๋ฏธ์ธ์กฐ์ ์์ ์ด๋ค ๋ ์ด์ด๋ฅผ ํ์ง ์ ์ฑ
์ ์ ์ํฉ๋๋ค.
|
| 49 |
+
"unfreeze": "last_n", # Unfreeze the last n encoder blocks.
|
| 50 |
+
# encoder ๋ธ๋ก์ ๋ง์ง๋ง n๊ฐ๋ฅผ unfreeze ํฉ๋๋ค.
|
| 51 |
+
|
| 52 |
+
# has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
|
| 53 |
+
# has_bn์ BatchNorm ์กด์ฌ ์ฌ๋ถ์ด๋ฉฐ freeze ์ ํน๋ณ ์ทจ๊ธ์ด ํ์ํ์ง ํ๋จ์ ์ฌ์ฉํฉ๋๋ค.
|
| 54 |
+
"has_bn": False,
|
| 55 |
+
},
|
| 56 |
+
|
| 57 |
+
"microsoft/swin-tiny-patch4-window7-224": {
|
| 58 |
+
# This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
|
| 59 |
+
# ์ด ๋ฐฑ๋ณธ์ Swin Transformer์ด๋ฉฐ ๊ตฌํ์ ๋ฐ๋ผ pooler output ์ ๊ณต ์ฌ๋ถ๊ฐ ๋ฌ๋ผ์ง ์ ์์ต๋๋ค.
|
| 60 |
+
"type": "swin",
|
| 61 |
+
"feat_dim": 768,
|
| 62 |
+
|
| 63 |
+
# Prefer pooler output if available, otherwise fall back to mean pooling.
|
| 64 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฐ์ ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด mean pooling์ผ๋ก ๋์ฒดํฉ๋๋ค.
|
| 65 |
+
"feat_rule": "pool_or_mean",
|
| 66 |
+
|
| 67 |
+
# Unfreeze strategy is aligned with transformer-style encoder blocks.
|
| 68 |
+
# unfreeze ์ ๋ต์ transformer ๊ณ์ด encoder ๋ธ๋ก ๊ธฐ์ค์ผ๋ก ๋ง์ถฅ๋๋ค.
|
| 69 |
+
"unfreeze": "last_n",
|
| 70 |
+
"has_bn": False,
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# -------------------------
|
| 74 |
+
# Transformers (CNNs)
|
| 75 |
+
# -------------------------
|
| 76 |
+
# These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
|
| 77 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers๋ก ๋
ธ์ถ๋ CNN์ด๋ฉฐ pooled feature ๋๋ feature map์ ์ ๊ณตํฉ๋๋ค.
|
| 78 |
+
|
| 79 |
+
"microsoft/resnet-50": {
|
| 80 |
+
# This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
|
| 81 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ ResNet์ด pooler ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ ์ ์๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 82 |
+
"type": "resnet",
|
| 83 |
+
"feat_dim": 2048,
|
| 84 |
+
|
| 85 |
+
# Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
|
| 86 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด global average pooling(GAP)์ ์ ์ฉํฉ๋๋ค.
|
| 87 |
+
"feat_rule": "pool_or_gap",
|
| 88 |
+
|
| 89 |
+
# CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
|
| 90 |
+
# CNN๋ ๋ชจ๋ธ ์ฝ๋์์ block/stage ๋จ์๋ก last_n ์ ์ฑ
์ ์ ์ฉํ ์ ์์ต๋๋ค.
|
| 91 |
+
"unfreeze": "last_n",
|
| 92 |
+
"has_bn": True,
|
| 93 |
+
},
|
| 94 |
+
|
| 95 |
+
"google/efficientnet-b0": {
|
| 96 |
+
# This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
|
| 97 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ EfficientNet์ด pooled feature ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 98 |
+
"type": "efficientnet",
|
| 99 |
+
"feat_dim": 1280,
|
| 100 |
+
"feat_rule": "pool_or_gap",
|
| 101 |
+
"unfreeze": "last_n",
|
| 102 |
+
"has_bn": True,
|
| 103 |
+
},
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# timm (DenseNet via HF Hub)
|
| 107 |
+
# -------------------------
|
| 108 |
+
# This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
|
| 109 |
+
# ์ด ๋ฐฑ๋ณธ์ ๋ชจ๋ธ ๋ก๋์์ timm์ "hf_hub:" ํ๋ฆฌํฝ์ค๋ฅผ ์ฌ์ฉํด ๋ก๋ํฉ๋๋ค.
|
| 110 |
+
"timm/densenet121.tv_in1k": {
|
| 111 |
+
"type": "timm_densenet",
|
| 112 |
+
|
| 113 |
+
# DenseNet-121 final channel dimension is 1024 for the canonical architecture.
|
| 114 |
+
# DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
|
| 115 |
+
"feat_dim": 1024,
|
| 116 |
+
|
| 117 |
+
# timm forward_features typically returns a feature map that you then GAP to (B, C).
|
| 118 |
+
# timm์ forward_features๋ ๋ณดํต feature map์ ๋ฐํํ๊ณ ์ดํ GAP์ผ๋ก (B, C)๋ฅผ ๋ง๋ญ๋๋ค.
|
| 119 |
+
"feat_rule": "timm_gap",
|
| 120 |
+
|
| 121 |
+
# DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
|
| 122 |
+
# DenseNet์ BatchNorm ์ฌ์ฉ์ด ๋ง์ stage1/stage2์์ freeze_bn ์ฒ๋ฆฌ๊ฐ ์ค์ํฉ๋๋ค.
|
| 123 |
+
"unfreeze": "last_n",
|
| 124 |
+
"has_bn": True,
|
| 125 |
+
},
|
| 126 |
+
|
| 127 |
+
# -------------------------
|
| 128 |
+
# torchvision (DenseNet direct)
|
| 129 |
+
# -------------------------
|
| 130 |
+
# This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
|
| 131 |
+
# ์ด ๋ฐฑ๋ณธ์ transformers/timm์ด ์๋๋ผ torchvision ์คํ์ผ ๋ก๋ฉ ๋ฐ feature ์ถ์ถ์ ๋์์ผ๋ก ํฉ๋๋ค.
|
| 132 |
+
"torchvision/densenet121": {
|
| 133 |
+
"type": "torchvision_densenet",
|
| 134 |
+
"feat_dim": 1024,
|
| 135 |
+
|
| 136 |
+
# torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
|
| 137 |
+
# torchvision DenseNet์ ๋ณดํต .features๋ฅผ ๋
ธ์ถํ๋ฉฐ GAP์ผ๋ก (B, C)๋ฅผ ์ป์ต๋๋ค.
|
| 138 |
+
"feat_rule": "torchvision_densenet_gap",
|
| 139 |
+
|
| 140 |
+
# Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
|
| 141 |
+
# unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
|
| 142 |
+
"unfreeze": "last_n",
|
| 143 |
+
"has_bn": True,
|
| 144 |
+
},
|
| 145 |
+
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/timm__densenet121.tv_in1k/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() ์ดํ ํธ์ถํ๋ฉด ์ฒดํฌํฌ์ธํธ ๊ฐ์ค์น๋ฅผ ๋ฎ์ด์ฐ๋ฏ๋ก ์ ๋ ํธ์ถํ๋ฉด ์ ๋ฉ๋๋ค.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
|
| 24 |
+
"""
|
| 25 |
+
๊ฐ๋จํ 2-layer MLP head.
|
| 26 |
+
|
| 27 |
+
Parameters
|
| 28 |
+
----------
|
| 29 |
+
in_dim : int
|
| 30 |
+
backbone feature dim
|
| 31 |
+
num_labels : int
|
| 32 |
+
class count
|
| 33 |
+
bottleneck : int
|
| 34 |
+
hidden dim
|
| 35 |
+
p : float
|
| 36 |
+
dropout prob
|
| 37 |
+
"""
|
| 38 |
+
def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
|
| 39 |
+
super().__init__()
|
| 40 |
+
self.fc1 = nn.Linear(in_dim, bottleneck)
|
| 41 |
+
self.act = nn.GELU()
|
| 42 |
+
self.drop = nn.Dropout(p)
|
| 43 |
+
self.fc2 = nn.Linear(bottleneck, num_labels)
|
| 44 |
+
|
| 45 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 46 |
+
return self.fc2(self.drop(self.act(self.fc1(x))))
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() ์ดํ ํธ์ถํ๋ฉด ์ฒดํฌํฌ์ธํธ ๊ฐ์ค์น๋ฅผ ๋ฎ์ด์ฐ๋ฏ๋ก ์ ๋ ํธ์ถํ๋ฉด ์ ๋ฉ๋๋ค.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/timm__densenet121.tv_in1k/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/timm__densenet121.tv_in1k/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 29293620
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebc8977157008d63a00e318686b5b2fa763f2ece83748f3701671ee629ab70c9
|
| 3 |
size 29293620
|
models/torchvision__densenet121/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/torchvision__densenet121",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
@@ -40,15 +40,13 @@
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
-
"ds_cfg.py"
|
| 44 |
-
"ds_meta.py"
|
| 45 |
],
|
| 46 |
"subfolder_code_included": true,
|
| 47 |
"subfolder_code_files": [
|
| 48 |
"ds_proc.py",
|
| 49 |
"ds_model.py",
|
| 50 |
-
"ds_cfg.py"
|
| 51 |
-
"ds_meta.py"
|
| 52 |
],
|
| 53 |
"processor_use_fast": false
|
| 54 |
}
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260210_163348",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/torchvision__densenet121",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 40 |
"root_code_files": [
|
| 41 |
"ds_proc.py",
|
| 42 |
"ds_model.py",
|
| 43 |
+
"ds_cfg.py"
|
|
|
|
| 44 |
],
|
| 45 |
"subfolder_code_included": true,
|
| 46 |
"subfolder_code_files": [
|
| 47 |
"ds_proc.py",
|
| 48 |
"ds_model.py",
|
| 49 |
+
"ds_cfg.py"
|
|
|
|
| 50 |
],
|
| 51 |
"processor_use_fast": false
|
| 52 |
}
|
models/torchvision__densenet121/ds_cfg.py
CHANGED
|
@@ -1,6 +1,149 @@
|
|
| 1 |
from transformers import PretrainedConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
-
from ds_meta import BackboneID, BACKBONE_META
|
| 4 |
|
| 5 |
|
| 6 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
|
|
|
| 1 |
from transformers import PretrainedConfig
|
| 2 |
+
from typing import Literal, Any
|
| 3 |
+
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Backbone whitelist + meta registry
|
| 6 |
+
# ============================================================
|
| 7 |
+
|
| 8 |
+
BackboneID = Literal[
|
| 9 |
+
"google/vit-base-patch16-224",
|
| 10 |
+
"microsoft/swin-tiny-patch4-window7-224",
|
| 11 |
+
"microsoft/resnet-50",
|
| 12 |
+
"google/efficientnet-b0",
|
| 13 |
+
"timm/densenet121.tv_in1k",
|
| 14 |
+
"torchvision/densenet121",
|
| 15 |
+
]
|
| 16 |
+
|
| 17 |
+
# ============================================================
|
| 18 |
+
# 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
|
| 19 |
+
# 2) ๋ฐฑ๋ณธ ๋ฉํ ๋ ์ง์คํธ๋ฆฌ (feature dim/rule/unfreeze rule ๊ณ ์ )
|
| 20 |
+
# ============================================================
|
| 21 |
+
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
|
| 22 |
+
# ์ด ํ
์ด๋ธ์ backbone๋ณ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค(source of truth)์
๋๋ค.
|
| 23 |
+
#
|
| 24 |
+
# The key type is BackboneID to ensure meta keys never drift from the whitelist.
|
| 25 |
+
# ํค ํ์
์ BackboneID๋ก ๊ณ ์ ํ์ฌ ๋ฉํ ํค๊ฐ ํ์ดํธ๋ฆฌ์คํธ์ ์ด๊ธ๋์ง ์๊ฒ ํฉ๋๋ค.
|
| 26 |
+
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
|
| 27 |
+
# -------------------------
|
| 28 |
+
# Transformers (ViT/Swin)
|
| 29 |
+
# -------------------------
|
| 30 |
+
# These backbones come from transformers and typically output hidden states and/or pooler outputs.
|
| 31 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers ๊ณ์ด์ด๋ฉฐ hidden states์ pooler ์ถ๋ ฅ ๋ฑ์ ์ ๊ณตํฉ๋๋ค.
|
| 32 |
+
|
| 33 |
+
"google/vit-base-patch16-224": {
|
| 34 |
+
# type indicates which loading/forward/extraction pathway the model code should use.
|
| 35 |
+
# type์ ๋ชจ๋ธ ์ฝ๋๊ฐ ์ด๋ค ๋ก๋ฉ/forward/feature ์ถ์ถ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 36 |
+
"type": "vit",
|
| 37 |
+
|
| 38 |
+
# feat_dim is the feature vector dimension consumed by the MLP head.
|
| 39 |
+
# feat_dim์ MLP head๊ฐ ์
๋ ฅ์ผ๋ก ๋ฐ๋ feature ๋ฒกํฐ ์ฐจ์์
๋๋ค.
|
| 40 |
+
"feat_dim": 768,
|
| 41 |
+
|
| 42 |
+
# feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
|
| 43 |
+
# feat_rule์ backbone ์ถ๋ ฅ์์ (B, feat_dim) ํ
์๋ฅผ ์ป๋ ๊ท์น์ ์ ์ํฉ๋๋ค.
|
| 44 |
+
"feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
|
| 45 |
+
# last_hidden_state[:, 0, :]๋ฅผ CLS ํ ํฐ ์๋ฒ ๋ฉ์ผ๋ก ์ฌ์ฉํฉ๋๋ค.
|
| 46 |
+
|
| 47 |
+
# unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
|
| 48 |
+
# unfreeze๋ stage2 ๋ฏธ์ธ์กฐ์ ์์ ์ด๋ค ๋ ์ด์ด๋ฅผ ํ์ง ์ ์ฑ
์ ์ ์ํฉ๋๋ค.
|
| 49 |
+
"unfreeze": "last_n", # Unfreeze the last n encoder blocks.
|
| 50 |
+
# encoder ๋ธ๋ก์ ๋ง์ง๋ง n๊ฐ๋ฅผ unfreeze ํฉ๋๋ค.
|
| 51 |
+
|
| 52 |
+
# has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
|
| 53 |
+
# has_bn์ BatchNorm ์กด์ฌ ์ฌ๋ถ์ด๋ฉฐ freeze ์ ํน๋ณ ์ทจ๊ธ์ด ํ์ํ์ง ํ๋จ์ ์ฌ์ฉํฉ๋๋ค.
|
| 54 |
+
"has_bn": False,
|
| 55 |
+
},
|
| 56 |
+
|
| 57 |
+
"microsoft/swin-tiny-patch4-window7-224": {
|
| 58 |
+
# This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
|
| 59 |
+
# ์ด ๋ฐฑ๋ณธ์ Swin Transformer์ด๋ฉฐ ๊ตฌํ์ ๋ฐ๋ผ pooler output ์ ๊ณต ์ฌ๋ถ๊ฐ ๋ฌ๋ผ์ง ์ ์์ต๋๋ค.
|
| 60 |
+
"type": "swin",
|
| 61 |
+
"feat_dim": 768,
|
| 62 |
+
|
| 63 |
+
# Prefer pooler output if available, otherwise fall back to mean pooling.
|
| 64 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฐ์ ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด mean pooling์ผ๋ก ๋์ฒดํฉ๋๋ค.
|
| 65 |
+
"feat_rule": "pool_or_mean",
|
| 66 |
+
|
| 67 |
+
# Unfreeze strategy is aligned with transformer-style encoder blocks.
|
| 68 |
+
# unfreeze ์ ๋ต์ transformer ๊ณ์ด encoder ๋ธ๋ก ๊ธฐ์ค์ผ๋ก ๋ง์ถฅ๋๋ค.
|
| 69 |
+
"unfreeze": "last_n",
|
| 70 |
+
"has_bn": False,
|
| 71 |
+
},
|
| 72 |
+
|
| 73 |
+
# -------------------------
|
| 74 |
+
# Transformers (CNNs)
|
| 75 |
+
# -------------------------
|
| 76 |
+
# These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
|
| 77 |
+
# ์ด ๋ฐฑ๋ณธ๋ค์ transformers๋ก ๋
ธ์ถ๋ CNN์ด๋ฉฐ pooled feature ๋๋ feature map์ ์ ๊ณตํฉ๋๋ค.
|
| 78 |
+
|
| 79 |
+
"microsoft/resnet-50": {
|
| 80 |
+
# This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
|
| 81 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ ResNet์ด pooler ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ ์ ์๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 82 |
+
"type": "resnet",
|
| 83 |
+
"feat_dim": 2048,
|
| 84 |
+
|
| 85 |
+
# Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
|
| 86 |
+
# pooler๊ฐ ์์ผ๋ฉด ์ฌ์ฉํ๊ณ , ์์ผ๋ฉด global average pooling(GAP)์ ์ ์ฉํฉ๋๋ค.
|
| 87 |
+
"feat_rule": "pool_or_gap",
|
| 88 |
+
|
| 89 |
+
# CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
|
| 90 |
+
# CNN๋ ๋ชจ๋ธ ์ฝ๋์์ block/stage ๋จ์๋ก last_n ์ ์ฑ
์ ์ ์ฉํ ์ ์์ต๋๋ค.
|
| 91 |
+
"unfreeze": "last_n",
|
| 92 |
+
"has_bn": True,
|
| 93 |
+
},
|
| 94 |
+
|
| 95 |
+
"google/efficientnet-b0": {
|
| 96 |
+
# This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
|
| 97 |
+
# ์ด ํญ๋ชฉ์ transformers ํธํ EfficientNet์ด pooled feature ๋๋ ์ต์ข
feature map์ ์ ๊ณตํ๋ค๊ณ ๊ฐ์ ํฉ๋๋ค.
|
| 98 |
+
"type": "efficientnet",
|
| 99 |
+
"feat_dim": 1280,
|
| 100 |
+
"feat_rule": "pool_or_gap",
|
| 101 |
+
"unfreeze": "last_n",
|
| 102 |
+
"has_bn": True,
|
| 103 |
+
},
|
| 104 |
+
|
| 105 |
+
# -------------------------
|
| 106 |
+
# timm (DenseNet via HF Hub)
|
| 107 |
+
# -------------------------
|
| 108 |
+
# This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
|
| 109 |
+
# ์ด ๋ฐฑ๋ณธ์ ๋ชจ๋ธ ๋ก๋์์ timm์ "hf_hub:" ํ๋ฆฌํฝ์ค๋ฅผ ์ฌ์ฉํด ๋ก๋ํฉ๋๋ค.
|
| 110 |
+
"timm/densenet121.tv_in1k": {
|
| 111 |
+
"type": "timm_densenet",
|
| 112 |
+
|
| 113 |
+
# DenseNet-121 final channel dimension is 1024 for the canonical architecture.
|
| 114 |
+
# DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
|
| 115 |
+
"feat_dim": 1024,
|
| 116 |
+
|
| 117 |
+
# timm forward_features typically returns a feature map that you then GAP to (B, C).
|
| 118 |
+
# timm์ forward_features๋ ๋ณดํต feature map์ ๋ฐํํ๊ณ ์ดํ GAP์ผ๋ก (B, C)๋ฅผ ๋ง๋ญ๋๋ค.
|
| 119 |
+
"feat_rule": "timm_gap",
|
| 120 |
+
|
| 121 |
+
# DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
|
| 122 |
+
# DenseNet์ BatchNorm ์ฌ์ฉ์ด ๋ง์ stage1/stage2์์ freeze_bn ์ฒ๋ฆฌ๊ฐ ์ค์ํฉ๋๋ค.
|
| 123 |
+
"unfreeze": "last_n",
|
| 124 |
+
"has_bn": True,
|
| 125 |
+
},
|
| 126 |
+
|
| 127 |
+
# -------------------------
|
| 128 |
+
# torchvision (DenseNet direct)
|
| 129 |
+
# -------------------------
|
| 130 |
+
# This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
|
| 131 |
+
# ์ด ๋ฐฑ๋ณธ์ transformers/timm์ด ์๋๋ผ torchvision ์คํ์ผ ๋ก๋ฉ ๋ฐ feature ์ถ์ถ์ ๋์์ผ๋ก ํฉ๋๋ค.
|
| 132 |
+
"torchvision/densenet121": {
|
| 133 |
+
"type": "torchvision_densenet",
|
| 134 |
+
"feat_dim": 1024,
|
| 135 |
+
|
| 136 |
+
# torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
|
| 137 |
+
# torchvision DenseNet์ ๋ณดํต .features๋ฅผ ๋
ธ์ถํ๋ฉฐ GAP์ผ๋ก (B, C)๋ฅผ ์ป์ต๋๋ค.
|
| 138 |
+
"feat_rule": "torchvision_densenet_gap",
|
| 139 |
+
|
| 140 |
+
# Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
|
| 141 |
+
# unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
|
| 142 |
+
"unfreeze": "last_n",
|
| 143 |
+
"has_bn": True,
|
| 144 |
+
},
|
| 145 |
+
}
|
| 146 |
|
|
|
|
| 147 |
|
| 148 |
|
| 149 |
class BackboneMLPHeadConfig(PretrainedConfig):
|
models/torchvision__densenet121/ds_model.py
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
-
from typing import Optional, List
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
-
from ds_cfg import BackboneMLPHeadConfig
|
| 21 |
-
from
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ============================================================
|
| 25 |
# (3) Model: backbone + MLP head
|
|
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 67 |
|
| 68 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 69 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# Backbone skeleton is always created without pretrained weights.
|
| 73 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 105 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 106 |
# Meta decides which loader path to use.
|
| 107 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 108 |
-
meta = BACKBONE_META
|
|
|
|
|
|
|
|
|
|
| 109 |
t = meta["type"]
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
|
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 169 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 170 |
"""
|
| 171 |
bb = self.config.backbone_name_or_path
|
| 172 |
-
meta =
|
| 173 |
t = meta["type"]
|
| 174 |
|
| 175 |
if t == "timm_densenet":
|
|
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 394 |
_set_requires_grad(model.backbone, False)
|
| 395 |
_set_requires_grad(model.classifier, True)
|
| 396 |
|
| 397 |
-
meta =
|
| 398 |
if freeze_bn and meta.get("has_bn", False):
|
| 399 |
set_bn_eval(model.backbone)
|
| 400 |
|
|
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 403 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 404 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 405 |
model.train()
|
| 406 |
-
meta =
|
| 407 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 408 |
set_bn_eval(model.backbone)
|
| 409 |
|
|
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 431 |
if n <= 0:
|
| 432 |
return
|
| 433 |
|
| 434 |
-
meta =
|
| 435 |
if meta.get("unfreeze") != "last_n":
|
| 436 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 437 |
|
|
|
|
| 3 |
|
| 4 |
# src/ds_model.py
|
| 5 |
|
| 6 |
+
from typing import Optional, List, Any, Dict
|
| 7 |
|
| 8 |
import torch
|
| 9 |
import torch.nn as nn
|
|
|
|
| 17 |
# --- torchvision ---
|
| 18 |
from torchvision import models as tv_models
|
| 19 |
|
| 20 |
+
from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
|
| 21 |
+
# from mlp_head import MLPHead
|
| 22 |
+
|
| 23 |
+
class MLPHead(nn.Module):
|
| 24 |
+
"""
|
| 25 |
+
간단한 2-layer MLP head.
|
| 26 |
+
|
| 27 |
+
Parameters
|
| 28 |
+
----------
|
| 29 |
+
in_dim : int
|
| 30 |
+
backbone feature dim
|
| 31 |
+
num_labels : int
|
| 32 |
+
class count
|
| 33 |
+
bottleneck : int
|
| 34 |
+
hidden dim
|
| 35 |
+
p : float
|
| 36 |
+
dropout prob
|
| 37 |
+
"""
|
| 38 |
+
def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
|
| 39 |
+
super().__init__()
|
| 40 |
+
self.fc1 = nn.Linear(in_dim, bottleneck)
|
| 41 |
+
self.act = nn.GELU()
|
| 42 |
+
self.drop = nn.Dropout(p)
|
| 43 |
+
self.fc2 = nn.Linear(bottleneck, num_labels)
|
| 44 |
+
|
| 45 |
+
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
| 46 |
+
return self.fc2(self.drop(self.act(self.fc1(x))))
|
| 47 |
+
|
| 48 |
+
# ------------------------------------------------------------
|
| 49 |
+
# backbone_meta resolver
|
| 50 |
+
# ------------------------------------------------------------
|
| 51 |
+
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
|
| 52 |
+
"""
|
| 53 |
+
Resolve runtime backbone meta.
|
| 54 |
+
|
| 55 |
+
Priority:
|
| 56 |
+
1) config.backbone_meta (preferred; required for Hub runtime determinism)
|
| 57 |
+
2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
|
| 58 |
+
|
| 59 |
+
Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
|
| 60 |
+
"""
|
| 61 |
+
meta = getattr(config, "backbone_meta", None)
|
| 62 |
+
if isinstance(meta, dict) and len(meta) > 0:
|
| 63 |
+
return meta
|
| 64 |
+
|
| 65 |
+
bb = getattr(config, "backbone_name_or_path", None)
|
| 66 |
+
if fallback_table is not None and bb in fallback_table:
|
| 67 |
+
return fallback_table[bb]
|
| 68 |
+
|
| 69 |
+
raise ValueError(
|
| 70 |
+
"config.backbone_meta is missing/empty and no fallback meta is available. "
|
| 71 |
+
"Populate config.backbone_meta when saving to the Hub (single source of truth)."
|
| 72 |
+
)
|
| 73 |
+
|
| 74 |
|
| 75 |
# ============================================================
|
| 76 |
# (3) Model: backbone + MLP head
|
|
|
|
| 118 |
|
| 119 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 120 |
# meta๋ feature ์ถ์ถ ๋ฐ ๋ฏธ์ธ์กฐ์ ๊ท์น์ ๋จ์ผ ๊ธฐ์ค์
๋๋ค.
|
| 121 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 122 |
+
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 123 |
+
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 124 |
|
| 125 |
# Backbone skeleton is always created without pretrained weights.
|
| 126 |
# backbone skeleton์ ํญ์ pretrained weight ์์ด ์์ฑํฉ๋๋ค.
|
|
|
|
| 158 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 159 |
# Meta decides which loader path to use.
|
| 160 |
# meta๊ฐ ์ด๋ค ๋ก๋ ๊ฒฝ๋ก๋ฅผ ์ฌ์ฉํ ์ง ๊ฒฐ์ ํฉ๋๋ค.
|
| 161 |
+
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 162 |
+
if meta is None:
|
| 163 |
+
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
| 164 |
+
|
| 165 |
t = meta["type"]
|
| 166 |
|
| 167 |
if t == "timm_densenet":
|
|
|
|
| 225 |
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
|
| 226 |
"""
|
| 227 |
bb = self.config.backbone_name_or_path
|
| 228 |
+
meta = self._meta
|
| 229 |
t = meta["type"]
|
| 230 |
|
| 231 |
if t == "timm_densenet":
|
|
|
|
| 450 |
_set_requires_grad(model.backbone, False)
|
| 451 |
_set_requires_grad(model.classifier, True)
|
| 452 |
|
| 453 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 454 |
if freeze_bn and meta.get("has_bn", False):
|
| 455 |
set_bn_eval(model.backbone)
|
| 456 |
|
|
|
|
| 459 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 460 |
# stage2: train ๋ชจ๋๋ก ๋๋ ์์ ์ฑ์ ์ํด BN์ eval๋ก ์ ์งํ ์ ์์ต๋๋ค.
|
| 461 |
model.train()
|
| 462 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 463 |
if keep_bn_eval and meta.get("has_bn", False):
|
| 464 |
set_bn_eval(model.backbone)
|
| 465 |
|
|
|
|
| 487 |
if n <= 0:
|
| 488 |
return
|
| 489 |
|
| 490 |
+
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 491 |
if meta.get("unfreeze") != "last_n":
|
| 492 |
raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
|
| 493 |
|
models/torchvision__densenet121/ds_proc.py
CHANGED
|
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
|
|
| 16 |
from transformers.image_processing_base import ImageProcessingMixin
|
| 17 |
from transformers.utils.generic import TensorType
|
| 18 |
|
| 19 |
+
from ds_cfg import BackboneID, BACKBONE_META
|
| 20 |
|
| 21 |
|
| 22 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
models/torchvision__densenet121/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33394052
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:064a8e6356a4304a40f80f368a46d9b0ff4307b849cf6e458ab325d2fcfd9c63
|
| 3 |
size 33394052
|