dsaint31 committed on
Commit
bf566dc
·
verified ·
1 Parent(s): 2810b72

Add/Update backbone checkpoints (count=6)

Browse files
Files changed (34) hide show
  1. ds_cfg.py +144 -1
  2. ds_model.py +66 -10
  3. ds_proc.py +1 -1
  4. manifest_20260210_163348.json +53 -0
  5. models/google__efficientnet-b0/config.json +3 -5
  6. models/google__efficientnet-b0/ds_cfg.py +144 -1
  7. models/google__efficientnet-b0/ds_model.py +66 -10
  8. models/google__efficientnet-b0/ds_proc.py +1 -1
  9. models/google__efficientnet-b0/model.safetensors +1 -1
  10. models/google__vit-base-patch16-224/config.json +3 -5
  11. models/google__vit-base-patch16-224/ds_cfg.py +144 -1
  12. models/google__vit-base-patch16-224/ds_model.py +66 -10
  13. models/google__vit-base-patch16-224/ds_proc.py +1 -1
  14. models/google__vit-base-patch16-224/model.safetensors +1 -1
  15. models/microsoft__resnet-50/config.json +3 -5
  16. models/microsoft__resnet-50/ds_cfg.py +144 -1
  17. models/microsoft__resnet-50/ds_model.py +66 -10
  18. models/microsoft__resnet-50/ds_proc.py +1 -1
  19. models/microsoft__resnet-50/model.safetensors +1 -1
  20. models/microsoft__swin-tiny-patch4-window7-224/config.json +3 -5
  21. models/microsoft__swin-tiny-patch4-window7-224/ds_cfg.py +144 -1
  22. models/microsoft__swin-tiny-patch4-window7-224/ds_model.py +66 -10
  23. models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py +1 -1
  24. models/microsoft__swin-tiny-patch4-window7-224/model.safetensors +1 -1
  25. models/timm__densenet121.tv_in1k/config.json +3 -5
  26. models/timm__densenet121.tv_in1k/ds_cfg.py +144 -1
  27. models/timm__densenet121.tv_in1k/ds_model.py +66 -10
  28. models/timm__densenet121.tv_in1k/ds_proc.py +1 -1
  29. models/timm__densenet121.tv_in1k/model.safetensors +1 -1
  30. models/torchvision__densenet121/config.json +3 -5
  31. models/torchvision__densenet121/ds_cfg.py +144 -1
  32. models/torchvision__densenet121/ds_model.py +66 -10
  33. models/torchvision__densenet121/ds_proc.py +1 -1
  34. models/torchvision__densenet121/model.safetensors +1 -1
ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
+ BackboneID = Literal[
9
+ "google/vit-base-patch16-224",
10
+ "microsoft/swin-tiny-patch4-window7-224",
11
+ "microsoft/resnet-50",
12
+ "google/efficientnet-b0",
13
+ "timm/densenet121.tv_in1k",
14
+ "torchvision/densenet121",
15
+ ]
16
+
17
+ # ============================================================
18
+ # 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
19
+ # 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
20
+ # ============================================================
21
+ # This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
22
+ # ์ด ํ…Œ์ด๋ธ”์€ backbone๋ณ„ feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€(source of truth)์ž…๋‹ˆ๋‹ค.
23
+ #
24
+ # The key type is BackboneID to ensure meta keys never drift from the whitelist.
25
+ # ํ‚ค ํƒ€์ž…์„ BackboneID๋กœ ๊ณ ์ •ํ•˜์—ฌ ๋ฉ”ํƒ€ ํ‚ค๊ฐ€ ํ™”์ดํŠธ๋ฆฌ์ŠคํŠธ์™€ ์–ด๊ธ‹๋‚˜์ง€ ์•Š๊ฒŒ ํ•ฉ๋‹ˆ๋‹ค.
26
+ BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
27
+ # -------------------------
28
+ # Transformers (ViT/Swin)
29
+ # -------------------------
30
+ # These backbones come from transformers and typically output hidden states and/or pooler outputs.
31
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers ๊ณ„์—ด์ด๋ฉฐ hidden states์™€ pooler ์ถœ๋ ฅ ๋“ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
32
+
33
+ "google/vit-base-patch16-224": {
34
+ # type indicates which loading/forward/extraction pathway the model code should use.
35
+ # type์€ ๋ชจ๋ธ ์ฝ”๋“œ๊ฐ€ ์–ด๋–ค ๋กœ๋”ฉ/forward/feature ์ถ”์ถœ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
36
+ "type": "vit",
37
+
38
+ # feat_dim is the feature vector dimension consumed by the MLP head.
39
+ # feat_dim์€ MLP head๊ฐ€ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›๋Š” feature ๋ฒกํ„ฐ ์ฐจ์›์ž…๋‹ˆ๋‹ค.
40
+ "feat_dim": 768,
41
+
42
+ # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
43
+ # feat_rule์€ backbone ์ถœ๋ ฅ์—์„œ (B, feat_dim) ํ…์„œ๋ฅผ ์–ป๋Š” ๊ทœ์น™์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ "feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
45
+ # last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
46
+
47
+ # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
48
+ # unfreeze๋Š” stage2 ๋ฏธ์„ธ์กฐ์ •์—์„œ ์–ด๋–ค ๋ ˆ์ด์–ด๋ฅผ ํ’€์ง€ ์ •์ฑ…์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
49
+ "unfreeze": "last_n", # Unfreeze the last n encoder blocks.
50
+ # encoder 블록의 마지막 n개를 unfreeze 합니다.
51
+
52
+ # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
53
+ # has_bn์€ BatchNorm ์กด์žฌ ์—ฌ๋ถ€์ด๋ฉฐ freeze ์‹œ ํŠน๋ณ„ ์ทจ๊ธ‰์ด ํ•„์š”ํ•œ์ง€ ํŒ๋‹จ์— ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
54
+ "has_bn": False,
55
+ },
56
+
57
+ "microsoft/swin-tiny-patch4-window7-224": {
58
+ # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
59
+ # ์ด ๋ฐฑ๋ณธ์€ Swin Transformer์ด๋ฉฐ ๊ตฌํ˜„์— ๋”ฐ๋ผ pooler output ์ œ๊ณต ์—ฌ๋ถ€๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
60
+ "type": "swin",
61
+ "feat_dim": 768,
62
+
63
+ # Prefer pooler output if available, otherwise fall back to mean pooling.
64
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์šฐ์„  ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด mean pooling์œผ๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.
65
+ "feat_rule": "pool_or_mean",
66
+
67
+ # Unfreeze strategy is aligned with transformer-style encoder blocks.
68
+ # unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
69
+ "unfreeze": "last_n",
70
+ "has_bn": False,
71
+ },
72
+
73
+ # -------------------------
74
+ # Transformers (CNNs)
75
+ # -------------------------
76
+ # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
77
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers๋กœ ๋…ธ์ถœ๋œ CNN์ด๋ฉฐ pooled feature ๋˜๋Š” feature map์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
78
+
79
+ "microsoft/resnet-50": {
80
+ # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
81
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ ResNet์ด pooler ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ "type": "resnet",
83
+ "feat_dim": 2048,
84
+
85
+ # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
86
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด global average pooling(GAP)์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.
87
+ "feat_rule": "pool_or_gap",
88
+
89
+ # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
90
+ # CNN๋„ ๋ชจ๋ธ ์ฝ”๋“œ์—์„œ block/stage ๋‹จ์œ„๋กœ last_n ์ •์ฑ…์„ ์ ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
91
+ "unfreeze": "last_n",
92
+ "has_bn": True,
93
+ },
94
+
95
+ "google/efficientnet-b0": {
96
+ # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
97
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ EfficientNet์ด pooled feature ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•œ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
98
+ "type": "efficientnet",
99
+ "feat_dim": 1280,
100
+ "feat_rule": "pool_or_gap",
101
+ "unfreeze": "last_n",
102
+ "has_bn": True,
103
+ },
104
+
105
+ # -------------------------
106
+ # timm (DenseNet via HF Hub)
107
+ # -------------------------
108
+ # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
109
+ # ์ด ๋ฐฑ๋ณธ์€ ๋ชจ๋ธ ๋กœ๋”์—์„œ timm์˜ "hf_hub:" ํ”„๋ฆฌํ”ฝ์Šค๋ฅผ ์‚ฌ์šฉํ•ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
110
+ "timm/densenet121.tv_in1k": {
111
+ "type": "timm_densenet",
112
+
113
+ # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
114
+ # DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
115
+ "feat_dim": 1024,
116
+
117
+ # timm forward_features typically returns a feature map that you then GAP to (B, C).
118
+ # timm์˜ forward_features๋Š” ๋ณดํ†ต feature map์„ ๋ฐ˜ํ™˜ํ•˜๊ณ  ์ดํ›„ GAP์œผ๋กœ (B, C)๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
119
+ "feat_rule": "timm_gap",
120
+
121
+ # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
122
+ # DenseNet์€ BatchNorm ์‚ฌ์šฉ์ด ๋งŽ์•„ stage1/stage2์—์„œ freeze_bn ์ฒ˜๋ฆฌ๊ฐ€ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.
123
+ "unfreeze": "last_n",
124
+ "has_bn": True,
125
+ },
126
+
127
+ # -------------------------
128
+ # torchvision (DenseNet direct)
129
+ # -------------------------
130
+ # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
131
+ # ์ด ๋ฐฑ๋ณธ์€ transformers/timm์ด ์•„๋‹ˆ๋ผ torchvision ์Šคํƒ€์ผ ๋กœ๋”ฉ ๋ฐ feature ์ถ”์ถœ์„ ๋Œ€์ƒ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค.
132
+ "torchvision/densenet121": {
133
+ "type": "torchvision_densenet",
134
+ "feat_dim": 1024,
135
+
136
+ # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
137
+ # torchvision DenseNet์€ ๋ณดํ†ต .features๋ฅผ ๋…ธ์ถœํ•˜๋ฉฐ GAP์œผ๋กœ (B, C)๋ฅผ ์–ป์Šต๋‹ˆ๋‹ค.
138
+ "feat_rule": "torchvision_densenet_gap",
139
+
140
+ # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
141
+ # unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
142
+ "unfreeze": "last_n",
143
+ "has_bn": True,
144
+ },
145
+ }
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
+ class MLPHead(nn.Module):
24
+ """
25
+ 간단한 2-layer MLP head.
26
+
27
+ Parameters
28
+ ----------
29
+ in_dim : int
30
+ backbone feature dim
31
+ num_labels : int
32
+ class count
33
+ bottleneck : int
34
+ hidden dim
35
+ p : float
36
+ dropout prob
37
+ """
38
+ def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
39
+ super().__init__()
40
+ self.fc1 = nn.Linear(in_dim, bottleneck)
41
+ self.act = nn.GELU()
42
+ self.drop = nn.Dropout(p)
43
+ self.fc2 = nn.Linear(bottleneck, num_labels)
44
+
45
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
46
+ return self.fc2(self.drop(self.act(self.fc1(x))))
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
+ def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
52
+ """
53
+ Resolve runtime backbone meta.
54
+
55
+ Priority:
56
+ 1) config.backbone_meta (preferred; required for Hub runtime determinism)
57
+ 2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
58
+
59
+ Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
60
+ """
61
+ meta = getattr(config, "backbone_meta", None)
62
+ if isinstance(meta, dict) and len(meta) > 0:
63
+ return meta
64
+
65
+ bb = getattr(config, "backbone_name_or_path", None)
66
+ if fallback_table is not None and bb in fallback_table:
67
+ return fallback_table[bb]
68
+
69
+ raise ValueError(
70
+ "config.backbone_meta is missing/empty and no fallback meta is available. "
71
+ "Populate config.backbone_meta when saving to the Hub (single source of truth)."
72
+ )
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
manifest_20260210_163348.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "20260210_163348",
3
+ "repo_id": "dsaint31/bb_mlp_224",
4
+ "revision": "main",
5
+ "tag": null,
6
+ "num_labels": 3,
7
+ "build_device": "mps",
8
+ "count": 6,
9
+ "items": [
10
+ {
11
+ "backbone": "google/vit-base-patch16-224",
12
+ "subdir": "models/google__vit-base-patch16-224",
13
+ "dirname": "google__vit-base-patch16-224"
14
+ },
15
+ {
16
+ "backbone": "microsoft/swin-tiny-patch4-window7-224",
17
+ "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
18
+ "dirname": "microsoft__swin-tiny-patch4-window7-224"
19
+ },
20
+ {
21
+ "backbone": "microsoft/resnet-50",
22
+ "subdir": "models/microsoft__resnet-50",
23
+ "dirname": "microsoft__resnet-50"
24
+ },
25
+ {
26
+ "backbone": "google/efficientnet-b0",
27
+ "subdir": "models/google__efficientnet-b0",
28
+ "dirname": "google__efficientnet-b0"
29
+ },
30
+ {
31
+ "backbone": "timm/densenet121.tv_in1k",
32
+ "subdir": "models/timm__densenet121.tv_in1k",
33
+ "dirname": "timm__densenet121.tv_in1k"
34
+ },
35
+ {
36
+ "backbone": "torchvision/densenet121",
37
+ "subdir": "models/torchvision__densenet121",
38
+ "dirname": "torchvision__densenet121"
39
+ }
40
+ ],
41
+ "root_code_included": true,
42
+ "root_code_files": [
43
+ "ds_proc.py",
44
+ "ds_model.py",
45
+ "ds_cfg.py"
46
+ ],
47
+ "subfolder_code_included": true,
48
+ "subfolder_code_files": [
49
+ "ds_proc.py",
50
+ "ds_model.py",
51
+ "ds_cfg.py"
52
+ ]
53
+ }
models/google__efficientnet-b0/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__efficientnet-b0",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": true
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__efficientnet-b0",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": true
52
  }
models/google__efficientnet-b0/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
+ BackboneID = Literal[
9
+ "google/vit-base-patch16-224",
10
+ "microsoft/swin-tiny-patch4-window7-224",
11
+ "microsoft/resnet-50",
12
+ "google/efficientnet-b0",
13
+ "timm/densenet121.tv_in1k",
14
+ "torchvision/densenet121",
15
+ ]
16
+
17
+ # ============================================================
18
+ # 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
19
+ # 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
20
+ # ============================================================
21
+ # This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
22
+ # ์ด ํ…Œ์ด๋ธ”์€ backbone๋ณ„ feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€(source of truth)์ž…๋‹ˆ๋‹ค.
23
+ #
24
+ # The key type is BackboneID to ensure meta keys never drift from the whitelist.
25
+ # ํ‚ค ํƒ€์ž…์„ BackboneID๋กœ ๊ณ ์ •ํ•˜์—ฌ ๋ฉ”ํƒ€ ํ‚ค๊ฐ€ ํ™”์ดํŠธ๋ฆฌ์ŠคํŠธ์™€ ์–ด๊ธ‹๋‚˜์ง€ ์•Š๊ฒŒ ํ•ฉ๋‹ˆ๋‹ค.
26
+ BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
27
+ # -------------------------
28
+ # Transformers (ViT/Swin)
29
+ # -------------------------
30
+ # These backbones come from transformers and typically output hidden states and/or pooler outputs.
31
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers ๊ณ„์—ด์ด๋ฉฐ hidden states์™€ pooler ์ถœ๋ ฅ ๋“ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
32
+
33
+ "google/vit-base-patch16-224": {
34
+ # type indicates which loading/forward/extraction pathway the model code should use.
35
+ # type์€ ๋ชจ๋ธ ์ฝ”๋“œ๊ฐ€ ์–ด๋–ค ๋กœ๋”ฉ/forward/feature ์ถ”์ถœ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
36
+ "type": "vit",
37
+
38
+ # feat_dim is the feature vector dimension consumed by the MLP head.
39
+ # feat_dim์€ MLP head๊ฐ€ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›๋Š” feature ๋ฒกํ„ฐ ์ฐจ์›์ž…๋‹ˆ๋‹ค.
40
+ "feat_dim": 768,
41
+
42
+ # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
43
+ # feat_rule์€ backbone ์ถœ๋ ฅ์—์„œ (B, feat_dim) ํ…์„œ๋ฅผ ์–ป๋Š” ๊ทœ์น™์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ "feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
45
+ # last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
46
+
47
+ # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
48
+ # unfreeze๋Š” stage2 ๋ฏธ์„ธ์กฐ์ •์—์„œ ์–ด๋–ค ๋ ˆ์ด์–ด๋ฅผ ํ’€์ง€ ์ •์ฑ…์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
49
+ "unfreeze": "last_n", # Unfreeze the last n encoder blocks.
50
+ # encoder 블록의 마지막 n개를 unfreeze 합니다.
51
+
52
+ # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
53
+ # has_bn์€ BatchNorm ์กด์žฌ ์—ฌ๋ถ€์ด๋ฉฐ freeze ์‹œ ํŠน๋ณ„ ์ทจ๊ธ‰์ด ํ•„์š”ํ•œ์ง€ ํŒ๋‹จ์— ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
54
+ "has_bn": False,
55
+ },
56
+
57
+ "microsoft/swin-tiny-patch4-window7-224": {
58
+ # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
59
+ # ์ด ๋ฐฑ๋ณธ์€ Swin Transformer์ด๋ฉฐ ๊ตฌํ˜„์— ๋”ฐ๋ผ pooler output ์ œ๊ณต ์—ฌ๋ถ€๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
60
+ "type": "swin",
61
+ "feat_dim": 768,
62
+
63
+ # Prefer pooler output if available, otherwise fall back to mean pooling.
64
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์šฐ์„  ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด mean pooling์œผ๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.
65
+ "feat_rule": "pool_or_mean",
66
+
67
+ # Unfreeze strategy is aligned with transformer-style encoder blocks.
68
+ # unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
69
+ "unfreeze": "last_n",
70
+ "has_bn": False,
71
+ },
72
+
73
+ # -------------------------
74
+ # Transformers (CNNs)
75
+ # -------------------------
76
+ # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
77
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers๋กœ ๋…ธ์ถœ๋œ CNN์ด๋ฉฐ pooled feature ๋˜๋Š” feature map์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
78
+
79
+ "microsoft/resnet-50": {
80
+ # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
81
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ ResNet์ด pooler ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ "type": "resnet",
83
+ "feat_dim": 2048,
84
+
85
+ # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
86
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด global average pooling(GAP)์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.
87
+ "feat_rule": "pool_or_gap",
88
+
89
+ # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
90
+ # CNN๋„ ๋ชจ๋ธ ์ฝ”๋“œ์—์„œ block/stage ๋‹จ์œ„๋กœ last_n ์ •์ฑ…์„ ์ ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
91
+ "unfreeze": "last_n",
92
+ "has_bn": True,
93
+ },
94
+
95
+ "google/efficientnet-b0": {
96
+ # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
97
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ EfficientNet์ด pooled feature ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•œ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
98
+ "type": "efficientnet",
99
+ "feat_dim": 1280,
100
+ "feat_rule": "pool_or_gap",
101
+ "unfreeze": "last_n",
102
+ "has_bn": True,
103
+ },
104
+
105
+ # -------------------------
106
+ # timm (DenseNet via HF Hub)
107
+ # -------------------------
108
+ # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
109
+ # ์ด ๋ฐฑ๋ณธ์€ ๋ชจ๋ธ ๋กœ๋”์—์„œ timm์˜ "hf_hub:" ํ”„๋ฆฌํ”ฝ์Šค๋ฅผ ์‚ฌ์šฉํ•ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
110
+ "timm/densenet121.tv_in1k": {
111
+ "type": "timm_densenet",
112
+
113
+ # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
114
+ # DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
115
+ "feat_dim": 1024,
116
+
117
+ # timm forward_features typically returns a feature map that you then GAP to (B, C).
118
+ # timm์˜ forward_features๋Š” ๋ณดํ†ต feature map์„ ๋ฐ˜ํ™˜ํ•˜๊ณ  ์ดํ›„ GAP์œผ๋กœ (B, C)๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
119
+ "feat_rule": "timm_gap",
120
+
121
+ # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
122
+ # DenseNet์€ BatchNorm ์‚ฌ์šฉ์ด ๋งŽ์•„ stage1/stage2์—์„œ freeze_bn ์ฒ˜๋ฆฌ๊ฐ€ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.
123
+ "unfreeze": "last_n",
124
+ "has_bn": True,
125
+ },
126
+
127
+ # -------------------------
128
+ # torchvision (DenseNet direct)
129
+ # -------------------------
130
+ # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
131
+ # ์ด ๋ฐฑ๋ณธ์€ transformers/timm์ด ์•„๋‹ˆ๋ผ torchvision ์Šคํƒ€์ผ ๋กœ๋”ฉ ๋ฐ feature ์ถ”์ถœ์„ ๋Œ€์ƒ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค.
132
+ "torchvision/densenet121": {
133
+ "type": "torchvision_densenet",
134
+ "feat_dim": 1024,
135
+
136
+ # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
137
+ # torchvision DenseNet์€ ๋ณดํ†ต .features๋ฅผ ๋…ธ์ถœํ•˜๋ฉฐ GAP์œผ๋กœ (B, C)๋ฅผ ์–ป์Šต๋‹ˆ๋‹ค.
138
+ "feat_rule": "torchvision_densenet_gap",
139
+
140
+ # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
141
+ # unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
142
+ "unfreeze": "last_n",
143
+ "has_bn": True,
144
+ },
145
+ }
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/google__efficientnet-b0/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
+ class MLPHead(nn.Module):
24
+ """
25
+ 간단한 2-layer MLP head.
26
+
27
+ Parameters
28
+ ----------
29
+ in_dim : int
30
+ backbone feature dim
31
+ num_labels : int
32
+ class count
33
+ bottleneck : int
34
+ hidden dim
35
+ p : float
36
+ dropout prob
37
+ """
38
+ def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
39
+ super().__init__()
40
+ self.fc1 = nn.Linear(in_dim, bottleneck)
41
+ self.act = nn.GELU()
42
+ self.drop = nn.Dropout(p)
43
+ self.fc2 = nn.Linear(bottleneck, num_labels)
44
+
45
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
46
+ return self.fc2(self.drop(self.act(self.fc1(x))))
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
+ def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
52
+ """
53
+ Resolve runtime backbone meta.
54
+
55
+ Priority:
56
+ 1) config.backbone_meta (preferred; required for Hub runtime determinism)
57
+ 2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
58
+
59
+ Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
60
+ """
61
+ meta = getattr(config, "backbone_meta", None)
62
+ if isinstance(meta, dict) and len(meta) > 0:
63
+ return meta
64
+
65
+ bb = getattr(config, "backbone_name_or_path", None)
66
+ if fallback_table is not None and bb in fallback_table:
67
+ return fallback_table[bb]
68
+
69
+ raise ValueError(
70
+ "config.backbone_meta is missing/empty and no fallback meta is available. "
71
+ "Populate config.backbone_meta when saving to the Hub (single source of truth)."
72
+ )
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/google__efficientnet-b0/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/google__efficientnet-b0/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7020f515e549776c727f92374975faf8bb9878444809463d1d1069e08f68d735
3
  size 17558436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05324e85fb965b74d443308262fc7c776bcc001035e3d66bf63a52b0ba4ce300
3
  size 17558436
models/google__vit-base-patch16-224/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__vit-base-patch16-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": true
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__vit-base-patch16-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": true
52
  }
models/google__vit-base-patch16-224/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
+ BackboneID = Literal[
9
+ "google/vit-base-patch16-224",
10
+ "microsoft/swin-tiny-patch4-window7-224",
11
+ "microsoft/resnet-50",
12
+ "google/efficientnet-b0",
13
+ "timm/densenet121.tv_in1k",
14
+ "torchvision/densenet121",
15
+ ]
16
+
17
+ # ============================================================
18
+ # 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
19
+ # 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
20
+ # ============================================================
21
+ # This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
22
+ # ์ด ํ…Œ์ด๋ธ”์€ backbone๋ณ„ feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€(source of truth)์ž…๋‹ˆ๋‹ค.
23
+ #
24
+ # The key type is BackboneID to ensure meta keys never drift from the whitelist.
25
+ # ํ‚ค ํƒ€์ž…์„ BackboneID๋กœ ๊ณ ์ •ํ•˜์—ฌ ๋ฉ”ํƒ€ ํ‚ค๊ฐ€ ํ™”์ดํŠธ๋ฆฌ์ŠคํŠธ์™€ ์–ด๊ธ‹๋‚˜์ง€ ์•Š๊ฒŒ ํ•ฉ๋‹ˆ๋‹ค.
26
+ BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
27
+ # -------------------------
28
+ # Transformers (ViT/Swin)
29
+ # -------------------------
30
+ # These backbones come from transformers and typically output hidden states and/or pooler outputs.
31
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers ๊ณ„์—ด์ด๋ฉฐ hidden states์™€ pooler ์ถœ๋ ฅ ๋“ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
32
+
33
+ "google/vit-base-patch16-224": {
34
+ # type indicates which loading/forward/extraction pathway the model code should use.
35
+ # type์€ ๋ชจ๋ธ ์ฝ”๋“œ๊ฐ€ ์–ด๋–ค ๋กœ๋”ฉ/forward/feature ์ถ”์ถœ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
36
+ "type": "vit",
37
+
38
+ # feat_dim is the feature vector dimension consumed by the MLP head.
39
+ # feat_dim์€ MLP head๊ฐ€ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›๋Š” feature ๋ฒกํ„ฐ ์ฐจ์›์ž…๋‹ˆ๋‹ค.
40
+ "feat_dim": 768,
41
+
42
+ # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
43
+ # feat_rule์€ backbone ์ถœ๋ ฅ์—์„œ (B, feat_dim) ํ…์„œ๋ฅผ ์–ป๋Š” ๊ทœ์น™์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ "feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
45
+ # last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
46
+
47
+ # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
48
+ # unfreeze๋Š” stage2 ๋ฏธ์„ธ์กฐ์ •์—์„œ ์–ด๋–ค ๋ ˆ์ด์–ด๋ฅผ ํ’€์ง€ ์ •์ฑ…์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
49
+ "unfreeze": "last_n", # Unfreeze the last n encoder blocks.
50
+ # encoder 블록의 마지막 n개를 unfreeze 합니다.
51
+
52
+ # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
53
+ # has_bn์€ BatchNorm ์กด์žฌ ์—ฌ๋ถ€์ด๋ฉฐ freeze ์‹œ ํŠน๋ณ„ ์ทจ๊ธ‰์ด ํ•„์š”ํ•œ์ง€ ํŒ๋‹จ์— ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
54
+ "has_bn": False,
55
+ },
56
+
57
+ "microsoft/swin-tiny-patch4-window7-224": {
58
+ # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
59
+ # ์ด ๋ฐฑ๋ณธ์€ Swin Transformer์ด๋ฉฐ ๊ตฌํ˜„์— ๋”ฐ๋ผ pooler output ์ œ๊ณต ์—ฌ๋ถ€๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
60
+ "type": "swin",
61
+ "feat_dim": 768,
62
+
63
+ # Prefer pooler output if available, otherwise fall back to mean pooling.
64
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์šฐ์„  ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด mean pooling์œผ๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.
65
+ "feat_rule": "pool_or_mean",
66
+
67
+ # Unfreeze strategy is aligned with transformer-style encoder blocks.
68
+ # unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
69
+ "unfreeze": "last_n",
70
+ "has_bn": False,
71
+ },
72
+
73
+ # -------------------------
74
+ # Transformers (CNNs)
75
+ # -------------------------
76
+ # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
77
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers๋กœ ๋…ธ์ถœ๋œ CNN์ด๋ฉฐ pooled feature ๋˜๋Š” feature map์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
78
+
79
+ "microsoft/resnet-50": {
80
+ # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
81
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ ResNet์ด pooler ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ "type": "resnet",
83
+ "feat_dim": 2048,
84
+
85
+ # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
86
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด global average pooling(GAP)์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.
87
+ "feat_rule": "pool_or_gap",
88
+
89
+ # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
90
+ # CNN๋„ ๋ชจ๋ธ ์ฝ”๋“œ์—์„œ block/stage ๋‹จ์œ„๋กœ last_n ์ •์ฑ…์„ ์ ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
91
+ "unfreeze": "last_n",
92
+ "has_bn": True,
93
+ },
94
+
95
+ "google/efficientnet-b0": {
96
+ # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
97
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ EfficientNet์ด pooled feature ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•œ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
98
+ "type": "efficientnet",
99
+ "feat_dim": 1280,
100
+ "feat_rule": "pool_or_gap",
101
+ "unfreeze": "last_n",
102
+ "has_bn": True,
103
+ },
104
+
105
+ # -------------------------
106
+ # timm (DenseNet via HF Hub)
107
+ # -------------------------
108
+ # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
109
+ # ์ด ๋ฐฑ๋ณธ์€ ๋ชจ๋ธ ๋กœ๋”์—์„œ timm์˜ "hf_hub:" ํ”„๋ฆฌํ”ฝ์Šค๋ฅผ ์‚ฌ์šฉํ•ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
110
+ "timm/densenet121.tv_in1k": {
111
+ "type": "timm_densenet",
112
+
113
+ # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
114
+ # DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
115
+ "feat_dim": 1024,
116
+
117
+ # timm forward_features typically returns a feature map that you then GAP to (B, C).
118
+ # timm์˜ forward_features๋Š” ๋ณดํ†ต feature map์„ ๋ฐ˜ํ™˜ํ•˜๊ณ  ์ดํ›„ GAP์œผ๋กœ (B, C)๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
119
+ "feat_rule": "timm_gap",
120
+
121
+ # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
122
+ # DenseNet์€ BatchNorm ์‚ฌ์šฉ์ด ๋งŽ์•„ stage1/stage2์—์„œ freeze_bn ์ฒ˜๋ฆฌ๊ฐ€ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.
123
+ "unfreeze": "last_n",
124
+ "has_bn": True,
125
+ },
126
+
127
+ # -------------------------
128
+ # torchvision (DenseNet direct)
129
+ # -------------------------
130
+ # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
131
+ # ์ด ๋ฐฑ๋ณธ์€ transformers/timm์ด ์•„๋‹ˆ๋ผ torchvision ์Šคํƒ€์ผ ๋กœ๋”ฉ ๋ฐ feature ์ถ”์ถœ์„ ๋Œ€์ƒ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค.
132
+ "torchvision/densenet121": {
133
+ "type": "torchvision_densenet",
134
+ "feat_dim": 1024,
135
+
136
+ # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
137
+ # torchvision DenseNet์€ ๋ณดํ†ต .features๋ฅผ ๋…ธ์ถœํ•˜๋ฉฐ GAP์œผ๋กœ (B, C)๋ฅผ ์–ป์Šต๋‹ˆ๋‹ค.
138
+ "feat_rule": "torchvision_densenet_gap",
139
+
140
+ # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
141
+ # unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
142
+ "unfreeze": "last_n",
143
+ "has_bn": True,
144
+ },
145
+ }
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/google__vit-base-patch16-224/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
+ class MLPHead(nn.Module):
24
+ """
25
+ 간단한 2-layer MLP head.
26
+
27
+ Parameters
28
+ ----------
29
+ in_dim : int
30
+ backbone feature dim
31
+ num_labels : int
32
+ class count
33
+ bottleneck : int
34
+ hidden dim
35
+ p : float
36
+ dropout prob
37
+ """
38
+ def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
39
+ super().__init__()
40
+ self.fc1 = nn.Linear(in_dim, bottleneck)
41
+ self.act = nn.GELU()
42
+ self.drop = nn.Dropout(p)
43
+ self.fc2 = nn.Linear(bottleneck, num_labels)
44
+
45
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
46
+ return self.fc2(self.drop(self.act(self.fc1(x))))
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
+ def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
52
+ """
53
+ Resolve runtime backbone meta.
54
+
55
+ Priority:
56
+ 1) config.backbone_meta (preferred; required for Hub runtime determinism)
57
+ 2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
58
+
59
+ Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
60
+ """
61
+ meta = getattr(config, "backbone_meta", None)
62
+ if isinstance(meta, dict) and len(meta) > 0:
63
+ return meta
64
+
65
+ bb = getattr(config, "backbone_name_or_path", None)
66
+ if fallback_table is not None and bb in fallback_table:
67
+ return fallback_table[bb]
68
+
69
+ raise ValueError(
70
+ "config.backbone_meta is missing/empty and no fallback meta is available. "
71
+ "Populate config.backbone_meta when saving to the Hub (single source of truth)."
72
+ )
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/google__vit-base-patch16-224/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/google__vit-base-patch16-224/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c051b46d9d05d5ee0182d84ec6a01a6b2f03e4a3197e1989dbb2119b1c1554a9
3
  size 346372132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:152041a83fb83b3877e72054e3d74e4542e1b28dba1ca7a70682b03efd68bae4
3
  size 346372132
models/microsoft__resnet-50/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__resnet-50",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": true
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__resnet-50",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": true
52
  }
models/microsoft__resnet-50/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
+ BackboneID = Literal[
9
+ "google/vit-base-patch16-224",
10
+ "microsoft/swin-tiny-patch4-window7-224",
11
+ "microsoft/resnet-50",
12
+ "google/efficientnet-b0",
13
+ "timm/densenet121.tv_in1k",
14
+ "torchvision/densenet121",
15
+ ]
16
+
17
+ # ============================================================
18
+ # 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
19
+ # 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
20
+ # ============================================================
21
+ # This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
22
+ # ์ด ํ…Œ์ด๋ธ”์€ backbone๋ณ„ feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€(source of truth)์ž…๋‹ˆ๋‹ค.
23
+ #
24
+ # The key type is BackboneID to ensure meta keys never drift from the whitelist.
25
+ # ํ‚ค ํƒ€์ž…์„ BackboneID๋กœ ๊ณ ์ •ํ•˜์—ฌ ๋ฉ”ํƒ€ ํ‚ค๊ฐ€ ํ™”์ดํŠธ๋ฆฌ์ŠคํŠธ์™€ ์–ด๊ธ‹๋‚˜์ง€ ์•Š๊ฒŒ ํ•ฉ๋‹ˆ๋‹ค.
26
+ BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
27
+ # -------------------------
28
+ # Transformers (ViT/Swin)
29
+ # -------------------------
30
+ # These backbones come from transformers and typically output hidden states and/or pooler outputs.
31
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers ๊ณ„์—ด์ด๋ฉฐ hidden states์™€ pooler ์ถœ๋ ฅ ๋“ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
32
+
33
+ "google/vit-base-patch16-224": {
34
+ # type indicates which loading/forward/extraction pathway the model code should use.
35
+ # type์€ ๋ชจ๋ธ ์ฝ”๋“œ๊ฐ€ ์–ด๋–ค ๋กœ๋”ฉ/forward/feature ์ถ”์ถœ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
36
+ "type": "vit",
37
+
38
+ # feat_dim is the feature vector dimension consumed by the MLP head.
39
+ # feat_dim์€ MLP head๊ฐ€ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›๋Š” feature ๋ฒกํ„ฐ ์ฐจ์›์ž…๋‹ˆ๋‹ค.
40
+ "feat_dim": 768,
41
+
42
+ # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
43
+ # feat_rule์€ backbone ์ถœ๋ ฅ์—์„œ (B, feat_dim) ํ…์„œ๋ฅผ ์–ป๋Š” ๊ทœ์น™์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ "feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
45
+ # last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
46
+
47
+ # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
48
+ # unfreeze๋Š” stage2 ๋ฏธ์„ธ์กฐ์ •์—์„œ ์–ด๋–ค ๋ ˆ์ด์–ด๋ฅผ ํ’€์ง€ ์ •์ฑ…์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
49
+ "unfreeze": "last_n", # Unfreeze the last n encoder blocks.
50
+ # encoder 블록의 마지막 n개를 unfreeze 합니다.
51
+
52
+ # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
53
+ # has_bn์€ BatchNorm ์กด์žฌ ์—ฌ๋ถ€์ด๋ฉฐ freeze ์‹œ ํŠน๋ณ„ ์ทจ๊ธ‰์ด ํ•„์š”ํ•œ์ง€ ํŒ๋‹จ์— ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
54
+ "has_bn": False,
55
+ },
56
+
57
+ "microsoft/swin-tiny-patch4-window7-224": {
58
+ # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
59
+ # ์ด ๋ฐฑ๋ณธ์€ Swin Transformer์ด๋ฉฐ ๊ตฌํ˜„์— ๋”ฐ๋ผ pooler output ์ œ๊ณต ์—ฌ๋ถ€๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
60
+ "type": "swin",
61
+ "feat_dim": 768,
62
+
63
+ # Prefer pooler output if available, otherwise fall back to mean pooling.
64
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์šฐ์„  ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด mean pooling์œผ๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.
65
+ "feat_rule": "pool_or_mean",
66
+
67
+ # Unfreeze strategy is aligned with transformer-style encoder blocks.
68
+ # unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
69
+ "unfreeze": "last_n",
70
+ "has_bn": False,
71
+ },
72
+
73
+ # -------------------------
74
+ # Transformers (CNNs)
75
+ # -------------------------
76
+ # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
77
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers๋กœ ๋…ธ์ถœ๋œ CNN์ด๋ฉฐ pooled feature ๋˜๋Š” feature map์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
78
+
79
+ "microsoft/resnet-50": {
80
+ # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
81
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ ResNet์ด pooler ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ "type": "resnet",
83
+ "feat_dim": 2048,
84
+
85
+ # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
86
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด global average pooling(GAP)์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.
87
+ "feat_rule": "pool_or_gap",
88
+
89
+ # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
90
+ # CNN๋„ ๋ชจ๋ธ ์ฝ”๋“œ์—์„œ block/stage ๋‹จ์œ„๋กœ last_n ์ •์ฑ…์„ ์ ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
91
+ "unfreeze": "last_n",
92
+ "has_bn": True,
93
+ },
94
+
95
+ "google/efficientnet-b0": {
96
+ # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
97
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ EfficientNet์ด pooled feature ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•œ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
98
+ "type": "efficientnet",
99
+ "feat_dim": 1280,
100
+ "feat_rule": "pool_or_gap",
101
+ "unfreeze": "last_n",
102
+ "has_bn": True,
103
+ },
104
+
105
+ # -------------------------
106
+ # timm (DenseNet via HF Hub)
107
+ # -------------------------
108
+ # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
109
+ # ์ด ๋ฐฑ๋ณธ์€ ๋ชจ๋ธ ๋กœ๋”์—์„œ timm์˜ "hf_hub:" ํ”„๋ฆฌํ”ฝ์Šค๋ฅผ ์‚ฌ์šฉํ•ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
110
+ "timm/densenet121.tv_in1k": {
111
+ "type": "timm_densenet",
112
+
113
+ # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
114
+ # DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
115
+ "feat_dim": 1024,
116
+
117
+ # timm forward_features typically returns a feature map that you then GAP to (B, C).
118
+ # timm์˜ forward_features๋Š” ๋ณดํ†ต feature map์„ ๋ฐ˜ํ™˜ํ•˜๊ณ  ์ดํ›„ GAP์œผ๋กœ (B, C)๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
119
+ "feat_rule": "timm_gap",
120
+
121
+ # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
122
+ # DenseNet์€ BatchNorm ์‚ฌ์šฉ์ด ๋งŽ์•„ stage1/stage2์—์„œ freeze_bn ์ฒ˜๋ฆฌ๊ฐ€ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.
123
+ "unfreeze": "last_n",
124
+ "has_bn": True,
125
+ },
126
+
127
+ # -------------------------
128
+ # torchvision (DenseNet direct)
129
+ # -------------------------
130
+ # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
131
+ # ์ด ๋ฐฑ๋ณธ์€ transformers/timm์ด ์•„๋‹ˆ๋ผ torchvision ์Šคํƒ€์ผ ๋กœ๋”ฉ ๋ฐ feature ์ถ”์ถœ์„ ๋Œ€์ƒ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค.
132
+ "torchvision/densenet121": {
133
+ "type": "torchvision_densenet",
134
+ "feat_dim": 1024,
135
+
136
+ # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
137
+ # torchvision DenseNet์€ ๋ณดํ†ต .features๋ฅผ ๋…ธ์ถœํ•˜๋ฉฐ GAP์œผ๋กœ (B, C)๋ฅผ ์–ป์Šต๋‹ˆ๋‹ค.
138
+ "feat_rule": "torchvision_densenet_gap",
139
+
140
+ # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
141
+ # unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
142
+ "unfreeze": "last_n",
143
+ "has_bn": True,
144
+ },
145
+ }
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/microsoft__resnet-50/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
class MLPHead(nn.Module):
    """
    Simple two-layer MLP classification head.

    The forward pass computes ``fc2(drop(act(fc1(x))))``: a linear projection
    to a bottleneck width, GELU, dropout, then a linear projection to the
    class logits.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (input width of the first linear layer).
    num_labels : int
        Number of classes (output width of the second linear layer).
    bottleneck : int
        Hidden dimension between the two linear layers.
    p : float
        Dropout probability applied after the activation.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names are part of the checkpoint contract (state_dict keys),
        # so they are kept exactly as they are.
        self.fc1 = nn.Linear(in_dim, bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p)
        self.fc2 = nn.Linear(bottleneck, num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map features ``x`` to class logits."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
    """
    Resolve the backbone meta dict to use at runtime.

    Lookup order:
      1) ``config.backbone_meta`` when it is a non-empty dict.
      2) ``fallback_table[config.backbone_name_or_path]`` when a fallback
         table is given and contains that key.

    Parameters
    ----------
    config : BackboneMLPHeadConfig
        Config object; only ``backbone_meta`` and ``backbone_name_or_path``
        are read, both via ``getattr`` with a ``None`` default.
    fallback_table : dict or None
        Optional mapping from backbone id to meta dict.

    Returns
    -------
    dict
        The meta dict found by the lookup above. The object is returned
        as-is (not copied).

    Raises
    ------
    ValueError
        If neither source yields a meta dict.
    """
    explicit_meta = getattr(config, "backbone_meta", None)
    if isinstance(explicit_meta, dict) and explicit_meta:
        return explicit_meta

    # No usable meta on the config: fall back to the table, if one was given.
    table = {} if fallback_table is None else fallback_table
    backbone_id = getattr(config, "backbone_name_or_path", None)
    if backbone_id not in table:
        raise ValueError(
            "config.backbone_meta is missing/empty and no fallback meta is available. "
            "Populate config.backbone_meta when saving to the Hub (single source of truth)."
        )
    return table[backbone_id]
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/microsoft__resnet-50/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/microsoft__resnet-50/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:672f7ce423ab8366b6988ab17a5946d3a9191438e7b2831ee0ac03e9ee9b28f0
3
  size 96388660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32610cc9f181dd7de16c79732221106bf7431ae82693471b9410446efe103482
3
  size 96388660
models/microsoft__swin-tiny-patch4-window7-224/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": true
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": true
52
  }
models/microsoft__swin-tiny-patch4-window7-224/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
# Whitelist of supported backbone identifiers, expressed as a Literal type so
# that static type checkers can reject unknown ids.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]
16
+
17
# ============================================================
# 2) Backbone metadata registry (feature dim / feature rule / unfreeze rule)
# ============================================================
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
#
# The key type is BackboneID so that meta keys stay aligned with the whitelist.
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # These backbones come from transformers and typically output hidden states and/or pooler outputs.

    "google/vit-base-patch16-224": {
        # type indicates which loading/forward/extraction pathway the model code should use.
        "type": "vit",

        # feat_dim is the feature vector dimension consumed by the MLP head.
        "feat_dim": 768,

        # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
        "feat_rule": "cls",  # Use last_hidden_state[:, 0, :] as CLS token embedding.

        # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
        "unfreeze": "last_n",  # Unfreeze the last n encoder blocks.

        # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
        "has_bn": False,
    },

    "microsoft/swin-tiny-patch4-window7-224": {
        # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
        "type": "swin",
        "feat_dim": 768,

        # Prefer pooler output if available, otherwise fall back to mean pooling.
        "feat_rule": "pool_or_mean",

        # Unfreeze strategy is aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },

    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.

    "microsoft/resnet-50": {
        # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,

        # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",

        # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    "google/efficientnet-b0": {
        # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",

        # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
        "feat_dim": 1024,

        # timm forward_features typically returns a feature map that you then GAP to (B, C).
        "feat_rule": "timm_gap",

        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,

        # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
        "feat_rule": "torchvision_densenet_gap",

        # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/microsoft__swin-tiny-patch4-window7-224/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
class MLPHead(nn.Module):
    """
    Simple two-layer MLP classification head.

    The forward pass computes ``fc2(drop(act(fc1(x))))``: a linear projection
    to a bottleneck width, GELU, dropout, then a linear projection to the
    class logits.

    Parameters
    ----------
    in_dim : int
        Backbone feature dimension (input width of the first linear layer).
    num_labels : int
        Number of classes (output width of the second linear layer).
    bottleneck : int
        Hidden dimension between the two linear layers.
    p : float
        Dropout probability applied after the activation.
    """

    def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
        super().__init__()
        # Attribute names are part of the checkpoint contract (state_dict keys),
        # so they are kept exactly as they are.
        self.fc1 = nn.Linear(in_dim, bottleneck)
        self.act = nn.GELU()
        self.drop = nn.Dropout(p)
        self.fc2 = nn.Linear(bottleneck, num_labels)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map features ``x`` to class logits."""
        hidden = self.fc1(x)
        hidden = self.act(hidden)
        hidden = self.drop(hidden)
        return self.fc2(hidden)
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
    """
    Resolve the backbone meta dict to use at runtime.

    Lookup order:
      1) ``config.backbone_meta`` when it is a non-empty dict.
      2) ``fallback_table[config.backbone_name_or_path]`` when a fallback
         table is given and contains that key.

    Parameters
    ----------
    config : BackboneMLPHeadConfig
        Config object; only ``backbone_meta`` and ``backbone_name_or_path``
        are read, both via ``getattr`` with a ``None`` default.
    fallback_table : dict or None
        Optional mapping from backbone id to meta dict.

    Returns
    -------
    dict
        The meta dict found by the lookup above. The object is returned
        as-is (not copied).

    Raises
    ------
    ValueError
        If neither source yields a meta dict.
    """
    explicit_meta = getattr(config, "backbone_meta", None)
    if isinstance(explicit_meta, dict) and explicit_meta:
        return explicit_meta

    # No usable meta on the config: fall back to the table, if one was given.
    table = {} if fallback_table is None else fallback_table
    backbone_id = getattr(config, "backbone_name_or_path", None)
    if backbone_id not in table:
        raise ValueError(
            "config.backbone_meta is missing/empty and no fallback meta is available. "
            "Populate config.backbone_meta when saving to the Hub (single source of truth)."
        )
    return table[backbone_id]
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/microsoft__swin-tiny-patch4-window7-224/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6a66b3ed3993cd9e93967288f9019ef2c355abdeacb5db056bd9cc5192b2624
3
  size 111128348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29c4e394ff3e53d723a82b9911acfd072ca4259b7ac667e87eadee77b645ee84
3
  size 111128348
models/timm__densenet121.tv_in1k/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/timm__densenet121.tv_in1k",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": false
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/timm__densenet121.tv_in1k",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": false
52
  }
models/timm__densenet121.tv_in1k/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
# Whitelist of supported backbone identifiers, expressed as a Literal type so
# that static type checkers can reject unknown ids.
BackboneID = Literal[
    "google/vit-base-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224",
    "microsoft/resnet-50",
    "google/efficientnet-b0",
    "timm/densenet121.tv_in1k",
    "torchvision/densenet121",
]
16
+
17
# ============================================================
# 2) Backbone metadata registry (feature dim / feature rule / unfreeze rule)
# ============================================================
# This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
#
# The key type is BackboneID so that meta keys stay aligned with the whitelist.
BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
    # -------------------------
    # Transformers (ViT/Swin)
    # -------------------------
    # These backbones come from transformers and typically output hidden states and/or pooler outputs.

    "google/vit-base-patch16-224": {
        # type indicates which loading/forward/extraction pathway the model code should use.
        "type": "vit",

        # feat_dim is the feature vector dimension consumed by the MLP head.
        "feat_dim": 768,

        # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
        "feat_rule": "cls",  # Use last_hidden_state[:, 0, :] as CLS token embedding.

        # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
        "unfreeze": "last_n",  # Unfreeze the last n encoder blocks.

        # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
        "has_bn": False,
    },

    "microsoft/swin-tiny-patch4-window7-224": {
        # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
        "type": "swin",
        "feat_dim": 768,

        # Prefer pooler output if available, otherwise fall back to mean pooling.
        "feat_rule": "pool_or_mean",

        # Unfreeze strategy is aligned with transformer-style encoder blocks.
        "unfreeze": "last_n",
        "has_bn": False,
    },

    # -------------------------
    # Transformers (CNNs)
    # -------------------------
    # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.

    "microsoft/resnet-50": {
        # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
        "type": "resnet",
        "feat_dim": 2048,

        # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
        "feat_rule": "pool_or_gap",

        # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    "google/efficientnet-b0": {
        # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
        "type": "efficientnet",
        "feat_dim": 1280,
        "feat_rule": "pool_or_gap",
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # timm (DenseNet via HF Hub)
    # -------------------------
    # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
    "timm/densenet121.tv_in1k": {
        "type": "timm_densenet",

        # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
        "feat_dim": 1024,

        # timm forward_features typically returns a feature map that you then GAP to (B, C).
        "feat_rule": "timm_gap",

        # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
        "unfreeze": "last_n",
        "has_bn": True,
    },

    # -------------------------
    # torchvision (DenseNet direct)
    # -------------------------
    # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
    "torchvision/densenet121": {
        "type": "torchvision_densenet",
        "feat_dim": 1024,

        # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
        "feat_rule": "torchvision_densenet_gap",

        # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
        "unfreeze": "last_n",
        "has_bn": True,
    },
}
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/timm__densenet121.tv_in1k/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
+ class MLPHead(nn.Module):
24
+ """
25
+ 간단한 2-layer MLP head.
26
+
27
+ Parameters
28
+ ----------
29
+ in_dim : int
30
+ backbone feature dim
31
+ num_labels : int
32
+ class count
33
+ bottleneck : int
34
+ hidden dim
35
+ p : float
36
+ dropout prob
37
+ """
38
+ def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
39
+ super().__init__()
40
+ self.fc1 = nn.Linear(in_dim, bottleneck)
41
+ self.act = nn.GELU()
42
+ self.drop = nn.Dropout(p)
43
+ self.fc2 = nn.Linear(bottleneck, num_labels)
44
+
45
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
46
+ return self.fc2(self.drop(self.act(self.fc1(x))))
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
+ def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
52
+ """
53
+ Resolve runtime backbone meta.
54
+
55
+ Priority:
56
+ 1) config.backbone_meta (preferred; required for Hub runtime determinism)
57
+ 2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
58
+
59
+ Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
60
+ """
61
+ meta = getattr(config, "backbone_meta", None)
62
+ if isinstance(meta, dict) and len(meta) > 0:
63
+ return meta
64
+
65
+ bb = getattr(config, "backbone_name_or_path", None)
66
+ if fallback_table is not None and bb in fallback_table:
67
+ return fallback_table[bb]
68
+
69
+ raise ValueError(
70
+ "config.backbone_meta is missing/empty and no fallback meta is available. "
71
+ "Populate config.backbone_meta when saving to the Hub (single source of truth)."
72
+ )
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/timm__densenet121.tv_in1k/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/timm__densenet121.tv_in1k/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eda54a9ead50e30b80b8b0e60e9024149fd0cdeada25ea7023aa27333235090f
3
  size 29293620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebc8977157008d63a00e318686b5b2fa763f2ece83748f3701671ee629ab70c9
3
  size 29293620
models/torchvision__densenet121/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_142559",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/torchvision__densenet121",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
@@ -40,15 +40,13 @@
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
- "ds_cfg.py",
44
- "ds_meta.py"
45
  ],
46
  "subfolder_code_included": true,
47
  "subfolder_code_files": [
48
  "ds_proc.py",
49
  "ds_model.py",
50
- "ds_cfg.py",
51
- "ds_meta.py"
52
  ],
53
  "processor_use_fast": false
54
  }
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260210_163348",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/torchvision__densenet121",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
40
  "root_code_files": [
41
  "ds_proc.py",
42
  "ds_model.py",
43
+ "ds_cfg.py"
 
44
  ],
45
  "subfolder_code_included": true,
46
  "subfolder_code_files": [
47
  "ds_proc.py",
48
  "ds_model.py",
49
+ "ds_cfg.py"
 
50
  ],
51
  "processor_use_fast": false
52
  }
models/torchvision__densenet121/ds_cfg.py CHANGED
@@ -1,6 +1,149 @@
1
  from transformers import PretrainedConfig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- from ds_meta import BackboneID, BACKBONE_META
4
 
5
 
6
  class BackboneMLPHeadConfig(PretrainedConfig):
 
1
  from transformers import PretrainedConfig
2
+ from typing import Literal, Any
3
+
4
+ # ============================================================
5
+ # Backbone whitelist + meta registry
6
+ # ============================================================
7
+
8
+ BackboneID = Literal[
9
+ "google/vit-base-patch16-224",
10
+ "microsoft/swin-tiny-patch4-window7-224",
11
+ "microsoft/resnet-50",
12
+ "google/efficientnet-b0",
13
+ "timm/densenet121.tv_in1k",
14
+ "torchvision/densenet121",
15
+ ]
16
+
17
+ # ============================================================
18
+ # 2) Backbone metadata registry (Feature dim/rule/unfreeze rule)
19
+ # 2) 백본 메타 레지스트리 (feature dim/rule/unfreeze rule 고정)
20
+ # ============================================================
21
+ # This table is the single source of truth for feature extraction and fine-tuning rules per backbone.
22
+ # ์ด ํ…Œ์ด๋ธ”์€ backbone๋ณ„ feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€(source of truth)์ž…๋‹ˆ๋‹ค.
23
+ #
24
+ # The key type is BackboneID to ensure meta keys never drift from the whitelist.
25
+ # ํ‚ค ํƒ€์ž…์„ BackboneID๋กœ ๊ณ ์ •ํ•˜์—ฌ ๋ฉ”ํƒ€ ํ‚ค๊ฐ€ ํ™”์ดํŠธ๋ฆฌ์ŠคํŠธ์™€ ์–ด๊ธ‹๋‚˜์ง€ ์•Š๊ฒŒ ํ•ฉ๋‹ˆ๋‹ค.
26
+ BACKBONE_META: dict[BackboneID, dict[str, Any]] = {
27
+ # -------------------------
28
+ # Transformers (ViT/Swin)
29
+ # -------------------------
30
+ # These backbones come from transformers and typically output hidden states and/or pooler outputs.
31
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers ๊ณ„์—ด์ด๋ฉฐ hidden states์™€ pooler ์ถœ๋ ฅ ๋“ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
32
+
33
+ "google/vit-base-patch16-224": {
34
+ # type indicates which loading/forward/extraction pathway the model code should use.
35
+ # type์€ ๋ชจ๋ธ ์ฝ”๋“œ๊ฐ€ ์–ด๋–ค ๋กœ๋”ฉ/forward/feature ์ถ”์ถœ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
36
+ "type": "vit",
37
+
38
+ # feat_dim is the feature vector dimension consumed by the MLP head.
39
+ # feat_dim์€ MLP head๊ฐ€ ์ž…๋ ฅ์œผ๋กœ ๋ฐ›๋Š” feature ๋ฒกํ„ฐ ์ฐจ์›์ž…๋‹ˆ๋‹ค.
40
+ "feat_dim": 768,
41
+
42
+ # feat_rule defines how to get a (B, feat_dim) tensor from backbone outputs.
43
+ # feat_rule์€ backbone ์ถœ๋ ฅ์—์„œ (B, feat_dim) ํ…์„œ๋ฅผ ์–ป๋Š” ๊ทœ์น™์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
44
+ "feat_rule": "cls", # Use last_hidden_state[:, 0, :] as CLS token embedding.
45
+ # last_hidden_state[:, 0, :]를 CLS 토큰 임베딩으로 사용합니다.
46
+
47
+ # unfreeze defines the policy to unfreeze layers during stage2 fine-tuning.
48
+ # unfreeze๋Š” stage2 ๋ฏธ์„ธ์กฐ์ •์—์„œ ์–ด๋–ค ๋ ˆ์ด์–ด๋ฅผ ํ’€์ง€ ์ •์ฑ…์„ ์ •์˜ํ•ฉ๋‹ˆ๋‹ค.
49
+ "unfreeze": "last_n", # Unfreeze the last n encoder blocks.
50
+ # encoder 블록의 마지막 n개를 unfreeze 합니다.
51
+
52
+ # has_bn indicates whether BatchNorm exists and should be handled carefully when freezing.
53
+ # has_bn์€ BatchNorm ์กด์žฌ ์—ฌ๋ถ€์ด๋ฉฐ freeze ์‹œ ํŠน๋ณ„ ์ทจ๊ธ‰์ด ํ•„์š”ํ•œ์ง€ ํŒ๋‹จ์— ์‚ฌ์šฉํ•ฉ๋‹ˆ๋‹ค.
54
+ "has_bn": False,
55
+ },
56
+
57
+ "microsoft/swin-tiny-patch4-window7-224": {
58
+ # This backbone is a Swin Transformer, which may or may not provide a pooler output depending on implementation.
59
+ # ์ด ๋ฐฑ๋ณธ์€ Swin Transformer์ด๋ฉฐ ๊ตฌํ˜„์— ๋”ฐ๋ผ pooler output ์ œ๊ณต ์—ฌ๋ถ€๊ฐ€ ๋‹ฌ๋ผ์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
60
+ "type": "swin",
61
+ "feat_dim": 768,
62
+
63
+ # Prefer pooler output if available, otherwise fall back to mean pooling.
64
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์šฐ์„  ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด mean pooling์œผ๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.
65
+ "feat_rule": "pool_or_mean",
66
+
67
+ # Unfreeze strategy is aligned with transformer-style encoder blocks.
68
+ # unfreeze 전략은 transformer 계열 encoder 블록 기준으로 맞춥니다.
69
+ "unfreeze": "last_n",
70
+ "has_bn": False,
71
+ },
72
+
73
+ # -------------------------
74
+ # Transformers (CNNs)
75
+ # -------------------------
76
+ # These backbones are CNNs exposed via transformers, usually producing pooled feature vectors or feature maps.
77
+ # ์ด ๋ฐฑ๋ณธ๋“ค์€ transformers๋กœ ๋…ธ์ถœ๋œ CNN์ด๋ฉฐ pooled feature ๋˜๋Š” feature map์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค.
78
+
79
+ "microsoft/resnet-50": {
80
+ # This entry assumes a transformers-compatible ResNet that can expose pooler or a final feature map.
81
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ ResNet์ด pooler ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•  ์ˆ˜ ์žˆ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
82
+ "type": "resnet",
83
+ "feat_dim": 2048,
84
+
85
+ # Use pooler output if the model provides it, otherwise apply global average pooling (GAP).
86
+ # pooler๊ฐ€ ์žˆ์œผ๋ฉด ์‚ฌ์šฉํ•˜๊ณ , ์—†์œผ๋ฉด global average pooling(GAP)์„ ์ ์šฉํ•ฉ๋‹ˆ๋‹ค.
87
+ "feat_rule": "pool_or_gap",
88
+
89
+ # CNN unfreeze policy can still be expressed as "last_n" at a block/stage granularity in your model code.
90
+ # CNN๋„ ๋ชจ๋ธ ์ฝ”๋“œ์—์„œ block/stage ๋‹จ์œ„๋กœ last_n ์ •์ฑ…์„ ์ ์šฉํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
91
+ "unfreeze": "last_n",
92
+ "has_bn": True,
93
+ },
94
+
95
+ "google/efficientnet-b0": {
96
+ # This entry assumes a transformers-compatible EfficientNet that exposes pooled features or a final feature map.
97
+ # ์ด ํ•ญ๋ชฉ์€ transformers ํ˜ธํ™˜ EfficientNet์ด pooled feature ๋˜๋Š” ์ตœ์ข… feature map์„ ์ œ๊ณตํ•œ๋‹ค๊ณ  ๊ฐ€์ •ํ•ฉ๋‹ˆ๋‹ค.
98
+ "type": "efficientnet",
99
+ "feat_dim": 1280,
100
+ "feat_rule": "pool_or_gap",
101
+ "unfreeze": "last_n",
102
+ "has_bn": True,
103
+ },
104
+
105
+ # -------------------------
106
+ # timm (DenseNet via HF Hub)
107
+ # -------------------------
108
+ # This backbone is loaded via timm using the "hf_hub:" prefix in your model loader.
109
+ # ์ด ๋ฐฑ๋ณธ์€ ๋ชจ๋ธ ๋กœ๋”์—์„œ timm์˜ "hf_hub:" ํ”„๋ฆฌํ”ฝ์Šค๋ฅผ ์‚ฌ์šฉํ•ด ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
110
+ "timm/densenet121.tv_in1k": {
111
+ "type": "timm_densenet",
112
+
113
+ # DenseNet-121 final channel dimension is 1024 for the canonical architecture.
114
+ # DenseNet-121의 표준 아키텍처에서 최종 채널 차원은 1024입니다.
115
+ "feat_dim": 1024,
116
+
117
+ # timm forward_features typically returns a feature map that you then GAP to (B, C).
118
+ # timm์˜ forward_features๋Š” ๋ณดํ†ต feature map์„ ๋ฐ˜ํ™˜ํ•˜๊ณ  ์ดํ›„ GAP์œผ๋กœ (B, C)๋ฅผ ๋งŒ๋“ญ๋‹ˆ๋‹ค.
119
+ "feat_rule": "timm_gap",
120
+
121
+ # DenseNet uses BatchNorm heavily, so freeze_bn behavior matters for stage1/stage2.
122
+ # DenseNet์€ BatchNorm ์‚ฌ์šฉ์ด ๋งŽ์•„ stage1/stage2์—์„œ freeze_bn ์ฒ˜๋ฆฌ๊ฐ€ ์ค‘์š”ํ•ฉ๋‹ˆ๋‹ค.
123
+ "unfreeze": "last_n",
124
+ "has_bn": True,
125
+ },
126
+
127
+ # -------------------------
128
+ # torchvision (DenseNet direct)
129
+ # -------------------------
130
+ # This backbone is intended for torchvision-style loading and feature extraction, not transformers/timm.
131
+ # ์ด ๋ฐฑ๋ณธ์€ transformers/timm์ด ์•„๋‹ˆ๋ผ torchvision ์Šคํƒ€์ผ ๋กœ๋”ฉ ๋ฐ feature ์ถ”์ถœ์„ ๋Œ€์ƒ์œผ๋กœ ํ•ฉ๋‹ˆ๋‹ค.
132
+ "torchvision/densenet121": {
133
+ "type": "torchvision_densenet",
134
+ "feat_dim": 1024,
135
+
136
+ # torchvision DenseNet usually exposes .features and you apply GAP to obtain (B, C).
137
+ # torchvision DenseNet์€ ๋ณดํ†ต .features๋ฅผ ๋…ธ์ถœํ•˜๋ฉฐ GAP์œผ๋กœ (B, C)๋ฅผ ์–ป์Šต๋‹ˆ๋‹ค.
138
+ "feat_rule": "torchvision_densenet_gap",
139
+
140
+ # Unfreeze policy remains last_n, but the interpretation must match torchvision module naming.
141
+ # unfreeze 정책은 last_n을 유지하되, 해석은 torchvision 모듈 네이밍에 맞아야 합니다.
142
+ "unfreeze": "last_n",
143
+ "has_bn": True,
144
+ },
145
+ }
146
 
 
147
 
148
 
149
  class BackboneMLPHeadConfig(PretrainedConfig):
models/torchvision__densenet121/ds_model.py CHANGED
@@ -3,7 +3,7 @@
3
 
4
  # src/ds_model.py
5
 
6
- from typing import Optional, List
7
 
8
  import torch
9
  import torch.nn as nn
@@ -17,9 +17,60 @@ from transformers.modeling_outputs import ImageClassifierOutput
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
- from ds_cfg import BackboneMLPHeadConfig
21
- from ds_meta import BACKBONE_META
22
- from mlp_head import MLPHead
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # ============================================================
25
  # (3) Model: backbone + MLP head
@@ -67,7 +118,9 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
67
 
68
  # Meta is a single source of truth for extraction and fine-tuning rules.
69
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
70
- self._meta = BACKBONE_META[config.backbone_name_or_path]
 
 
71
 
72
  # Backbone skeleton is always created without pretrained weights.
73
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
@@ -105,7 +158,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
105
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
106
  # Meta decides which loader path to use.
107
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
108
- meta = BACKBONE_META[backbone_id]
 
 
 
109
  t = meta["type"]
110
 
111
  if t == "timm_densenet":
@@ -169,7 +225,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
169
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
170
  """
171
  bb = self.config.backbone_name_or_path
172
- meta = BACKBONE_META[bb]
173
  t = meta["type"]
174
 
175
  if t == "timm_densenet":
@@ -394,7 +450,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
394
  _set_requires_grad(model.backbone, False)
395
  _set_requires_grad(model.classifier, True)
396
 
397
- meta = BACKBONE_META[model.config.backbone_name_or_path]
398
  if freeze_bn and meta.get("has_bn", False):
399
  set_bn_eval(model.backbone)
400
 
@@ -403,7 +459,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
403
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
404
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
405
  model.train()
406
- meta = BACKBONE_META[model.config.backbone_name_or_path]
407
  if keep_bn_eval and meta.get("has_bn", False):
408
  set_bn_eval(model.backbone)
409
 
@@ -431,7 +487,7 @@ def unfreeze_last_stage(
431
  if n <= 0:
432
  return
433
 
434
- meta = BACKBONE_META[model.config.backbone_name_or_path]
435
  if meta.get("unfreeze") != "last_n":
436
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
437
 
 
3
 
4
  # src/ds_model.py
5
 
6
+ from typing import Optional, List, Any, Dict
7
 
8
  import torch
9
  import torch.nn as nn
 
17
  # --- torchvision ---
18
  from torchvision import models as tv_models
19
 
20
+ from ds_cfg import BackboneMLPHeadConfig, BACKBONE_META
21
+ # from mlp_head import MLPHead
22
+
23
+ class MLPHead(nn.Module):
24
+ """
25
+ 간단한 2-layer MLP head.
26
+
27
+ Parameters
28
+ ----------
29
+ in_dim : int
30
+ backbone feature dim
31
+ num_labels : int
32
+ class count
33
+ bottleneck : int
34
+ hidden dim
35
+ p : float
36
+ dropout prob
37
+ """
38
+ def __init__(self, in_dim: int, num_labels: int, bottleneck: int = 256, p: float = 0.2):
39
+ super().__init__()
40
+ self.fc1 = nn.Linear(in_dim, bottleneck)
41
+ self.act = nn.GELU()
42
+ self.drop = nn.Dropout(p)
43
+ self.fc2 = nn.Linear(bottleneck, num_labels)
44
+
45
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
46
+ return self.fc2(self.drop(self.act(self.fc1(x))))
47
+
48
+ # ------------------------------------------------------------
49
+ # backbone_meta resolver
50
+ # ------------------------------------------------------------
51
+ def _resolve_backbone_meta(config: BackboneMLPHeadConfig, fallback_table: Dict[str, Dict[str, Any]] | None = None) -> Dict[str, Any]:
52
+ """
53
+ Resolve runtime backbone meta.
54
+
55
+ Priority:
56
+ 1) config.backbone_meta (preferred; required for Hub runtime determinism)
57
+ 2) fallback_table[config.backbone_name_or_path] (backward compatibility for local/dev)
58
+
59
+ Returns a dict with at least: type, feat_rule, feat_dim (and optional has_bn/unfreeze).
60
+ """
61
+ meta = getattr(config, "backbone_meta", None)
62
+ if isinstance(meta, dict) and len(meta) > 0:
63
+ return meta
64
+
65
+ bb = getattr(config, "backbone_name_or_path", None)
66
+ if fallback_table is not None and bb in fallback_table:
67
+ return fallback_table[bb]
68
+
69
+ raise ValueError(
70
+ "config.backbone_meta is missing/empty and no fallback meta is available. "
71
+ "Populate config.backbone_meta when saving to the Hub (single source of truth)."
72
+ )
73
+
74
 
75
  # ============================================================
76
  # (3) Model: backbone + MLP head
 
118
 
119
  # Meta is a single source of truth for extraction and fine-tuning rules.
120
  # meta๋Š” feature ์ถ”์ถœ ๋ฐ ๋ฏธ์„ธ์กฐ์ • ๊ทœ์น™์˜ ๋‹จ์ผ ๊ธฐ์ค€์ž…๋‹ˆ๋‹ค.
121
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
122
+ # Prefer config.backbone_meta to keep Hub runtime self-contained.
123
+ self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
124
 
125
  # Backbone skeleton is always created without pretrained weights.
126
  # backbone skeleton์€ ํ•ญ์ƒ pretrained weight ์—†์ด ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.
 
158
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
159
  # Meta decides which loader path to use.
160
  # meta๊ฐ€ ์–ด๋–ค ๋กœ๋” ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉํ• ์ง€ ๊ฒฐ์ •ํ•ฉ๋‹ˆ๋‹ค.
161
+ meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
162
+ if meta is None:
163
+ raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
164
+
165
  t = meta["type"]
166
 
167
  if t == "timm_densenet":
 
225
  from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
226
  """
227
  bb = self.config.backbone_name_or_path
228
+ meta = self._meta
229
  t = meta["type"]
230
 
231
  if t == "timm_densenet":
 
450
  _set_requires_grad(model.backbone, False)
451
  _set_requires_grad(model.classifier, True)
452
 
453
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
454
  if freeze_bn and meta.get("has_bn", False):
455
  set_bn_eval(model.backbone)
456
 
 
459
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
460
  # stage2: train ๋ชจ๋“œ๋กœ ๋‘๋˜ ์•ˆ์ •์„ฑ์„ ์œ„ํ•ด BN์„ eval๋กœ ์œ ์ง€ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.
461
  model.train()
462
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
463
  if keep_bn_eval and meta.get("has_bn", False):
464
  set_bn_eval(model.backbone)
465
 
 
487
  if n <= 0:
488
  return
489
 
490
+ meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
491
  if meta.get("unfreeze") != "last_n":
492
  raise RuntimeError(f"Unexpected unfreeze rule: {meta.get('unfreeze')} (expected 'last_n')")
493
 
models/torchvision__densenet121/ds_proc.py CHANGED
@@ -16,7 +16,7 @@ from transformers import AutoImageProcessor, AutoConfig
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
- from ds_meta import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
 
16
  from transformers.image_processing_base import ImageProcessingMixin
17
  from transformers.utils.generic import TensorType
18
 
19
+ from ds_cfg import BackboneID, BACKBONE_META
20
 
21
 
22
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
models/torchvision__densenet121/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8f2bb78b2e777c1612bca3678fd638acbcba9ca4ff460616987ad3ad94dab19
3
  size 33394052
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064a8e6356a4304a40f80f368a46d9b0ff4307b849cf6e458ab325d2fcfd9c63
3
  size 33394052