dsaint31 committed on
Commit
200cb5d
·
verified ·
1 Parent(s): e115a15

Add/Update backbone checkpoints (count=6)

Browse files
ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에는 timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
manifest_20260212_202546.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "timestamp": "20260212_202546",
3
+ "repo_id": "dsaint31/bb_mlp_224",
4
+ "revision": "main",
5
+ "tag": null,
6
+ "num_labels": 3,
7
+ "build_device": "mps",
8
+ "count": 6,
9
+ "items": [
10
+ {
11
+ "backbone": "google/vit-base-patch16-224",
12
+ "subdir": "models/google__vit-base-patch16-224",
13
+ "dirname": "google__vit-base-patch16-224"
14
+ },
15
+ {
16
+ "backbone": "microsoft/swin-tiny-patch4-window7-224",
17
+ "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
18
+ "dirname": "microsoft__swin-tiny-patch4-window7-224"
19
+ },
20
+ {
21
+ "backbone": "microsoft/resnet-50",
22
+ "subdir": "models/microsoft__resnet-50",
23
+ "dirname": "microsoft__resnet-50"
24
+ },
25
+ {
26
+ "backbone": "google/efficientnet-b0",
27
+ "subdir": "models/google__efficientnet-b0",
28
+ "dirname": "google__efficientnet-b0"
29
+ },
30
+ {
31
+ "backbone": "timm/densenet121.tv_in1k",
32
+ "subdir": "models/timm__densenet121.tv_in1k",
33
+ "dirname": "timm__densenet121.tv_in1k"
34
+ },
35
+ {
36
+ "backbone": "torchvision/densenet121",
37
+ "subdir": "models/torchvision__densenet121",
38
+ "dirname": "torchvision__densenet121"
39
+ }
40
+ ],
41
+ "root_code_included": true,
42
+ "root_code_files": [
43
+ "ds_proc.py",
44
+ "ds_model.py",
45
+ "ds_cfg.py"
46
+ ],
47
+ "subfolder_code_included": true,
48
+ "subfolder_code_files": [
49
+ "ds_proc.py",
50
+ "ds_model.py",
51
+ "ds_cfg.py"
52
+ ]
53
+ }
models/google__efficientnet-b0/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__efficientnet-b0",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__efficientnet-b0",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/google__efficientnet-b0/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에는 timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/google__efficientnet-b0/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/google__efficientnet-b0/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0069437f45965e8c4ac06831d6e34c4ba7603bf246b37eef446730ec04bf4e5d
3
  size 17558436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:865ceddfa7c4eb1c24844a2bea075ff25ee577fabb2886069b5478cd27bf6cac
3
  size 17558436
models/google__vit-base-patch16-224/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__vit-base-patch16-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/google__vit-base-patch16-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/google__vit-base-patch16-224/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에는 timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/google__vit-base-patch16-224/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/google__vit-base-patch16-224/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:911b24afbf6ed3a61687a5c67609c5a909dfd6f16d013bb1cad613aba5b3ebe4
3
  size 346372132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecc8942482f4acdd221a9490549b44ab8d697b3b170cacce759bfd0e215e5df
3
  size 346372132
models/microsoft__resnet-50/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__resnet-50",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__resnet-50",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/microsoft__resnet-50/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 ���현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에��� timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/microsoft__resnet-50/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/microsoft__resnet-50/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ff72671dfb9ce0dddaa0b43965385bf4185ce4210d00092b5d08ed116cecc4b
3
  size 96388660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b46be9524952c57179580e6e9728d525676dc8d1f2cf184315277e2f57bce90
3
  size 96388660
models/microsoft__swin-tiny-patch4-window7-224/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/microsoft__swin-tiny-patch4-window7-224",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/microsoft__swin-tiny-patch4-window7-224/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 ���현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에��� timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/microsoft__swin-tiny-patch4-window7-224/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:002362a9ce13fb5833945df3227b93f727613a2630c259525ac9eb3f1e5ecf69
3
  size 111128348
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ec1d099f3a94683a6ded551823d596bc3cf56dd7d801adee5dc10c5b155e52
3
  size 111128348
models/timm__densenet121.tv_in1k/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/timm__densenet121.tv_in1k",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/timm__densenet121.tv_in1k",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/timm__densenet121.tv_in1k/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
- # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 ���현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에��� timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/timm__densenet121.tv_in1k/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
  + # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제 (fail-fast).
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
  + # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함.
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/timm__densenet121.tv_in1k/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41040153bbc2066a084f36774b3ce20a45d9adf38ade5d33ae544c53d69350e6
3
  size 29293620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0108b6737a776209aa44a8119bbb5034c69205bcd45dee8c5f0e3218eea1cf3a
3
  size 29293620
models/torchvision__densenet121/config.json CHANGED
@@ -24,7 +24,7 @@
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
- "created_at": "20260210_170738",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/torchvision__densenet121",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
 
24
  "num_labels": 3,
25
  "transformers_version": "5.1.0",
26
  "ds_provenance": {
27
+ "created_at": "20260212_202546",
28
  "repo_id": "dsaint31/bb_mlp_224",
29
  "subdir": "models/torchvision__densenet121",
30
  "wrapper_class": "BackboneWithMLPHeadForImageClassification",
models/torchvision__densenet121/ds_model.py CHANGED
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
- # PreTrainedModel은 config 객체를 받아 내부에 저장하는 전제를 가집니다.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
- # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않습니다.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
- # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성됩니다.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
- # fail-fast: 학습/추론은 num_labels가 양수여야 합니다.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
- # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 그러면 안 됩니다.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
- # meta는 feature 추출 및 미세조정 규칙의 단일 기준입니다.
124
- # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
- # backbone skeleton은 항상 pretrained weight 없이 생성합니다.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
- # head shape은 meta의 feat_dim과 config.num_labels로 결정됩니다.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
- # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화합니다.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
- backbone skeleton을 건드리지 않기 위해 head만 초기화합니다.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
- HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서는 부적절합니다.
 
 
 
 
152
  """
153
  if getattr(self, "classifier", None) is not None:
154
  self.classifier.apply(self._init_weights)
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
160
  # ----------------------------
161
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
162
  # Meta decides which loader path to use.
163
- # meta가 어떤 로더 경로를 사용할지 결정합니다.
164
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
165
  if meta is None:
166
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
174
  return self._build_torchvision_densenet_skeleton(backbone_id)
175
 
176
  # For transformers backbones: build a random-weight skeleton from config only.
177
- # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성합니다.
178
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
179
  return AutoModel.from_config(bb_cfg)
180
 
181
  @staticmethod
182
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
183
  # timm is an optional dependency and should be imported lazily.
184
- # timm은 옵션 의존성이므로 지연 import 합니다.
185
  try:
186
  import timm
187
  except Exception as e:
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
190
  ) from e
191
 
192
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
193
- # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)합니다.
194
  return timm.create_model(
195
  f"hf_hub:{hf_repo_id}",
196
  pretrained=False,
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
200
  @staticmethod
201
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
202
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
203
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
204
  if model_id != "torchvision/densenet121":
205
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
206
 
207
  # Build structure only (weights=None) to avoid implicit pretrained loading.
208
- # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)합니다.
209
  m = tv_models.densenet121(weights=None)
210
  return m
211
 
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
222
  ):
223
  """
224
  Fresh-start only: inject pretrained backbone weights into the skeleton.
225
- fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입합니다.
226
 
227
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
228
- from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 절대 호출하면 안 됩니다.
229
  """
230
  bb = self.config.backbone_name_or_path
231
  meta = self._meta
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
240
  return
241
 
242
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
243
- # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사합니다.
244
  ref = AutoModel.from_pretrained(
245
  bb,
246
  low_cpu_mem_usage=low_cpu_mem_usage,
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
248
  )
249
 
250
  # strict=False is used to tolerate harmless key differences across minor versions.
251
- # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용합니다.
252
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
253
  del ref
254
 
255
  @torch.no_grad()
256
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
257
  # timm must be present for timm backbones.
258
- # timm 백본에는 timm 설치가 필요합니다.
259
  import timm
260
 
261
  # Create a pretrained reference model and copy its weights strictly.
262
- # pretrained reference 모델을 만들고 가중치를 strict하게 복사합니다.
263
  ref = timm.create_model(
264
  f"hf_hub:{hf_repo_id}",
265
  pretrained=True,
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
272
  @torch.no_grad()
273
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
274
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
275
- # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원합니다.
276
  if model_id != "torchvision/densenet121":
277
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
278
 
279
  # Use torchvision's default pretrained weights for densenet121.
280
- # torchvision의 densenet121 기본 pretrained weights를 사용합니다.
281
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
282
 
283
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
290
  @staticmethod
291
  def _pool_or_gap(outputs) -> torch.Tensor:
292
  # Some transformers vision CNNs provide pooler_output explicitly.
293
- # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공합니다.
294
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
295
  x = outputs.pooler_output
296
  if x.dim() == 2:
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
300
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
301
 
302
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
303
- # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용합니다.
304
  x = outputs.last_hidden_state
305
  if x.dim() == 4:
306
  return x.mean(dim=(2, 3))
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
312
 
313
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
314
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
315
- # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적으로 유지되어야 합니다.
316
  rule = self._meta["feat_rule"]
317
 
318
  if rule == "cls":
319
  # ViT-style: use CLS token embedding from last_hidden_state.
320
- # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용합니다.
321
  return outputs.last_hidden_state[:, 0, :]
322
 
323
  if rule == "pool_or_mean":
324
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
325
- # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용합니다.
326
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
327
  return outputs.pooler_output
328
  return outputs.last_hidden_state.mean(dim=1)
329
 
330
  if rule == "pool_or_gap":
331
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
332
- # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용합니다.
333
  return self._pool_or_gap(outputs)
334
 
335
  if rule == "timm_gap":
336
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
337
- # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 만듭니다.
338
  if not isinstance(outputs, torch.Tensor):
339
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
340
  if outputs.dim() != 4:
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
343
 
344
  if rule == "torchvision_densenet_gap":
345
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
346
- # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요합니다.
347
  if not isinstance(outputs, torch.Tensor):
348
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
349
  if outputs.dim() != 4:
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
362
  **kwargs,
363
  ):
364
  # Type decides the backbone forward path and output format.
365
- # type이 backbone forward 경로 및 출력 포맷을 결정합니다.
366
  t = self._meta["type"]
367
 
368
  if t == "timm_densenet":
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
394
 
395
  else:
396
  # Transformers vision models are called with pixel_values and return ModelOutput.
397
- # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환합니다.
398
  outputs = self.backbone(
399
  pixel_values=pixel_values,
400
  output_attentions=output_attentions,
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
407
  attentions = getattr(outputs, "attentions", None)
408
 
409
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
410
- # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환합니다.
411
  logits = self.classifier(feats)
412
 
413
  loss = None
414
  if labels is not None:
415
  # Cross entropy expects labels as class indices in [0, num_labels).
416
- # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대합니다.
417
  loss = F.cross_entropy(logits, labels)
418
 
419
  if not return_dict:
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
434
  # ============================================================
435
  def _set_requires_grad(module: nn.Module, flag: bool):
436
  # Toggle requires_grad for all parameters in a module.
437
- # 모듈의 모든 파라미터에 대해 requires_grad를 토글합니다.
438
  for p in module.parameters():
439
  p.requires_grad = flag
440
 
441
 
442
  def set_bn_eval(module: nn.Module):
443
  # Put BatchNorm layers into eval mode to freeze running stats.
444
- # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정합니다.
445
  for m in module.modules():
446
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
447
  m.eval()
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
449
 
450
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
451
  # Stage1: freeze backbone and train only the head.
452
- # stage1: backbone을 freeze하고 head만 학습합니다.
453
  _set_requires_grad(model.backbone, False)
454
  _set_requires_grad(model.classifier, True)
455
 
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
460
 
461
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
462
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
463
- # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있습니다.
464
  model.train()
465
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
466
  if keep_bn_eval and meta.get("has_bn", False):
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
469
 
470
  def trainable_summary(model: nn.Module):
471
  # Print a compact summary of trainable parameters.
472
- # 학습 가능 파라미터 요약을 간단히 출력합니다.
473
  total = sum(p.numel() for p in model.parameters())
474
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
475
  ratio = trainable / total if total > 0 else 0.0
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
483
  keep_bn_eval: bool = True,
484
  ):
485
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
486
  - # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현합니다.
487
  freeze_backbone(model, freeze_bn=keep_bn_eval)
488
 
489
  n = int(last_n)
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
498
 
499
  if bb_type == "vit":
500
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
501
- # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 있습니다.
502
  blocks = list(model.backbone.encoder.layer)
503
  for blk in blocks[-n:]:
504
  _set_requires_grad(blk, True)
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
506
 
507
  if bb_type == "swin":
508
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
509
- # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze 합니다.
510
  stages = list(model.backbone.encoder.layers)
511
  blocks: List[nn.Module] = []
512
  for st in stages:
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
517
 
518
  if bb_type == "resnet":
519
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
520
- # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze 합니다.
521
  bb = model.backbone
522
  for name in ("layer1", "layer2", "layer3", "layer4"):
523
  if not hasattr(bb, name):
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
538
 
539
  if bb_type == "efficientnet":
540
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
541
- # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze 합니다.
542
  bb = model.backbone
543
  if not hasattr(bb, "features"):
544
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
556
 
557
  if bb_type in ("timm_densenet", "torchvision_densenet"):
558
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
559
- # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze 합니다.
560
  bb = model.backbone
561
  if not hasattr(bb, "features"):
562
  raise RuntimeError("Unexpected DenseNet: missing features")
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
575
 
576
  def _denselayers(db: nn.Module) -> List[nn.Module]:
577
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
578
- # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환합니다.
579
  return list(db.children())
580
 
581
  blocks: List[nn.Module] = []
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
600
  # register
601
  # -------------------------
602
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
603
- # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록합니다.
604
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
 
94
 
95
  def __init__(self, config: BackboneMLPHeadConfig):
96
  # PreTrainedModel expects a config object and stores it internally.
97
+ # PreTrainedModel은 config 객체를 받아 내부에 저장함.
98
  super().__init__(config)
99
 
100
  # Fail-fast: the model is not meant to be instantiated without a valid backbone id.
101
+ # fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
102
  #
103
  # Note: Transformers may create configs with no args, but models are conventionally created with configs.
104
+ # 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
105
  if config.backbone_name_or_path is None:
106
  raise ValueError(
107
  "config.backbone_name_or_path is None. "
 
109
  )
110
 
111
  # Fail-fast: training/inference requires a positive number of labels.
112
+ # fail-fast: 학습/추론은 num_labels가 양수여야 함.
113
  #
114
  # Config may exist in a minimal form for internal serialization paths, but the model should not.
115
+ # config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
116
  if int(getattr(config, "num_labels", 0)) <= 0:
117
  raise ValueError(
118
  f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
 
120
  )
121
 
122
  # Meta is a single source of truth for extraction and fine-tuning rules.
123
+ # meta는 feature 추출 및 미세조정 규칙의 단일 기준.
124
+ # Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
125
  # Prefer config.backbone_meta to keep Hub runtime self-contained.
126
  self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
127
 
128
  # Backbone skeleton is always created without pretrained weights.
129
+ # backbone skeleton은 항상 pretrained weight 없이 생성.
130
  self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
131
 
132
  # Head shape is driven by meta feat_dim and config.num_labels.
133
+ # head shape은 meta의 feat_dim과 config.num_labels로 결정.
134
  self.classifier = MLPHead(
135
  in_dim=int(self._meta["feat_dim"]),
136
  num_labels=int(config.num_labels),
 
139
  )
140
 
141
  # HF initialization hook, but we override init_weights to initialize head-only.
142
+ # HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
143
  self.post_init()
144
 
145
  def init_weights(self):
146
  """
147
  Initialize only the head to avoid touching the backbone skeleton.
148
+ backbone skeleton을 건드리지 않기 위해 head만 초기화.
149
 
150
  HF's default init may traverse the entire module tree, which is undesirable here.
151
+ HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
152
+
153
+ 초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
154
+ 이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
155
+ 때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
156
  """
157
  if getattr(self, "classifier", None) is not None:
158
  self.classifier.apply(self._init_weights)
 
164
  # ----------------------------
165
  def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
166
  # Meta decides which loader path to use.
167
+ # meta가 어떤 로더 경로를 사용할지 결정.
168
  meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
169
  if meta is None:
170
  raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
 
178
  return self._build_torchvision_densenet_skeleton(backbone_id)
179
 
180
  # For transformers backbones: build a random-weight skeleton from config only.
181
+ # transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
182
  bb_cfg = AutoConfig.from_pretrained(backbone_id)
183
  return AutoModel.from_config(bb_cfg)
184
 
185
  @staticmethod
186
  def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
187
  # timm is an optional dependency and should be imported lazily.
188
+ # timm은 옵션 의존성이므로 지연 import 수행.
189
  try:
190
  import timm
191
  except Exception as e:
 
194
  ) from e
195
 
196
  # Build structure only (pretrained=False) and remove classifier head (num_classes=0).
197
+ # 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
198
  return timm.create_model(
199
  f"hf_hub:{hf_repo_id}",
200
  pretrained=False,
 
204
  @staticmethod
205
  def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
206
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
207
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
208
  if model_id != "torchvision/densenet121":
209
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
210
 
211
  # Build structure only (weights=None) to avoid implicit pretrained loading.
212
+ # implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
213
  m = tv_models.densenet121(weights=None)
214
  return m
215
 
 
226
  ):
227
  """
228
  Fresh-start only: inject pretrained backbone weights into the skeleton.
229
+ fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
230
 
231
  Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
232
+ from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
233
  """
234
  bb = self.config.backbone_name_or_path
235
  meta = self._meta
 
244
  return
245
 
246
  # For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
247
+ # transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
248
  ref = AutoModel.from_pretrained(
249
  bb,
250
  low_cpu_mem_usage=low_cpu_mem_usage,
 
252
  )
253
 
254
  # strict=False is used to tolerate harmless key differences across minor versions.
255
+ # strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
256
  self.backbone.load_state_dict(ref.state_dict(), strict=False)
257
  del ref
258
 
259
  @torch.no_grad()
260
  def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
261
  # timm must be present for timm backbones.
262
+ # timm 백본에��� timm 설치가 필요.
263
  import timm
264
 
265
  # Create a pretrained reference model and copy its weights strictly.
266
+ # pretrained reference 모델을 만들고 가중치를 strict하게 복사.
267
  ref = timm.create_model(
268
  f"hf_hub:{hf_repo_id}",
269
  pretrained=True,
 
276
  @torch.no_grad()
277
  def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
278
  # This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
279
+ # 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
280
  if model_id != "torchvision/densenet121":
281
  raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
282
 
283
  # Use torchvision's default pretrained weights for densenet121.
284
+ # torchvision의 densenet121 기본 pretrained weights를 사용.
285
  ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
286
 
287
  self.backbone.load_state_dict(ref.state_dict(), strict=True)
 
294
  @staticmethod
295
  def _pool_or_gap(outputs) -> torch.Tensor:
296
  # Some transformers vision CNNs provide pooler_output explicitly.
297
+ # 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
298
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
299
  x = outputs.pooler_output
300
  if x.dim() == 2:
 
304
  raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
305
 
306
  # Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
307
+ # 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
308
  x = outputs.last_hidden_state
309
  if x.dim() == 4:
310
  return x.mean(dim=(2, 3))
 
316
 
317
  def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
318
  # Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
319
+ # feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
320
  rule = self._meta["feat_rule"]
321
 
322
  if rule == "cls":
323
  # ViT-style: use CLS token embedding from last_hidden_state.
324
+ # ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
325
  return outputs.last_hidden_state[:, 0, :]
326
 
327
  if rule == "pool_or_mean":
328
  # Swin-style: prefer pooler_output if present, else mean-pool over tokens.
329
+ # Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
330
  if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
331
  return outputs.pooler_output
332
  return outputs.last_hidden_state.mean(dim=1)
333
 
334
  if rule == "pool_or_gap":
335
  # CNN-style: use pooler_output if present, else GAP over spatial dims.
336
+ # CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
337
  return self._pool_or_gap(outputs)
338
 
339
  if rule == "timm_gap":
340
  # timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
341
+ # timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
342
  if not isinstance(outputs, torch.Tensor):
343
  raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
344
  if outputs.dim() != 4:
 
347
 
348
  if rule == "torchvision_densenet_gap":
349
  # torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
350
+ # torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
351
  if not isinstance(outputs, torch.Tensor):
352
  raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
353
  if outputs.dim() != 4:
 
366
  **kwargs,
367
  ):
368
  # Type decides the backbone forward path and output format.
369
+ # type이 backbone forward 경로 및 출력 포맷을 결정.
370
  t = self._meta["type"]
371
 
372
  if t == "timm_densenet":
 
398
 
399
  else:
400
  # Transformers vision models are called with pixel_values and return ModelOutput.
401
+ # transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
402
  outputs = self.backbone(
403
  pixel_values=pixel_values,
404
  output_attentions=output_attentions,
 
411
  attentions = getattr(outputs, "attentions", None)
412
 
413
  # Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
414
+ # classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
415
  logits = self.classifier(feats)
416
 
417
  loss = None
418
  if labels is not None:
419
  # Cross entropy expects labels as class indices in [0, num_labels).
420
+ # cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
421
  loss = F.cross_entropy(logits, labels)
422
 
423
  if not return_dict:
 
438
  # ============================================================
439
  def _set_requires_grad(module: nn.Module, flag: bool):
440
  # Toggle requires_grad for all parameters in a module.
441
+ # 모듈의 모든 파라미터에 대해 requires_grad를 토글.
442
  for p in module.parameters():
443
  p.requires_grad = flag
444
 
445
 
446
  def set_bn_eval(module: nn.Module):
447
  # Put BatchNorm layers into eval mode to freeze running stats.
448
+ # BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
449
  for m in module.modules():
450
  if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
451
  m.eval()
 
453
 
454
  def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
455
  # Stage1: freeze backbone and train only the head.
456
+ # stage1: backbone을 freeze하고 head만 학습.
457
  _set_requires_grad(model.backbone, False)
458
  _set_requires_grad(model.classifier, True)
459
 
 
464
 
465
  def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
466
  # Stage2: train mode, optionally keeping BN layers in eval for stability.
467
+ # stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
468
  model.train()
469
  meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
470
  if keep_bn_eval and meta.get("has_bn", False):
 
473
 
474
  def trainable_summary(model: nn.Module):
475
  # Print a compact summary of trainable parameters.
476
+ # 학습 가능 파라미터 요약을 간단히 출력.
477
  total = sum(p.numel() for p in model.parameters())
478
  trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
479
  ratio = trainable / total if total > 0 else 0.0
 
487
  keep_bn_eval: bool = True,
488
  ):
489
  # This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
490
+ # 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
491
  freeze_backbone(model, freeze_bn=keep_bn_eval)
492
 
493
  n = int(last_n)
 
502
 
503
  if bb_type == "vit":
504
  # ViT blocks live under backbone.encoder.layer in the transformers implementation.
505
+ # ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
506
  blocks = list(model.backbone.encoder.layer)
507
  for blk in blocks[-n:]:
508
  _set_requires_grad(blk, True)
 
510
 
511
  if bb_type == "swin":
512
  # Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
513
+ # Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
514
  stages = list(model.backbone.encoder.layers)
515
  blocks: List[nn.Module] = []
516
  for st in stages:
 
521
 
522
  if bb_type == "resnet":
523
  # ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
524
+ # ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
525
  bb = model.backbone
526
  for name in ("layer1", "layer2", "layer3", "layer4"):
527
  if not hasattr(bb, name):
 
542
 
543
  if bb_type == "efficientnet":
544
  # EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
545
+ # transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
546
  bb = model.backbone
547
  if not hasattr(bb, "features"):
548
  raise RuntimeError("Unexpected EfficientNet structure: missing features")
 
560
 
561
  if bb_type in ("timm_densenet", "torchvision_densenet"):
562
  # DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
563
+ # DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
564
  bb = model.backbone
565
  if not hasattr(bb, "features"):
566
  raise RuntimeError("Unexpected DenseNet: missing features")
 
579
 
580
  def _denselayers(db: nn.Module) -> List[nn.Module]:
581
  # Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
582
+ # denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
583
  return list(db.children())
584
 
585
  blocks: List[nn.Module] = []
 
604
  # register
605
  # -------------------------
606
  # Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
607
+ # from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
608
  BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
models/torchvision__densenet121/ds_proc.py CHANGED
@@ -4,8 +4,8 @@
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
- # (4) ImageProcessor (AutoImageProcessor integration)
8
- # (4) ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
@@ -27,41 +27,38 @@ except ImportError:
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
- 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 출력합니다.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
- save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 합니다.
37
-
38
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
39
- 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됩니다.
40
-
41
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
42
- 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성됩니다.
43
-
44
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
45
- 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 합니다.
46
  """
47
 
48
  # HF vision models conventionally expect "pixel_values" as the primary input key.
49
- # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대합니다.
50
  model_input_names = ["pixel_values"]
51
 
52
  def __init__(
53
  self,
54
  backbone_name_or_path: BackboneID,
55
- is_training: bool = False,
56
  use_fast: bool = False,
57
  **kwargs,
58
  ):
59
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
60
- # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리합니다.
61
  super().__init__(**kwargs)
62
 
63
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
64
- # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제합니다.
65
  if backbone_name_or_path not in BACKBONE_META:
66
  raise ValueError(
67
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
69
  )
70
 
71
  # Serializable fields only: these should appear in preprocessor_config.json.
72
- # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 합니다.
73
  self.backbone_name_or_path = backbone_name_or_path
74
  self.is_training = bool(is_training)
75
 
76
  # Reproducibility switch for transformers processors.
77
- # transformers processor의 fast/slow 선택을 재현 가능하게 고정합니다.
78
  self.use_fast = bool(use_fast)
79
 
80
  # Runtime-only fields: must never be serialized.
81
- # 런타임 전용 필드: 절대 직렬화되면 안 됩니다.
82
  self._meta = None
83
- self._delegate = None
84
- self._timm_transform = None
85
  self._torchvision_transform = None
86
 
87
  # Build runtime objects according to backbone type.
88
- # backbone type에 따라 런타임 객체를 구성합니다.
89
  self._build_runtime()
90
 
91
  # ============================================================
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
95
  def _build_runtime(self):
96
  """
97
  Build runtime delegate/transform based on BACKBONE_META["type"].
98
- BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성합니다.
99
  """
100
  meta = BACKBONE_META[self.backbone_name_or_path]
101
  self._meta = meta
102
 
103
  # Always reset runtime fields before rebuilding.
104
- # 재구성 전 런타임 필드는 항상 초기화합니다.
105
  self._delegate = None
106
  self._timm_transform = None
107
  self._torchvision_transform = None
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
110
 
111
  if t == "timm_densenet":
112
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
113
- # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용합니다.
114
  self._timm_transform = self._build_timm_transform(
115
  backbone_id=self.backbone_name_or_path,
116
  is_training=self.is_training,
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
119
 
120
  if t == "torchvision_densenet":
121
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
122
- # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요합니다.
123
  self._torchvision_transform = self._build_torchvision_densenet_transform(
124
  is_training=self.is_training
125
  )
126
  return
127
 
128
  # Default: transformers backbone delegates to its official AutoImageProcessor.
129
- # 기본: transformers 백본은 공식 AutoImageProcessor에 위임합니다.
130
  #
131
  # IMPORTANT:
132
- # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달합니다.
133
  self._delegate = AutoImageProcessor.from_pretrained(
134
  self.backbone_name_or_path,
135
  use_fast=self.use_fast,
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
140
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
141
  """
142
  Create timm transform without storing non-serializable objects in config.
143
- 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성합니다.
144
  """
145
  try:
146
  import timm
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
151
  ) from e
152
 
153
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
154
- # data config 추출만 필요하므로 pretrained=False를 우선 사용합니다.
155
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
156
  dc = resolve_model_data_config(m)
157
 
158
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
159
- # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환합니다.
160
- tfm = create_transform(**dc, is_training=is_training)
161
  return tfm
162
 
163
  @staticmethod
164
  def _build_torchvision_densenet_transform(*, is_training: bool):
165
  """
166
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
167
- DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성합니다.
168
  """
169
  try:
170
  from torchvision import transforms
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
174
  ) from e
175
 
176
  # These are the standard ImageNet normalization stats used by torchvision weights.
177
- # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계입니다.
178
  mean = (0.485, 0.456, 0.406)
179
- std = (0.229, 0.224, 0.225)
180
 
181
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
182
- # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용합니다.
183
  if is_training:
184
  return transforms.Compose(
185
  [
186
- transforms.RandomResizedCrop(224),
187
- transforms.RandomHorizontalFlip(p=0.5),
 
188
  transforms.ToTensor(),
189
  transforms.Normalize(mean=mean, std=std),
190
  ]
191
  )
192
 
193
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
194
- # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용합니다.
195
  return transforms.Compose(
196
  [
197
  transforms.Resize(256),
198
- transforms.CenterCrop(224),
199
  transforms.ToTensor(),
200
  transforms.Normalize(mean=mean, std=std),
201
  ]
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
208
  def to_dict(self) -> dict[str, Any]:
209
  """
210
  Return a JSON-serializable dict for preprocessor_config.json.
211
- preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환합니다.
212
 
213
  Important: do not leak runtime objects into the serialized dict.
214
- 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됩니다.
215
  """
216
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
217
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
218
  d = super().to_dict()
219
 
220
  # Force minimal stable fields for long-term compatibility.
221
- # 장기 호환을 위해 최소 안정 필드를 강제합니다.
222
- d["image_processor_type"] = self.__class__.__name__
223
  d["backbone_name_or_path"] = self.backbone_name_or_path
224
  d["is_training"] = self.is_training
225
- d["use_fast"] = self.use_fast
226
 
227
  # Remove any runtime-only fields defensively.
228
- # 런타임 전용 필드는 보수적으로 제거합니다.
229
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
230
  d.pop(key, None)
231
 
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
235
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
236
  """
237
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
238
- BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로입니다.
239
  """
240
  backbone = image_processor_dict.get("backbone_name_or_path", None)
241
  if backbone is None:
242
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
243
 
244
  is_training = bool(image_processor_dict.get("is_training", False))
245
- use_fast = bool(image_processor_dict.get("use_fast", False))
246
 
247
  return cls(
248
  backbone_name_or_path=backbone,
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
255
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
256
  """
257
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
258
- AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 경로입니다.
259
 
260
  Strategy:
261
  전략:
262
 
263
  - Read config.json via AutoConfig and recover backbone_name_or_path.
264
- AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구합니다.
265
  """
266
 
267
  # is_training is runtime-only and should default to False for inference/serving.
268
- # is_training은 런타임 전용이며 추론/서빙 기본값은 False 맞습니다.
269
  #
270
  # IMPORTANT:
271
- # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영합니다.
272
  use_fast = bool(kwargs.pop("use_fast", False))
273
 
274
  kwargs.pop("trust_remote_code", None)
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
289
  @staticmethod
290
  def _ensure_list(images: Any) -> list[Any]:
291
  # Normalize scalar image input to a list for uniform processing.
292
- # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용합니다.
293
  if isinstance(images, (list, tuple)):
294
  return list(images)
295
  return [images]
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
297
  @staticmethod
298
  def _to_pil_rgb(x: Any):
299
  # Convert common image inputs into PIL RGB images.
300
- # 일반적인 입력을 PIL RGB 이미지로 변환합니다.
301
  from PIL import Image as PILImage
302
 
303
  if isinstance(x, PILImage.Image):
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
314
  ) -> dict[str, Any]:
315
  """
316
  Convert images into {"pixel_values": Tensor/ndarray}.
317
- 이미지를 {"pixel_values": Tensor/ndarray}로 변환합니다.
318
  """
319
  images = self._ensure_list(images)
320
 
321
  # Rebuild runtime if needed (e.g., right after deserialization).
322
- # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성합니다.
323
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
324
  self._build_runtime()
325
 
326
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
327
- # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
328
  if self._timm_transform is not None:
329
  pv: list[torch.Tensor] = []
330
  for im in images:
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
337
  return self._format_return(pixel_values, return_tensors)
338
 
339
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
340
- # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32입니다.
341
  if self._torchvision_transform is not None:
342
  pv: list[torch.Tensor] = []
343
  for im in images:
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
350
  return self._format_return(pixel_values, return_tensors)
351
 
352
  # transformers delegate path: rely on official processor behavior.
353
- # transformers 위임 경로: 공식 processor 동작을 그대로 사용합니다.
354
  if self._delegate is None:
355
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
356
 
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
360
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
361
  """
362
  Format pixel_values according to return_tensors.
363
- return_tensors에 맞춰 pixel_values 반환 포맷을 맞춥니다.
364
  """
365
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
366
  return {"pixel_values": pixel_values}
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
370
 
371
 
372
  # Register this processor for AutoImageProcessor resolution.
373
- # AutoImageProcessor 해석을 위해 이 processor를 등록합니다.
374
  if __name__ != "__main__":
375
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
 
4
  # src/ds_proc.py
5
 
6
  # ============================================================
7
+ # ImageProcessor (AutoImageProcessor integration)
8
+ # ImageProcessor (AutoImageProcessor 연동)
9
  # ============================================================
10
 
11
  from typing import Any
 
27
  class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
28
  """
29
  This processor performs image preprocessing and outputs {"pixel_values": ...}.
30
+ 이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
31
 
32
  Key requirements:
33
  핵심 요구사항:
34
 
35
  1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
36
+ save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
 
37
  2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
38
+ 런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
 
39
  3) Runtime objects are rebuilt at init/load time based on backbone meta.
40
+ 런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
 
41
  4) For reproducibility, use_fast must be explicitly persisted and honored on load.
42
+ 재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
43
  """
44
 
45
  # HF vision models conventionally expect "pixel_values" as the primary input key.
46
+ # HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
47
  model_input_names = ["pixel_values"]
48
 
49
  def __init__(
50
  self,
51
  backbone_name_or_path: BackboneID,
52
+ is_training: bool = False, # timm 에서 data augmentation 용.
53
  use_fast: bool = False,
54
  **kwargs,
55
  ):
56
  # ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
57
+ # ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
58
  super().__init__(**kwargs)
59
 
60
  # Enforce whitelist via BACKBONE_META to keep behavior stable.
61
+ # 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
62
  if backbone_name_or_path not in BACKBONE_META:
63
  raise ValueError(
64
  f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
 
66
  )
67
 
68
  # Serializable fields only: these should appear in preprocessor_config.json.
69
+ # 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
70
  self.backbone_name_or_path = backbone_name_or_path
71
  self.is_training = bool(is_training)
72
 
73
  # Reproducibility switch for transformers processors.
74
+ # transformers processor의 fast/slow 선택을 재현 가능하게 고정.
75
  self.use_fast = bool(use_fast)
76
 
77
  # Runtime-only fields: must never be serialized.
78
+ # 런타임 전용 필드: 절대 직렬화되면 안 됨.
79
  self._meta = None
80
+ self._delegate = None
81
+ self._timm_transform = None
82
  self._torchvision_transform = None
83
 
84
  # Build runtime objects according to backbone type.
85
+ # backbone type에 따라 런타임 객체를 구성.
86
  self._build_runtime()
87
 
88
  # ============================================================
 
92
  def _build_runtime(self):
93
  """
94
  Build runtime delegate/transform based on BACKBONE_META["type"].
95
+ BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
96
  """
97
  meta = BACKBONE_META[self.backbone_name_or_path]
98
  self._meta = meta
99
 
100
  # Always reset runtime fields before rebuilding.
101
+ # 재구성 전 런타임 필드는 항상 초기화.
102
  self._delegate = None
103
  self._timm_transform = None
104
  self._torchvision_transform = None
 
107
 
108
  if t == "timm_densenet":
109
  # timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
110
+ # timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
111
  self._timm_transform = self._build_timm_transform(
112
  backbone_id=self.backbone_name_or_path,
113
  is_training=self.is_training,
 
116
 
117
  if t == "torchvision_densenet":
118
  # torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
119
+ # torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
120
  self._torchvision_transform = self._build_torchvision_densenet_transform(
121
  is_training=self.is_training
122
  )
123
  return
124
 
125
  # Default: transformers backbone delegates to its official AutoImageProcessor.
126
+ # 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
127
  #
128
  # IMPORTANT:
129
+ # - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
130
  self._delegate = AutoImageProcessor.from_pretrained(
131
  self.backbone_name_or_path,
132
  use_fast=self.use_fast,
 
137
  def _build_timm_transform(*, backbone_id: str, is_training: bool):
138
  """
139
  Create timm transform without storing non-serializable objects in config.
140
+ 비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
141
  """
142
  try:
143
  import timm
 
148
  ) from e
149
 
150
  # We only need model metadata to resolve data config, so pretrained=False is preferred.
151
+ # data config 추출만 필요하므로 pretrained=False를 우선 사용.
152
  m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
153
  dc = resolve_model_data_config(m)
154
 
155
  # create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
156
+ # create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
157
+ tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
158
  return tfm
159
 
160
  @staticmethod
161
  def _build_torchvision_densenet_transform(*, is_training: bool):
162
  """
163
  Build torchvision preprocessing for DenseNet-121 (224 pipeline).
164
+ DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
165
  """
166
  try:
167
  from torchvision import transforms
 
171
  ) from e
172
 
173
  # These are the standard ImageNet normalization stats used by torchvision weights.
174
+ # 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
175
  mean = (0.485, 0.456, 0.406)
176
+ std = (0.229, 0.224, 0.225)
177
 
178
  # Training pipeline typically uses RandomResizedCrop and horizontal flip.
179
+ # 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
180
  if is_training:
181
  return transforms.Compose(
182
  [
183
+ # transforms.RandomResizedCrop(224),
184
+ # transforms.RandomHorizontalFlip(p=0.5),
185
+ transforms.Resize(224),
186
  transforms.ToTensor(),
187
  transforms.Normalize(mean=mean, std=std),
188
  ]
189
  )
190
 
191
  # Inference pipeline typically uses Resize(256) + CenterCrop(224).
192
+ # 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
193
  return transforms.Compose(
194
  [
195
  transforms.Resize(256),
196
+ # transforms.CenterCrop(224),
197
  transforms.ToTensor(),
198
  transforms.Normalize(mean=mean, std=std),
199
  ]
 
206
  def to_dict(self) -> dict[str, Any]:
207
  """
208
  Return a JSON-serializable dict for preprocessor_config.json.
209
+ preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
210
 
211
  Important: do not leak runtime objects into the serialized dict.
212
+ 중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
213
  """
214
  # ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
215
  # ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
216
  d = super().to_dict()
217
 
218
  # Force minimal stable fields for long-term compatibility.
219
+ # 장기 호환을 위해 최소 안정 필드를 강제로 지정.
220
+ d["image_processor_type"] = self.__class__.__name__
221
  d["backbone_name_or_path"] = self.backbone_name_or_path
222
  d["is_training"] = self.is_training
223
+ d["use_fast"] = self.use_fast
224
 
225
  # Remove any runtime-only fields defensively.
226
+ # 런타임 전용 필드는 보수적으로 제거.
227
  for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
228
  d.pop(key, None)
229
 
 
233
  def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
234
  """
235
  Standard load path used by BaseImageProcessor / AutoImageProcessor.
236
+ BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
237
  """
238
  backbone = image_processor_dict.get("backbone_name_or_path", None)
239
  if backbone is None:
240
  raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
241
 
242
  is_training = bool(image_processor_dict.get("is_training", False))
243
+ use_fast = bool(image_processor_dict.get("use_fast", False))
244
 
245
  return cls(
246
  backbone_name_or_path=backbone,
 
253
  def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
254
  """
255
  Fallback path if AutoImageProcessor calls class.from_pretrained directly.
256
+ AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
257
 
258
  Strategy:
259
  전략:
260
 
261
  - Read config.json via AutoConfig and recover backbone_name_or_path.
262
+ AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
263
  """
264
 
265
  # is_training is runtime-only and should default to False for inference/serving.
266
+ # is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
267
  #
268
  # IMPORTANT:
269
+ # - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
270
  use_fast = bool(kwargs.pop("use_fast", False))
271
 
272
  kwargs.pop("trust_remote_code", None)
 
287
  @staticmethod
288
  def _ensure_list(images: Any) -> list[Any]:
289
  # Normalize scalar image input to a list for uniform processing.
290
+ # 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
291
  if isinstance(images, (list, tuple)):
292
  return list(images)
293
  return [images]
 
295
  @staticmethod
296
  def _to_pil_rgb(x: Any):
297
  # Convert common image inputs into PIL RGB images.
298
+ # 일반적인 입력을 PIL RGB 이미지로 변환.
299
  from PIL import Image as PILImage
300
 
301
  if isinstance(x, PILImage.Image):
 
312
  ) -> dict[str, Any]:
313
  """
314
  Convert images into {"pixel_values": Tensor/ndarray}.
315
+ 이미지를 {"pixel_values": Tensor/ndarray}로 변환.
316
  """
317
  images = self._ensure_list(images)
318
 
319
  # Rebuild runtime if needed (e.g., right after deserialization).
320
+ # 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
321
  if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
322
  self._build_runtime()
323
 
324
  # timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
325
+ # timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
326
  if self._timm_transform is not None:
327
  pv: list[torch.Tensor] = []
328
  for im in images:
 
335
  return self._format_return(pixel_values, return_tensors)
336
 
337
  # torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
338
+ # torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
339
  if self._torchvision_transform is not None:
340
  pv: list[torch.Tensor] = []
341
  for im in images:
 
348
  return self._format_return(pixel_values, return_tensors)
349
 
350
  # transformers delegate path: rely on official processor behavior.
351
+ # transformers 위임 경로: 공식 processor 동작을 그대로 사용.
352
  if self._delegate is None:
353
  raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
354
 
 
358
  def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
359
  """
360
  Format pixel_values according to return_tensors.
361
+ return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
362
  """
363
  if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
364
  return {"pixel_values": pixel_values}
 
368
 
369
 
370
  # Register this processor for AutoImageProcessor resolution.
371
+ # AutoImageProcessor 해석을 위해 이 processor를 등록.
372
  if __name__ != "__main__":
373
  BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
models/torchvision__densenet121/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:717710e4f328713f7cfb85f1d12c5c1b24ff290c7afea055ff32dd5ad0d0bcc3
3
  size 33394052
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3d373da2c45fdd83a13526586f67a2ccdc791505d1d5d26f878d6cb2a982e87
3
  size 33394052