Add/Update backbone checkpoints (count=6)
Browse files- ds_model.py +55 -51
- ds_proc.py +57 -59
- manifest_20260212_202546.json +53 -0
- models/google__efficientnet-b0/config.json +1 -1
- models/google__efficientnet-b0/ds_model.py +55 -51
- models/google__efficientnet-b0/ds_proc.py +57 -59
- models/google__efficientnet-b0/model.safetensors +1 -1
- models/google__vit-base-patch16-224/config.json +1 -1
- models/google__vit-base-patch16-224/ds_model.py +55 -51
- models/google__vit-base-patch16-224/ds_proc.py +57 -59
- models/google__vit-base-patch16-224/model.safetensors +1 -1
- models/microsoft__resnet-50/config.json +1 -1
- models/microsoft__resnet-50/ds_model.py +55 -51
- models/microsoft__resnet-50/ds_proc.py +57 -59
- models/microsoft__resnet-50/model.safetensors +1 -1
- models/microsoft__swin-tiny-patch4-window7-224/config.json +1 -1
- models/microsoft__swin-tiny-patch4-window7-224/ds_model.py +55 -51
- models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py +57 -59
- models/microsoft__swin-tiny-patch4-window7-224/model.safetensors +1 -1
- models/timm__densenet121.tv_in1k/config.json +1 -1
- models/timm__densenet121.tv_in1k/ds_model.py +55 -51
- models/timm__densenet121.tv_in1k/ds_proc.py +57 -59
- models/timm__densenet121.tv_in1k/model.safetensors +1 -1
- models/torchvision__densenet121/config.json +1 -1
- models/torchvision__densenet121/ds_model.py +55 -51
- models/torchvision__densenet121/ds_proc.py +57 -59
- models/torchvision__densenet121/model.safetensors +1 -1
ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
manifest_20260212_202546.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"timestamp": "20260212_202546",
|
| 3 |
+
"repo_id": "dsaint31/bb_mlp_224",
|
| 4 |
+
"revision": "main",
|
| 5 |
+
"tag": null,
|
| 6 |
+
"num_labels": 3,
|
| 7 |
+
"build_device": "mps",
|
| 8 |
+
"count": 6,
|
| 9 |
+
"items": [
|
| 10 |
+
{
|
| 11 |
+
"backbone": "google/vit-base-patch16-224",
|
| 12 |
+
"subdir": "models/google__vit-base-patch16-224",
|
| 13 |
+
"dirname": "google__vit-base-patch16-224"
|
| 14 |
+
},
|
| 15 |
+
{
|
| 16 |
+
"backbone": "microsoft/swin-tiny-patch4-window7-224",
|
| 17 |
+
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 18 |
+
"dirname": "microsoft__swin-tiny-patch4-window7-224"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"backbone": "microsoft/resnet-50",
|
| 22 |
+
"subdir": "models/microsoft__resnet-50",
|
| 23 |
+
"dirname": "microsoft__resnet-50"
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"backbone": "google/efficientnet-b0",
|
| 27 |
+
"subdir": "models/google__efficientnet-b0",
|
| 28 |
+
"dirname": "google__efficientnet-b0"
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"backbone": "timm/densenet121.tv_in1k",
|
| 32 |
+
"subdir": "models/timm__densenet121.tv_in1k",
|
| 33 |
+
"dirname": "timm__densenet121.tv_in1k"
|
| 34 |
+
},
|
| 35 |
+
{
|
| 36 |
+
"backbone": "torchvision/densenet121",
|
| 37 |
+
"subdir": "models/torchvision__densenet121",
|
| 38 |
+
"dirname": "torchvision__densenet121"
|
| 39 |
+
}
|
| 40 |
+
],
|
| 41 |
+
"root_code_included": true,
|
| 42 |
+
"root_code_files": [
|
| 43 |
+
"ds_proc.py",
|
| 44 |
+
"ds_model.py",
|
| 45 |
+
"ds_cfg.py"
|
| 46 |
+
],
|
| 47 |
+
"subfolder_code_included": true,
|
| 48 |
+
"subfolder_code_files": [
|
| 49 |
+
"ds_proc.py",
|
| 50 |
+
"ds_model.py",
|
| 51 |
+
"ds_cfg.py"
|
| 52 |
+
]
|
| 53 |
+
}
|
models/google__efficientnet-b0/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__efficientnet-b0",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__efficientnet-b0",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/google__efficientnet-b0/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에는 timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/google__efficientnet-b0/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/google__efficientnet-b0/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 17558436
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:865ceddfa7c4eb1c24844a2bea075ff25ee577fabb2886069b5478cd27bf6cac
|
| 3 |
size 17558436
|
models/google__vit-base-patch16-224/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__vit-base-patch16-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/google__vit-base-patch16-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/google__vit-base-patch16-224/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/google__vit-base-patch16-224/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/google__vit-base-patch16-224/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 346372132
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ecc8942482f4acdd221a9490549b44ab8d697b3b170cacce759bfd0e215e5df
|
| 3 |
size 346372132
|
models/microsoft__resnet-50/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__resnet-50",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__resnet-50",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/microsoft__resnet-50/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/microsoft__resnet-50/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/microsoft__resnet-50/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 96388660
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b46be9524952c57179580e6e9728d525676dc8d1f2cf184315277e2f57bce90
|
| 3 |
size 96388660
|
models/microsoft__swin-tiny-patch4-window7-224/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/microsoft__swin-tiny-patch4-window7-224",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/microsoft__swin-tiny-patch4-window7-224/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/microsoft__swin-tiny-patch4-window7-224/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/microsoft__swin-tiny-patch4-window7-224/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 111128348
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45ec1d099f3a94683a6ded551823d596bc3cf56dd7d801adee5dc10c5b155e52
|
| 3 |
size 111128348
|
models/timm__densenet121.tv_in1k/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/timm__densenet121.tv_in1k",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/timm__densenet121.tv_in1k",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/timm__densenet121.tv_in1k/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/timm__densenet121.tv_in1k/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/timm__densenet121.tv_in1k/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 29293620
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0108b6737a776209aa44a8119bbb5034c69205bcd45dee8c5f0e3218eea1cf3a
|
| 3 |
size 29293620
|
models/torchvision__densenet121/config.json
CHANGED
|
@@ -24,7 +24,7 @@
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
-
"created_at": "
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/torchvision__densenet121",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
|
|
|
| 24 |
"num_labels": 3,
|
| 25 |
"transformers_version": "5.1.0",
|
| 26 |
"ds_provenance": {
|
| 27 |
+
"created_at": "20260212_202546",
|
| 28 |
"repo_id": "dsaint31/bb_mlp_224",
|
| 29 |
"subdir": "models/torchvision__densenet121",
|
| 30 |
"wrapper_class": "BackboneWithMLPHeadForImageClassification",
|
models/torchvision__densenet121/ds_model.py
CHANGED
|
@@ -94,14 +94,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
-
# PreTrainedModel은 config 객체를 받아 내부에
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
-
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
-
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
@@ -109,10 +109,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
-
# fail-fast: 학습/추론은 num_labels가 양수여야
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
-
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
@@ -120,17 +120,17 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
-
# meta는 feature 추출 및 미세조정 규칙의 단일
|
| 124 |
-
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
-
# backbone skeleton은 항상 pretrained weight 없이
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
-
# head shape은 meta의 feat_dim과 config.num_labels로
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
@@ -139,16 +139,20 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
-
# HF 초기화 훅이지만 init_weights를 override하여 head만
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
-
backbone skeleton을 건드리지 않기 위해 head만
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
-
HF 기본 init은 전체 모듈 트리를 순회할 수 있어
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
if getattr(self, "classifier", None) is not None:
|
| 154 |
self.classifier.apply(self._init_weights)
|
|
@@ -160,7 +164,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 160 |
# ----------------------------
|
| 161 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 162 |
# Meta decides which loader path to use.
|
| 163 |
-
# meta가 어떤 로더 경로를 사용할지
|
| 164 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 165 |
if meta is None:
|
| 166 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
@@ -174,14 +178,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 174 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 175 |
|
| 176 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 177 |
-
# transformers 백본: config로부터 랜덤 초기화 skeleton만
|
| 178 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 179 |
return AutoModel.from_config(bb_cfg)
|
| 180 |
|
| 181 |
@staticmethod
|
| 182 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 183 |
# timm is an optional dependency and should be imported lazily.
|
| 184 |
-
# timm은 옵션 의존성이므로 지연 import
|
| 185 |
try:
|
| 186 |
import timm
|
| 187 |
except Exception as e:
|
|
@@ -190,7 +194,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 190 |
) from e
|
| 191 |
|
| 192 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 193 |
-
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0)
|
| 194 |
return timm.create_model(
|
| 195 |
f"hf_hub:{hf_repo_id}",
|
| 196 |
pretrained=False,
|
|
@@ -200,12 +204,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 200 |
@staticmethod
|
| 201 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 202 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 203 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로
|
| 204 |
if model_id != "torchvision/densenet121":
|
| 205 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 206 |
|
| 207 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 208 |
-
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None)
|
| 209 |
m = tv_models.densenet121(weights=None)
|
| 210 |
return m
|
| 211 |
|
|
@@ -222,10 +226,10 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 222 |
):
|
| 223 |
"""
|
| 224 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 225 |
-
fresh-start 전용: skeleton backbone에 pretrained 가중치를
|
| 226 |
|
| 227 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 228 |
-
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로
|
| 229 |
"""
|
| 230 |
bb = self.config.backbone_name_or_path
|
| 231 |
meta = self._meta
|
|
@@ -240,7 +244,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 240 |
return
|
| 241 |
|
| 242 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 243 |
-
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를
|
| 244 |
ref = AutoModel.from_pretrained(
|
| 245 |
bb,
|
| 246 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
@@ -248,18 +252,18 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 248 |
)
|
| 249 |
|
| 250 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 251 |
-
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해
|
| 252 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 253 |
del ref
|
| 254 |
|
| 255 |
@torch.no_grad()
|
| 256 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 257 |
# timm must be present for timm backbones.
|
| 258 |
-
# timm
|
| 259 |
import timm
|
| 260 |
|
| 261 |
# Create a pretrained reference model and copy its weights strictly.
|
| 262 |
-
# pretrained reference 모델을 만들고 가중치를 strict하게
|
| 263 |
ref = timm.create_model(
|
| 264 |
f"hf_hub:{hf_repo_id}",
|
| 265 |
pretrained=True,
|
|
@@ -272,12 +276,12 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 272 |
@torch.no_grad()
|
| 273 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 274 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 275 |
-
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만
|
| 276 |
if model_id != "torchvision/densenet121":
|
| 277 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 278 |
|
| 279 |
# Use torchvision's default pretrained weights for densenet121.
|
| 280 |
-
# torchvision의 densenet121 기본 pretrained weights를
|
| 281 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 282 |
|
| 283 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
@@ -290,7 +294,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 290 |
@staticmethod
|
| 291 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 292 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 293 |
-
# 일부 transformers vision CNN은 pooler_output을 명시적으로
|
| 294 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 295 |
x = outputs.pooler_output
|
| 296 |
if x.dim() == 2:
|
|
@@ -300,7 +304,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 300 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 301 |
|
| 302 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 303 |
-
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을
|
| 304 |
x = outputs.last_hidden_state
|
| 305 |
if x.dim() == 4:
|
| 306 |
return x.mean(dim=(2, 3))
|
|
@@ -312,29 +316,29 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 312 |
|
| 313 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 314 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 315 |
-
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간
|
| 316 |
rule = self._meta["feat_rule"]
|
| 317 |
|
| 318 |
if rule == "cls":
|
| 319 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 320 |
-
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을
|
| 321 |
return outputs.last_hidden_state[:, 0, :]
|
| 322 |
|
| 323 |
if rule == "pool_or_mean":
|
| 324 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 325 |
-
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을
|
| 326 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 327 |
return outputs.pooler_output
|
| 328 |
return outputs.last_hidden_state.mean(dim=1)
|
| 329 |
|
| 330 |
if rule == "pool_or_gap":
|
| 331 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 332 |
-
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을
|
| 333 |
return self._pool_or_gap(outputs)
|
| 334 |
|
| 335 |
if rule == "timm_gap":
|
| 336 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 337 |
-
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로
|
| 338 |
if not isinstance(outputs, torch.Tensor):
|
| 339 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 340 |
if outputs.dim() != 4:
|
|
@@ -343,7 +347,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 343 |
|
| 344 |
if rule == "torchvision_densenet_gap":
|
| 345 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 346 |
-
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이
|
| 347 |
if not isinstance(outputs, torch.Tensor):
|
| 348 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 349 |
if outputs.dim() != 4:
|
|
@@ -362,7 +366,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 362 |
**kwargs,
|
| 363 |
):
|
| 364 |
# Type decides the backbone forward path and output format.
|
| 365 |
-
# type이 backbone forward 경로 및 출력 포맷을
|
| 366 |
t = self._meta["type"]
|
| 367 |
|
| 368 |
if t == "timm_densenet":
|
|
@@ -394,7 +398,7 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 394 |
|
| 395 |
else:
|
| 396 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 397 |
-
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을
|
| 398 |
outputs = self.backbone(
|
| 399 |
pixel_values=pixel_values,
|
| 400 |
output_attentions=output_attentions,
|
|
@@ -407,13 +411,13 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 407 |
attentions = getattr(outputs, "attentions", None)
|
| 408 |
|
| 409 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 410 |
-
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를
|
| 411 |
logits = self.classifier(feats)
|
| 412 |
|
| 413 |
loss = None
|
| 414 |
if labels is not None:
|
| 415 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 416 |
-
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길
|
| 417 |
loss = F.cross_entropy(logits, labels)
|
| 418 |
|
| 419 |
if not return_dict:
|
|
@@ -434,14 +438,14 @@ class BackboneWithMLPHeadForImageClassification(PreTrainedModel):
|
|
| 434 |
# ============================================================
|
| 435 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 436 |
# Toggle requires_grad for all parameters in a module.
|
| 437 |
-
# 모듈의 모든 파라미터에 대해 requires_grad를
|
| 438 |
for p in module.parameters():
|
| 439 |
p.requires_grad = flag
|
| 440 |
|
| 441 |
|
| 442 |
def set_bn_eval(module: nn.Module):
|
| 443 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 444 |
-
# BatchNorm 레이어를 eval 모드로 두어 running stats를
|
| 445 |
for m in module.modules():
|
| 446 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 447 |
m.eval()
|
|
@@ -449,7 +453,7 @@ def set_bn_eval(module: nn.Module):
|
|
| 449 |
|
| 450 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 451 |
# Stage1: freeze backbone and train only the head.
|
| 452 |
-
# stage1: backbone을 freeze하고 head만
|
| 453 |
_set_requires_grad(model.backbone, False)
|
| 454 |
_set_requires_grad(model.classifier, True)
|
| 455 |
|
|
@@ -460,7 +464,7 @@ def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn:
|
|
| 460 |
|
| 461 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 462 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 463 |
-
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수
|
| 464 |
model.train()
|
| 465 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 466 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
@@ -469,7 +473,7 @@ def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_b
|
|
| 469 |
|
| 470 |
def trainable_summary(model: nn.Module):
|
| 471 |
# Print a compact summary of trainable parameters.
|
| 472 |
-
# 학습 가능 파라미터 요약을 간단히
|
| 473 |
total = sum(p.numel() for p in model.parameters())
|
| 474 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 475 |
ratio = trainable / total if total > 0 else 0.0
|
|
@@ -483,7 +487,7 @@ def unfreeze_last_stage(
|
|
| 483 |
keep_bn_eval: bool = True,
|
| 484 |
):
|
| 485 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 486 |
-
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을
|
| 487 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 488 |
|
| 489 |
n = int(last_n)
|
|
@@ -498,7 +502,7 @@ def unfreeze_last_stage(
|
|
| 498 |
|
| 499 |
if bb_type == "vit":
|
| 500 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 501 |
-
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에
|
| 502 |
blocks = list(model.backbone.encoder.layer)
|
| 503 |
for blk in blocks[-n:]:
|
| 504 |
_set_requires_grad(blk, True)
|
|
@@ -506,7 +510,7 @@ def unfreeze_last_stage(
|
|
| 506 |
|
| 507 |
if bb_type == "swin":
|
| 508 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 509 |
-
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze
|
| 510 |
stages = list(model.backbone.encoder.layers)
|
| 511 |
blocks: List[nn.Module] = []
|
| 512 |
for st in stages:
|
|
@@ -517,7 +521,7 @@ def unfreeze_last_stage(
|
|
| 517 |
|
| 518 |
if bb_type == "resnet":
|
| 519 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 520 |
-
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze
|
| 521 |
bb = model.backbone
|
| 522 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 523 |
if not hasattr(bb, name):
|
|
@@ -538,7 +542,7 @@ def unfreeze_last_stage(
|
|
| 538 |
|
| 539 |
if bb_type == "efficientnet":
|
| 540 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 541 |
-
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze
|
| 542 |
bb = model.backbone
|
| 543 |
if not hasattr(bb, "features"):
|
| 544 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
@@ -556,7 +560,7 @@ def unfreeze_last_stage(
|
|
| 556 |
|
| 557 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 558 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 559 |
-
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze
|
| 560 |
bb = model.backbone
|
| 561 |
if not hasattr(bb, "features"):
|
| 562 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
@@ -575,7 +579,7 @@ def unfreeze_last_stage(
|
|
| 575 |
|
| 576 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 577 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 578 |
-
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를
|
| 579 |
return list(db.children())
|
| 580 |
|
| 581 |
blocks: List[nn.Module] = []
|
|
@@ -600,5 +604,5 @@ def unfreeze_last_stage(
|
|
| 600 |
# register
|
| 601 |
# -------------------------
|
| 602 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 603 |
-
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에
|
| 604 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
|
|
|
| 94 |
|
| 95 |
def __init__(self, config: BackboneMLPHeadConfig):
|
| 96 |
# PreTrainedModel expects a config object and stores it internally.
|
| 97 |
+
# PreTrainedModel은 config 객체를 받아 내부에 저장함.
|
| 98 |
super().__init__(config)
|
| 99 |
|
| 100 |
# Fail-fast: the model is not meant to be instantiated without a valid backbone id.
|
| 101 |
+
# fail-fast: 유효한 backbone id 없이 모델을 만드는 사용 시나리오는 허용하지 않음 - fast fail.
|
| 102 |
#
|
| 103 |
# Note: Transformers may create configs with no args, but models are conventionally created with configs.
|
| 104 |
+
# 참고: Transformers는 config 무인자 생성이 있을 수 있으나, 모델은 관례적으로 config를 받아 생성.
|
| 105 |
if config.backbone_name_or_path is None:
|
| 106 |
raise ValueError(
|
| 107 |
"config.backbone_name_or_path is None. "
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
# Fail-fast: training/inference requires a positive number of labels.
|
| 112 |
+
# fail-fast: 학습/추론은 num_labels가 양수여야 함.
|
| 113 |
#
|
| 114 |
# Config may exist in a minimal form for internal serialization paths, but the model should not.
|
| 115 |
+
# config는 내부 직렬화 경로에서 최소 형태로 존재할 수 있으나 모델은 해당 없음.
|
| 116 |
if int(getattr(config, "num_labels", 0)) <= 0:
|
| 117 |
raise ValueError(
|
| 118 |
f"config.num_labels must be > 0, got {getattr(config, 'num_labels', None)}. "
|
|
|
|
| 120 |
)
|
| 121 |
|
| 122 |
# Meta is a single source of truth for extraction and fine-tuning rules.
|
| 123 |
+
# meta는 feature 추출 및 미세조정 규칙의 단일 기준.
|
| 124 |
+
# Resolve backbone meta from config (preferred) or fallback table (for backward compatibility).
|
| 125 |
# Prefer config.backbone_meta to keep Hub runtime self-contained.
|
| 126 |
self._meta = _resolve_backbone_meta(config, fallback_table=BACKBONE_META)
|
| 127 |
|
| 128 |
# Backbone skeleton is always created without pretrained weights.
|
| 129 |
+
# backbone skeleton은 항상 pretrained weight 없이 생성.
|
| 130 |
self.backbone = self._build_backbone_skeleton(config.backbone_name_or_path)
|
| 131 |
|
| 132 |
# Head shape is driven by meta feat_dim and config.num_labels.
|
| 133 |
+
# head shape은 meta의 feat_dim과 config.num_labels로 결정.
|
| 134 |
self.classifier = MLPHead(
|
| 135 |
in_dim=int(self._meta["feat_dim"]),
|
| 136 |
num_labels=int(config.num_labels),
|
|
|
|
| 139 |
)
|
| 140 |
|
| 141 |
# HF initialization hook, but we override init_weights to initialize head-only.
|
| 142 |
+
# HF 초기화 훅이지만 init_weights를 override하여 head만 초기화하도록 변경.
|
| 143 |
self.post_init()
|
| 144 |
|
| 145 |
def init_weights(self):
|
| 146 |
"""
|
| 147 |
Initialize only the head to avoid touching the backbone skeleton.
|
| 148 |
+
backbone skeleton을 건드리지 않기 위해 head만 초기화.
|
| 149 |
|
| 150 |
HF's default init may traverse the entire module tree, which is undesirable here.
|
| 151 |
+
HF 기본 init은 전체 모듈 트리를 순회할 수 있어 여기서 그대로 사용하기 부적절.
|
| 152 |
+
|
| 153 |
+
초기 설계에서 __init__ 내부에서 backbone의 가중치 로드를 수행함(편리를 위해).
|
| 154 |
+
이 경우, HF의 post_init()으로 인해 해당 로드가 취소되는 경우가 존재(timm, torchvision 등의 백본).
|
| 155 |
+
때문에 이를 오버라이드 하여 classifier만 초기화 하도록 변경함.
|
| 156 |
"""
|
| 157 |
if getattr(self, "classifier", None) is not None:
|
| 158 |
self.classifier.apply(self._init_weights)
|
|
|
|
| 164 |
# ----------------------------
|
| 165 |
def _build_backbone_skeleton(self, backbone_id: str) -> nn.Module:
|
| 166 |
# Meta decides which loader path to use.
|
| 167 |
+
# meta가 어떤 로더 경로를 사용할지 결정.
|
| 168 |
meta = self._meta if backbone_id == self.config.backbone_name_or_path else BACKBONE_META.get(backbone_id)
|
| 169 |
if meta is None:
|
| 170 |
raise KeyError(f"Unknown backbone_id={backbone_id}. Provide backbone_meta in config or extend BACKBONE_META.")
|
|
|
|
| 178 |
return self._build_torchvision_densenet_skeleton(backbone_id)
|
| 179 |
|
| 180 |
# For transformers backbones: build a random-weight skeleton from config only.
|
| 181 |
+
# transformers 백본: config로부터 랜덤 초기화 skeleton만 생성.
|
| 182 |
bb_cfg = AutoConfig.from_pretrained(backbone_id)
|
| 183 |
return AutoModel.from_config(bb_cfg)
|
| 184 |
|
| 185 |
@staticmethod
|
| 186 |
def _build_timm_densenet_skeleton(hf_repo_id: str) -> nn.Module:
|
| 187 |
# timm is an optional dependency and should be imported lazily.
|
| 188 |
+
# timm은 옵션 의존성이므로 지연 import 수행.
|
| 189 |
try:
|
| 190 |
import timm
|
| 191 |
except Exception as e:
|
|
|
|
| 194 |
) from e
|
| 195 |
|
| 196 |
# Build structure only (pretrained=False) and remove classifier head (num_classes=0).
|
| 197 |
+
# 구조만 생성(pretrained=False)하고 분류기 head는 제거(num_classes=0).
|
| 198 |
return timm.create_model(
|
| 199 |
f"hf_hub:{hf_repo_id}",
|
| 200 |
pretrained=False,
|
|
|
|
| 204 |
@staticmethod
|
| 205 |
def _build_torchvision_densenet_skeleton(model_id: str) -> nn.Module:
|
| 206 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 207 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 의도적으로 지원.
|
| 208 |
if model_id != "torchvision/densenet121":
|
| 209 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 210 |
|
| 211 |
# Build structure only (weights=None) to avoid implicit pretrained loading.
|
| 212 |
+
# implicit pretrained 로드를 피하기 위해 구조만 생성(weights=None).
|
| 213 |
m = tv_models.densenet121(weights=None)
|
| 214 |
return m
|
| 215 |
|
|
|
|
| 226 |
):
|
| 227 |
"""
|
| 228 |
Fresh-start only: inject pretrained backbone weights into the skeleton.
|
| 229 |
+
fresh-start 전용: skeleton backbone에 pretrained 가중치를 주입.
|
| 230 |
|
| 231 |
Do NOT call this after from_pretrained() because it would overwrite checkpoint weights.
|
| 232 |
+
from_pretrained() 이후 호출하면 체크포인트 가중치를 덮어쓰므로 주의할 것.
|
| 233 |
"""
|
| 234 |
bb = self.config.backbone_name_or_path
|
| 235 |
meta = self._meta
|
|
|
|
| 244 |
return
|
| 245 |
|
| 246 |
# For transformers backbones, load a reference pretrained model and copy weights into our skeleton.
|
| 247 |
+
# transformers 백본은 reference pretrained 모델을 로드한 뒤 skeleton에 가중치를 복사.
|
| 248 |
ref = AutoModel.from_pretrained(
|
| 249 |
bb,
|
| 250 |
low_cpu_mem_usage=low_cpu_mem_usage,
|
|
|
|
| 252 |
)
|
| 253 |
|
| 254 |
# strict=False is used to tolerate harmless key differences across minor versions.
|
| 255 |
+
# strict=False는 마이너 버전 차이로 인한 무해한 키 차이를 허용하기 위해 사용.
|
| 256 |
self.backbone.load_state_dict(ref.state_dict(), strict=False)
|
| 257 |
del ref
|
| 258 |
|
| 259 |
@torch.no_grad()
|
| 260 |
def _load_timm_pretrained_into_skeleton_(self, hf_repo_id: str):
|
| 261 |
# timm must be present for timm backbones.
|
| 262 |
+
# timm 백본에��� timm 설치가 필요.
|
| 263 |
import timm
|
| 264 |
|
| 265 |
# Create a pretrained reference model and copy its weights strictly.
|
| 266 |
+
# pretrained reference 모델을 만들고 가중치를 strict하게 복사.
|
| 267 |
ref = timm.create_model(
|
| 268 |
f"hf_hub:{hf_repo_id}",
|
| 269 |
pretrained=True,
|
|
|
|
| 276 |
@torch.no_grad()
|
| 277 |
def _load_torchvision_pretrained_into_skeleton_(self, model_id: str):
|
| 278 |
# This project intentionally supports only torchvision/densenet121 in the 224 whitelist.
|
| 279 |
+
# 이 프로젝트는 224 화이트리스트에서 torchvision/densenet121만 지원.
|
| 280 |
if model_id != "torchvision/densenet121":
|
| 281 |
raise ValueError(f"Unsupported torchvision DenseNet id (224 whitelist only): {model_id}")
|
| 282 |
|
| 283 |
# Use torchvision's default pretrained weights for densenet121.
|
| 284 |
+
# torchvision의 densenet121 기본 pretrained weights를 사용.
|
| 285 |
ref = tv_models.densenet121(weights=tv_models.DenseNet121_Weights.DEFAULT).eval()
|
| 286 |
|
| 287 |
self.backbone.load_state_dict(ref.state_dict(), strict=True)
|
|
|
|
| 294 |
@staticmethod
|
| 295 |
def _pool_or_gap(outputs) -> torch.Tensor:
|
| 296 |
# Some transformers vision CNNs provide pooler_output explicitly.
|
| 297 |
+
# 일부 transformers vision CNN은 pooler_output을 명시적으로 제공.
|
| 298 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 299 |
x = outputs.pooler_output
|
| 300 |
if x.dim() == 2:
|
|
|
|
| 304 |
raise RuntimeError(f"Unexpected pooler_output shape: {tuple(x.shape)}")
|
| 305 |
|
| 306 |
# Otherwise we expect a CNN-style last_hidden_state=(B,C,H,W) and apply GAP.
|
| 307 |
+
# 그렇지 않으면 CNN 스타일 last_hidden_state=(B,C,H,W)를 기대하고 GAP을 적용.
|
| 308 |
x = outputs.last_hidden_state
|
| 309 |
if x.dim() == 4:
|
| 310 |
return x.mean(dim=(2, 3))
|
|
|
|
| 316 |
|
| 317 |
def _extract_features(self, outputs, pixel_values: Optional[torch.Tensor] = None) -> torch.Tensor:
|
| 318 |
# Feature rule is defined by BACKBONE_META and must remain stable across saves/loads.
|
| 319 |
+
# feature 규칙은 BACKBONE_META로 정의되며 저장/로드 간 안정적 동작을 위해 제한된 모델만 사용.
|
| 320 |
rule = self._meta["feat_rule"]
|
| 321 |
|
| 322 |
if rule == "cls":
|
| 323 |
# ViT-style: use CLS token embedding from last_hidden_state.
|
| 324 |
+
# ViT 스타일: last_hidden_state에서 CLS 토큰 임베딩을 사용.
|
| 325 |
return outputs.last_hidden_state[:, 0, :]
|
| 326 |
|
| 327 |
if rule == "pool_or_mean":
|
| 328 |
# Swin-style: prefer pooler_output if present, else mean-pool over tokens.
|
| 329 |
+
# Swin 스타일: pooler_output이 있으면 우선 사용하고, 없으면 토큰 평균 풀링을 사용.
|
| 330 |
if hasattr(outputs, "pooler_output") and outputs.pooler_output is not None:
|
| 331 |
return outputs.pooler_output
|
| 332 |
return outputs.last_hidden_state.mean(dim=1)
|
| 333 |
|
| 334 |
if rule == "pool_or_gap":
|
| 335 |
# CNN-style: use pooler_output if present, else GAP over spatial dims.
|
| 336 |
+
# CNN 스타일: pooler_output이 있으면 사용하고, 없으면 공간 차원 GAP을 사용.
|
| 337 |
return self._pool_or_gap(outputs)
|
| 338 |
|
| 339 |
if rule == "timm_gap":
|
| 340 |
# timm forward_features returns a feature map (B,C,H,W) which we GAP to (B,C).
|
| 341 |
+
# timm forward_features는 (B,C,H,W) feature map을 반환하며 이를 GAP으로 (B,C)로 변환.
|
| 342 |
if not isinstance(outputs, torch.Tensor):
|
| 343 |
raise TypeError(f"timm_gap expects Tensor features, got {type(outputs)}")
|
| 344 |
if outputs.dim() != 4:
|
|
|
|
| 347 |
|
| 348 |
if rule == "torchvision_densenet_gap":
|
| 349 |
# torchvision DenseNet features are feature maps (B,C,H,W) and require GAP.
|
| 350 |
+
# torchvision DenseNet features는 (B,C,H,W) feature map이며 GAP이 필요.
|
| 351 |
if not isinstance(outputs, torch.Tensor):
|
| 352 |
raise TypeError(f"torchvision_densenet_gap expects Tensor, got {type(outputs)}")
|
| 353 |
if outputs.dim() != 4:
|
|
|
|
| 366 |
**kwargs,
|
| 367 |
):
|
| 368 |
# Type decides the backbone forward path and output format.
|
| 369 |
+
# type이 backbone forward 경로 및 출력 포맷을 결정.
|
| 370 |
t = self._meta["type"]
|
| 371 |
|
| 372 |
if t == "timm_densenet":
|
|
|
|
| 398 |
|
| 399 |
else:
|
| 400 |
# Transformers vision models are called with pixel_values and return ModelOutput.
|
| 401 |
+
# transformers vision 모델은 pixel_values로 호출되며 ModelOutput을 반환.
|
| 402 |
outputs = self.backbone(
|
| 403 |
pixel_values=pixel_values,
|
| 404 |
output_attentions=output_attentions,
|
|
|
|
| 411 |
attentions = getattr(outputs, "attentions", None)
|
| 412 |
|
| 413 |
# Classifier consumes (B, feat_dim) and returns logits (B, num_labels).
|
| 414 |
+
# classifier는 (B, feat_dim)을 받아 logits (B, num_labels)를 반환.
|
| 415 |
logits = self.classifier(feats)
|
| 416 |
|
| 417 |
loss = None
|
| 418 |
if labels is not None:
|
| 419 |
# Cross entropy expects labels as class indices in [0, num_labels).
|
| 420 |
+
# cross entropy는 labels가 [0, num_labels) 범위의 class index이길 기대함.
|
| 421 |
loss = F.cross_entropy(logits, labels)
|
| 422 |
|
| 423 |
if not return_dict:
|
|
|
|
| 438 |
# ============================================================
|
| 439 |
def _set_requires_grad(module: nn.Module, flag: bool):
|
| 440 |
# Toggle requires_grad for all parameters in a module.
|
| 441 |
+
# 모듈의 모든 파라미터에 대해 requires_grad를 토글.
|
| 442 |
for p in module.parameters():
|
| 443 |
p.requires_grad = flag
|
| 444 |
|
| 445 |
|
| 446 |
def set_bn_eval(module: nn.Module):
|
| 447 |
# Put BatchNorm layers into eval mode to freeze running stats.
|
| 448 |
+
# BatchNorm 레이어를 eval 모드로 두어 running stats를 고정.
|
| 449 |
for m in module.modules():
|
| 450 |
if isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)):
|
| 451 |
m.eval()
|
|
|
|
| 453 |
|
| 454 |
def freeze_backbone(model: BackboneWithMLPHeadForImageClassification, freeze_bn: bool = True):
|
| 455 |
# Stage1: freeze backbone and train only the head.
|
| 456 |
+
# stage1: backbone을 freeze하고 head만 학습.
|
| 457 |
_set_requires_grad(model.backbone, False)
|
| 458 |
_set_requires_grad(model.classifier, True)
|
| 459 |
|
|
|
|
| 464 |
|
| 465 |
def finetune_train_mode(model: BackboneWithMLPHeadForImageClassification, keep_bn_eval: bool = True):
|
| 466 |
# Stage2: train mode, optionally keeping BN layers in eval for stability.
|
| 467 |
+
# stage2: train 모드로 두되 안정성을 위해 BN을 eval로 유지할 수 있음. (buffer 등을 유지하기 위해)
|
| 468 |
model.train()
|
| 469 |
meta = getattr(model, "_meta", None) or getattr(model.config, "backbone_meta", None)
|
| 470 |
if keep_bn_eval and meta.get("has_bn", False):
|
|
|
|
| 473 |
|
| 474 |
def trainable_summary(model: nn.Module):
|
| 475 |
# Print a compact summary of trainable parameters.
|
| 476 |
+
# 학습 가능 파라미터 요약을 간단히 출력.
|
| 477 |
total = sum(p.numel() for p in model.parameters())
|
| 478 |
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 479 |
ratio = trainable / total if total > 0 else 0.0
|
|
|
|
| 487 |
keep_bn_eval: bool = True,
|
| 488 |
):
|
| 489 |
# This utility implements BACKBONE_META['unfreeze']=="last_n" across supported backbones.
|
| 490 |
+
# 이 유틸은 지원 백본들에 대해 BACKBONE_META['unfreeze']=="last_n"을 구현.
|
| 491 |
freeze_backbone(model, freeze_bn=keep_bn_eval)
|
| 492 |
|
| 493 |
n = int(last_n)
|
|
|
|
| 502 |
|
| 503 |
if bb_type == "vit":
|
| 504 |
# ViT blocks live under backbone.encoder.layer in the transformers implementation.
|
| 505 |
+
# ViT 블록은 transformers 구현에서 backbone.encoder.layer 아래에 존재함.
|
| 506 |
blocks = list(model.backbone.encoder.layer)
|
| 507 |
for blk in blocks[-n:]:
|
| 508 |
_set_requires_grad(blk, True)
|
|
|
|
| 510 |
|
| 511 |
if bb_type == "swin":
|
| 512 |
# Swin blocks are nested by stages and blocks; we flatten and unfreeze last n blocks.
|
| 513 |
+
# Swin 블록은 stage와 block으로 중첩되어 있어 펼친 후 마지막 n개를 unfreeze.
|
| 514 |
stages = list(model.backbone.encoder.layers)
|
| 515 |
blocks: List[nn.Module] = []
|
| 516 |
for st in stages:
|
|
|
|
| 521 |
|
| 522 |
if bb_type == "resnet":
|
| 523 |
# ResNet uses layer1..layer4 stages; we unfreeze at block granularity.
|
| 524 |
+
# ResNet은 layer1..layer4 stage를 사용하며 block 단위로 unfreeze.
|
| 525 |
bb = model.backbone
|
| 526 |
for name in ("layer1", "layer2", "layer3", "layer4"):
|
| 527 |
if not hasattr(bb, name):
|
|
|
|
| 542 |
|
| 543 |
if bb_type == "efficientnet":
|
| 544 |
# EfficientNet in transformers exposes features; we unfreeze from the tail blocks.
|
| 545 |
+
# transformers EfficientNet은 features를 노출하며 뒤쪽 블록부터 unfreeze.
|
| 546 |
bb = model.backbone
|
| 547 |
if not hasattr(bb, "features"):
|
| 548 |
raise RuntimeError("Unexpected EfficientNet structure: missing features")
|
|
|
|
| 560 |
|
| 561 |
if bb_type in ("timm_densenet", "torchvision_densenet"):
|
| 562 |
# DenseNet exposes a .features module with named blocks; we unfreeze last n submodules.
|
| 563 |
+
# DenseNet은 .features 모듈에 블록들이 이름으로 존재하며 마지막 n개 서브모듈을 unfreeze.
|
| 564 |
bb = model.backbone
|
| 565 |
if not hasattr(bb, "features"):
|
| 566 |
raise RuntimeError("Unexpected DenseNet: missing features")
|
|
|
|
| 579 |
|
| 580 |
def _denselayers(db: nn.Module) -> List[nn.Module]:
|
| 581 |
# Dense blocks contain multiple DenseLayer children; we return them for fine-grained unfreezing.
|
| 582 |
+
# denseblock은 DenseLayer 자식들을 가지므로 세밀한 unfreeze를 위해 이를 반환.
|
| 583 |
return list(db.children())
|
| 584 |
|
| 585 |
blocks: List[nn.Module] = []
|
|
|
|
| 604 |
# register
|
| 605 |
# -------------------------
|
| 606 |
# Register for AutoModelForImageClassification so from_pretrained can resolve this custom class.
|
| 607 |
+
# from_pretrained가 이 커스텀 클래스를 해석할 수 있도록 AutoModelForImageClassification에 등록.
|
| 608 |
BackboneWithMLPHeadForImageClassification.register_for_auto_class("AutoModelForImageClassification")
|
models/torchvision__densenet121/ds_proc.py
CHANGED
|
@@ -4,8 +4,8 @@
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
@@ -27,41 +27,38 @@ except ImportError:
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
-
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
-
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야
|
| 37 |
-
|
| 38 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 39 |
-
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안
|
| 40 |
-
|
| 41 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 42 |
-
런타임 객체는 backbone meta에 따라 init/load 시점에
|
| 43 |
-
|
| 44 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 45 |
-
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야
|
| 46 |
"""
|
| 47 |
|
| 48 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 49 |
-
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를
|
| 50 |
model_input_names = ["pixel_values"]
|
| 51 |
|
| 52 |
def __init__(
|
| 53 |
self,
|
| 54 |
backbone_name_or_path: BackboneID,
|
| 55 |
-
is_training: bool = False,
|
| 56 |
use_fast: bool = False,
|
| 57 |
**kwargs,
|
| 58 |
):
|
| 59 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 60 |
-
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를
|
| 61 |
super().__init__(**kwargs)
|
| 62 |
|
| 63 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 64 |
-
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를
|
| 65 |
if backbone_name_or_path not in BACKBONE_META:
|
| 66 |
raise ValueError(
|
| 67 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
@@ -69,23 +66,23 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 69 |
)
|
| 70 |
|
| 71 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 72 |
-
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야
|
| 73 |
self.backbone_name_or_path = backbone_name_or_path
|
| 74 |
self.is_training = bool(is_training)
|
| 75 |
|
| 76 |
# Reproducibility switch for transformers processors.
|
| 77 |
-
# transformers processor의 fast/slow 선택을 재현 가능하게
|
| 78 |
self.use_fast = bool(use_fast)
|
| 79 |
|
| 80 |
# Runtime-only fields: must never be serialized.
|
| 81 |
-
# 런타임 전용 필드: 절대 직렬화되면 안
|
| 82 |
self._meta = None
|
| 83 |
-
self._delegate
|
| 84 |
-
self._timm_transform
|
| 85 |
self._torchvision_transform = None
|
| 86 |
|
| 87 |
# Build runtime objects according to backbone type.
|
| 88 |
-
# backbone type에 따라 런타임 객체를
|
| 89 |
self._build_runtime()
|
| 90 |
|
| 91 |
# ============================================================
|
|
@@ -95,13 +92,13 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 95 |
def _build_runtime(self):
|
| 96 |
"""
|
| 97 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 98 |
-
BACKBONE_META["type"]에 따라 런타임 delegate/transform을
|
| 99 |
"""
|
| 100 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 101 |
self._meta = meta
|
| 102 |
|
| 103 |
# Always reset runtime fields before rebuilding.
|
| 104 |
-
# 재구성 전 런타임 필드는 항상
|
| 105 |
self._delegate = None
|
| 106 |
self._timm_transform = None
|
| 107 |
self._torchvision_transform = None
|
|
@@ -110,7 +107,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 110 |
|
| 111 |
if t == "timm_densenet":
|
| 112 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 113 |
-
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을
|
| 114 |
self._timm_transform = self._build_timm_transform(
|
| 115 |
backbone_id=self.backbone_name_or_path,
|
| 116 |
is_training=self.is_training,
|
|
@@ -119,17 +116,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 119 |
|
| 120 |
if t == "torchvision_densenet":
|
| 121 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 122 |
-
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가
|
| 123 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 124 |
is_training=self.is_training
|
| 125 |
)
|
| 126 |
return
|
| 127 |
|
| 128 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 129 |
-
# 기본: transformers 백본은 공식 AutoImageProcessor에
|
| 130 |
#
|
| 131 |
# IMPORTANT:
|
| 132 |
-
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로
|
| 133 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 134 |
self.backbone_name_or_path,
|
| 135 |
use_fast=self.use_fast,
|
|
@@ -140,7 +137,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 140 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 141 |
"""
|
| 142 |
Create timm transform without storing non-serializable objects in config.
|
| 143 |
-
비직렬화 객체를 config에 저장하지 않고 timm transform을
|
| 144 |
"""
|
| 145 |
try:
|
| 146 |
import timm
|
|
@@ -151,20 +148,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 151 |
) from e
|
| 152 |
|
| 153 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 154 |
-
# data config 추출만 필요하므로 pretrained=False를 우선
|
| 155 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 156 |
dc = resolve_model_data_config(m)
|
| 157 |
|
| 158 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 159 |
-
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을
|
| 160 |
-
tfm = create_transform(**dc, is_training=is_training)
|
| 161 |
return tfm
|
| 162 |
|
| 163 |
@staticmethod
|
| 164 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 165 |
"""
|
| 166 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 167 |
-
DenseNet-121용 torchvision 전처리(224 파이프라인)를
|
| 168 |
"""
|
| 169 |
try:
|
| 170 |
from torchvision import transforms
|
|
@@ -174,28 +171,29 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 174 |
) from e
|
| 175 |
|
| 176 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 177 |
-
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화
|
| 178 |
mean = (0.485, 0.456, 0.406)
|
| 179 |
-
std
|
| 180 |
|
| 181 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 182 |
-
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을
|
| 183 |
if is_training:
|
| 184 |
return transforms.Compose(
|
| 185 |
[
|
| 186 |
-
transforms.RandomResizedCrop(224),
|
| 187 |
-
transforms.RandomHorizontalFlip(p=0.5),
|
|
|
|
| 188 |
transforms.ToTensor(),
|
| 189 |
transforms.Normalize(mean=mean, std=std),
|
| 190 |
]
|
| 191 |
)
|
| 192 |
|
| 193 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 194 |
-
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를
|
| 195 |
return transforms.Compose(
|
| 196 |
[
|
| 197 |
transforms.Resize(256),
|
| 198 |
-
transforms.CenterCrop(224),
|
| 199 |
transforms.ToTensor(),
|
| 200 |
transforms.Normalize(mean=mean, std=std),
|
| 201 |
]
|
|
@@ -208,24 +206,24 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 208 |
def to_dict(self) -> dict[str, Any]:
|
| 209 |
"""
|
| 210 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 211 |
-
preprocessor_config.json에 들어갈 JSON 직렬화 dict를
|
| 212 |
|
| 213 |
Important: do not leak runtime objects into the serialized dict.
|
| 214 |
-
중요: 런타임 객체가 직렬화 dict에 섞이면 안
|
| 215 |
"""
|
| 216 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 217 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 218 |
d = super().to_dict()
|
| 219 |
|
| 220 |
# Force minimal stable fields for long-term compatibility.
|
| 221 |
-
# 장기 호환을 위해 최소 안정 필드를
|
| 222 |
-
d["image_processor_type"]
|
| 223 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 224 |
d["is_training"] = self.is_training
|
| 225 |
-
d["use_fast"]
|
| 226 |
|
| 227 |
# Remove any runtime-only fields defensively.
|
| 228 |
-
# 런타임 전용 필드는 보수적으로
|
| 229 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 230 |
d.pop(key, None)
|
| 231 |
|
|
@@ -235,14 +233,14 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 235 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 236 |
"""
|
| 237 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 238 |
-
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드
|
| 239 |
"""
|
| 240 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 241 |
if backbone is None:
|
| 242 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 243 |
|
| 244 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 245 |
-
use_fast
|
| 246 |
|
| 247 |
return cls(
|
| 248 |
backbone_name_or_path=backbone,
|
|
@@ -255,20 +253,20 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 255 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 256 |
"""
|
| 257 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 258 |
-
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한
|
| 259 |
|
| 260 |
Strategy:
|
| 261 |
전략:
|
| 262 |
|
| 263 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 264 |
-
AutoConfig로 config.json을 읽고 backbone_name_or_path를
|
| 265 |
"""
|
| 266 |
|
| 267 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 268 |
-
# is_training은 런타임 전용이며 추론/서빙 기본값은 False
|
| 269 |
#
|
| 270 |
# IMPORTANT:
|
| 271 |
-
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면
|
| 272 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 273 |
|
| 274 |
kwargs.pop("trust_remote_code", None)
|
|
@@ -289,7 +287,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 289 |
@staticmethod
|
| 290 |
def _ensure_list(images: Any) -> list[Any]:
|
| 291 |
# Normalize scalar image input to a list for uniform processing.
|
| 292 |
-
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를
|
| 293 |
if isinstance(images, (list, tuple)):
|
| 294 |
return list(images)
|
| 295 |
return [images]
|
|
@@ -297,7 +295,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 297 |
@staticmethod
|
| 298 |
def _to_pil_rgb(x: Any):
|
| 299 |
# Convert common image inputs into PIL RGB images.
|
| 300 |
-
# 일반적인 입력을 PIL RGB 이미지로
|
| 301 |
from PIL import Image as PILImage
|
| 302 |
|
| 303 |
if isinstance(x, PILImage.Image):
|
|
@@ -314,17 +312,17 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 314 |
) -> dict[str, Any]:
|
| 315 |
"""
|
| 316 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 317 |
-
이미지를 {"pixel_values": Tensor/ndarray}로
|
| 318 |
"""
|
| 319 |
images = self._ensure_list(images)
|
| 320 |
|
| 321 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 322 |
-
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로
|
| 323 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 324 |
self._build_runtime()
|
| 325 |
|
| 326 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 327 |
-
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 328 |
if self._timm_transform is not None:
|
| 329 |
pv: list[torch.Tensor] = []
|
| 330 |
for im in images:
|
|
@@ -337,7 +335,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 337 |
return self._format_return(pixel_values, return_tensors)
|
| 338 |
|
| 339 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 340 |
-
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32
|
| 341 |
if self._torchvision_transform is not None:
|
| 342 |
pv: list[torch.Tensor] = []
|
| 343 |
for im in images:
|
|
@@ -350,7 +348,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 350 |
return self._format_return(pixel_values, return_tensors)
|
| 351 |
|
| 352 |
# transformers delegate path: rely on official processor behavior.
|
| 353 |
-
# transformers 위임 경로: 공식 processor 동작을 그대로
|
| 354 |
if self._delegate is None:
|
| 355 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 356 |
|
|
@@ -360,7 +358,7 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 360 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 361 |
"""
|
| 362 |
Format pixel_values according to return_tensors.
|
| 363 |
-
return_tensors에 맞춰 pixel_values 반환 포맷을
|
| 364 |
"""
|
| 365 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 366 |
return {"pixel_values": pixel_values}
|
|
@@ -370,6 +368,6 @@ class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
|
| 370 |
|
| 371 |
|
| 372 |
# Register this processor for AutoImageProcessor resolution.
|
| 373 |
-
# AutoImageProcessor 해석을 위해 이 processor를
|
| 374 |
if __name__ != "__main__":
|
| 375 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
|
|
|
| 4 |
# src/ds_proc.py
|
| 5 |
|
| 6 |
# ============================================================
|
| 7 |
+
# ImageProcessor (AutoImageProcessor integration)
|
| 8 |
+
# ImageProcessor (AutoImageProcessor 연동)
|
| 9 |
# ============================================================
|
| 10 |
|
| 11 |
from typing import Any
|
|
|
|
| 27 |
class BackboneMLPHead224ImageProcessor(ImageProcessingMixin):
|
| 28 |
"""
|
| 29 |
This processor performs image preprocessing and outputs {"pixel_values": ...}.
|
| 30 |
+
이 processor는 이미지 전처리를 수행하고 {"pixel_values": ...}를 반환함.
|
| 31 |
|
| 32 |
Key requirements:
|
| 33 |
핵심 요구사항:
|
| 34 |
|
| 35 |
1) save_pretrained() must produce a JSON-serializable preprocessor_config.json.
|
| 36 |
+
save_pretrained()는 JSON 직렬화 가능한 preprocessor_config.json을 생성해야 함.
|
|
|
|
| 37 |
2) Runtime-only objects (delegate processor, timm/torchvision transforms) must NOT be serialized.
|
| 38 |
+
런타임 객체(delegate processor, timm/torchvision transform)는 절대 직렬화하면 안 됨.
|
|
|
|
| 39 |
3) Runtime objects are rebuilt at init/load time based on backbone meta.
|
| 40 |
+
런타임 객체는 backbone meta에 따라 init/load 시점에 재구성.
|
|
|
|
| 41 |
4) For reproducibility, use_fast must be explicitly persisted and honored on load.
|
| 42 |
+
재현성을 위해 use_fast는 명시적으로 저장되고, 로드시 반드시 반영되어야 함.
|
| 43 |
"""
|
| 44 |
|
| 45 |
# HF vision models conventionally expect "pixel_values" as the primary input key.
|
| 46 |
+
# HF vision 모델은 관례적으로 입력 키로 "pixel_values"를 기대.
|
| 47 |
model_input_names = ["pixel_values"]
|
| 48 |
|
| 49 |
def __init__(
|
| 50 |
self,
|
| 51 |
backbone_name_or_path: BackboneID,
|
| 52 |
+
is_training: bool = False, # timm 에서 data augmentation 용.
|
| 53 |
use_fast: bool = False,
|
| 54 |
**kwargs,
|
| 55 |
):
|
| 56 |
# ImageProcessingMixin stores extra kwargs and manages auto_map metadata.
|
| 57 |
+
# ImageProcessingMixin은 추가 kwargs를 저장하고 auto_map 메타를 관리.
|
| 58 |
super().__init__(**kwargs)
|
| 59 |
|
| 60 |
# Enforce whitelist via BACKBONE_META to keep behavior stable.
|
| 61 |
+
# 동작 안정성을 위해 BACKBONE_META 기반 화이트리스트를 강제. - fast fail
|
| 62 |
if backbone_name_or_path not in BACKBONE_META:
|
| 63 |
raise ValueError(
|
| 64 |
f"Unsupported backbone_name_or_path={backbone_name_or_path}. "
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
# Serializable fields only: these should appear in preprocessor_config.json.
|
| 69 |
+
# 직렬화 가능한 필드만: 이 값들만 preprocessor_config.json에 들어가야 함
|
| 70 |
self.backbone_name_or_path = backbone_name_or_path
|
| 71 |
self.is_training = bool(is_training)
|
| 72 |
|
| 73 |
# Reproducibility switch for transformers processors.
|
| 74 |
+
# transformers processor의 fast/slow 선택을 재현 가능하게 고정.
|
| 75 |
self.use_fast = bool(use_fast)
|
| 76 |
|
| 77 |
# Runtime-only fields: must never be serialized.
|
| 78 |
+
# 런타임 전용 필드: 절대 직렬화되면 안 됨.
|
| 79 |
self._meta = None
|
| 80 |
+
self._delegate = None
|
| 81 |
+
self._timm_transform = None
|
| 82 |
self._torchvision_transform = None
|
| 83 |
|
| 84 |
# Build runtime objects according to backbone type.
|
| 85 |
+
# backbone type에 따라 런타임 객체를 구성.
|
| 86 |
self._build_runtime()
|
| 87 |
|
| 88 |
# ============================================================
|
|
|
|
| 92 |
def _build_runtime(self):
|
| 93 |
"""
|
| 94 |
Build runtime delegate/transform based on BACKBONE_META["type"].
|
| 95 |
+
BACKBONE_META["type"]에 따라 런타임 delegate/transform을 구성.
|
| 96 |
"""
|
| 97 |
meta = BACKBONE_META[self.backbone_name_or_path]
|
| 98 |
self._meta = meta
|
| 99 |
|
| 100 |
# Always reset runtime fields before rebuilding.
|
| 101 |
+
# 재구성 전 런타임 필드는 항상 초기화.
|
| 102 |
self._delegate = None
|
| 103 |
self._timm_transform = None
|
| 104 |
self._torchvision_transform = None
|
|
|
|
| 107 |
|
| 108 |
if t == "timm_densenet":
|
| 109 |
# timm DenseNet uses timm.data transforms for ImageNet-style preprocessing.
|
| 110 |
+
# timm DenseNet은 ImageNet 전처리를 위해 timm.data transform을 사용.
|
| 111 |
self._timm_transform = self._build_timm_transform(
|
| 112 |
backbone_id=self.backbone_name_or_path,
|
| 113 |
is_training=self.is_training,
|
|
|
|
| 116 |
|
| 117 |
if t == "torchvision_densenet":
|
| 118 |
# torchvision DenseNet requires torchvision-style preprocessing (resize/crop/tensor/normalize).
|
| 119 |
+
# torchvision DenseNet은 torchvision 스타일 전처리(resize/crop/tensor/normalize)가 필요.
|
| 120 |
self._torchvision_transform = self._build_torchvision_densenet_transform(
|
| 121 |
is_training=self.is_training
|
| 122 |
)
|
| 123 |
return
|
| 124 |
|
| 125 |
# Default: transformers backbone delegates to its official AutoImageProcessor.
|
| 126 |
+
# 기본: transformers 백본은 공식 AutoImageProcessor에 위임.
|
| 127 |
#
|
| 128 |
# IMPORTANT:
|
| 129 |
+
# - use_fast는 transformers 기본값 변경에 흔들리지 않도록 반드시 명시적으로 전달.
|
| 130 |
self._delegate = AutoImageProcessor.from_pretrained(
|
| 131 |
self.backbone_name_or_path,
|
| 132 |
use_fast=self.use_fast,
|
|
|
|
| 137 |
def _build_timm_transform(*, backbone_id: str, is_training: bool):
|
| 138 |
"""
|
| 139 |
Create timm transform without storing non-serializable objects in config.
|
| 140 |
+
비직렬화 객체를 config에 저장하지 않고 timm transform을 생성.
|
| 141 |
"""
|
| 142 |
try:
|
| 143 |
import timm
|
|
|
|
| 148 |
) from e
|
| 149 |
|
| 150 |
# We only need model metadata to resolve data config, so pretrained=False is preferred.
|
| 151 |
+
# data config 추출만 필요하므로 pretrained=False를 우선 사용.
|
| 152 |
m = timm.create_model(f"hf_hub:{backbone_id}", pretrained=False, num_classes=0)
|
| 153 |
dc = resolve_model_data_config(m)
|
| 154 |
|
| 155 |
# create_transform returns a torchvision-like callable that maps PIL -> torch.Tensor(C,H,W).
|
| 156 |
+
# create_transform은 PIL -> torch.Tensor(C,H,W)로 매핑하는 callable을 반환.
|
| 157 |
+
tfm = create_transform(**dc, is_training=is_training) # is_training :Data Aug.
|
| 158 |
return tfm
|
| 159 |
|
| 160 |
@staticmethod
|
| 161 |
def _build_torchvision_densenet_transform(*, is_training: bool):
|
| 162 |
"""
|
| 163 |
Build torchvision preprocessing for DenseNet-121 (224 pipeline).
|
| 164 |
+
DenseNet-121용 torchvision 전처리(224 파이프라인)를 구성.
|
| 165 |
"""
|
| 166 |
try:
|
| 167 |
from torchvision import transforms
|
|
|
|
| 171 |
) from e
|
| 172 |
|
| 173 |
# These are the standard ImageNet normalization stats used by torchvision weights.
|
| 174 |
+
# 이 값들은 torchvision weights가 사용하는 표준 ImageNet 정규화 통계.
|
| 175 |
mean = (0.485, 0.456, 0.406)
|
| 176 |
+
std = (0.229, 0.224, 0.225)
|
| 177 |
|
| 178 |
# Training pipeline typically uses RandomResizedCrop and horizontal flip.
|
| 179 |
+
# 학습 파이프라인은 보통 RandomResizedCrop과 좌우반전을 사용.
|
| 180 |
if is_training:
|
| 181 |
return transforms.Compose(
|
| 182 |
[
|
| 183 |
+
# transforms.RandomResizedCrop(224),
|
| 184 |
+
# transforms.RandomHorizontalFlip(p=0.5),
|
| 185 |
+
transforms.Resize(224),
|
| 186 |
transforms.ToTensor(),
|
| 187 |
transforms.Normalize(mean=mean, std=std),
|
| 188 |
]
|
| 189 |
)
|
| 190 |
|
| 191 |
# Inference pipeline typically uses Resize(256) + CenterCrop(224).
|
| 192 |
+
# 추론 파이프라인은 보통 Resize(256) + CenterCrop(224)를 사용.
|
| 193 |
return transforms.Compose(
|
| 194 |
[
|
| 195 |
transforms.Resize(256),
|
| 196 |
+
# transforms.CenterCrop(224),
|
| 197 |
transforms.ToTensor(),
|
| 198 |
transforms.Normalize(mean=mean, std=std),
|
| 199 |
]
|
|
|
|
| 206 |
def to_dict(self) -> dict[str, Any]:
|
| 207 |
"""
|
| 208 |
Return a JSON-serializable dict for preprocessor_config.json.
|
| 209 |
+
preprocessor_config.json에 들어갈 JSON 직렬화 dict를 반환.
|
| 210 |
|
| 211 |
Important: do not leak runtime objects into the serialized dict.
|
| 212 |
+
중요: 런타임 객체가 직렬화 dict에 섞이면 안 됨.
|
| 213 |
"""
|
| 214 |
# ImageProcessingMixin.to_dict() adds metadata such as image_processor_type/auto_map.
|
| 215 |
# ImageProcessingMixin.to_dict()는 image_processor_type/auto_map 같은 메타를 추가합니다.
|
| 216 |
d = super().to_dict()
|
| 217 |
|
| 218 |
# Force minimal stable fields for long-term compatibility.
|
| 219 |
+
# 장기 호환을 위해 최소 안정 필드를 강제로 지정.
|
| 220 |
+
d["image_processor_type"] = self.__class__.__name__
|
| 221 |
d["backbone_name_or_path"] = self.backbone_name_or_path
|
| 222 |
d["is_training"] = self.is_training
|
| 223 |
+
d["use_fast"] = self.use_fast
|
| 224 |
|
| 225 |
# Remove any runtime-only fields defensively.
|
| 226 |
+
# 런타임 전용 필드는 보수적으로 제거.
|
| 227 |
for key in ["_meta", "_delegate", "_timm_transform", "_torchvision_transform"]:
|
| 228 |
d.pop(key, None)
|
| 229 |
|
|
|
|
| 233 |
def from_dict(cls, image_processor_dict: dict[str, Any], **kwargs):
|
| 234 |
"""
|
| 235 |
Standard load path used by BaseImageProcessor / AutoImageProcessor.
|
| 236 |
+
BaseImageProcessor / AutoImageProcessor가 사용하는 표준 로드 경로임.
|
| 237 |
"""
|
| 238 |
backbone = image_processor_dict.get("backbone_name_or_path", None)
|
| 239 |
if backbone is None:
|
| 240 |
raise ValueError("preprocessor_config.json missing key: backbone_name_or_path")
|
| 241 |
|
| 242 |
is_training = bool(image_processor_dict.get("is_training", False))
|
| 243 |
+
use_fast = bool(image_processor_dict.get("use_fast", False))
|
| 244 |
|
| 245 |
return cls(
|
| 246 |
backbone_name_or_path=backbone,
|
|
|
|
| 253 |
def from_pretrained(cls, pretrained_model_name_or_path: str, **kwargs):
|
| 254 |
"""
|
| 255 |
Fallback path if AutoImageProcessor calls class.from_pretrained directly.
|
| 256 |
+
AutoImageProcessor가 class.from_pretrained를 직접 호출하는 경우를 대비한 메서드.
|
| 257 |
|
| 258 |
Strategy:
|
| 259 |
전략:
|
| 260 |
|
| 261 |
- Read config.json via AutoConfig and recover backbone_name_or_path.
|
| 262 |
+
AutoConfig로 config.json을 읽고 backbone_name_or_path를 복구.
|
| 263 |
"""
|
| 264 |
|
| 265 |
# is_training is runtime-only and should default to False for inference/serving.
|
| 266 |
+
# is_training은 런타임 전용이며 추론/서빙 기본값은 False 임.
|
| 267 |
#
|
| 268 |
# IMPORTANT:
|
| 269 |
+
# - use_fast는 kwargs로 전달될 수 있으므로, 있으면 반영.
|
| 270 |
use_fast = bool(kwargs.pop("use_fast", False))
|
| 271 |
|
| 272 |
kwargs.pop("trust_remote_code", None)
|
|
|
|
| 287 |
@staticmethod
|
| 288 |
def _ensure_list(images: Any) -> list[Any]:
|
| 289 |
# Normalize scalar image input to a list for uniform processing.
|
| 290 |
+
# 단일 입력을 리스트로 정규화하여 동일한 처리 경로를 사용.
|
| 291 |
if isinstance(images, (list, tuple)):
|
| 292 |
return list(images)
|
| 293 |
return [images]
|
|
|
|
| 295 |
@staticmethod
|
| 296 |
def _to_pil_rgb(x: Any):
|
| 297 |
# Convert common image inputs into PIL RGB images.
|
| 298 |
+
# 일반적인 입력을 PIL RGB 이미지로 변환.
|
| 299 |
from PIL import Image as PILImage
|
| 300 |
|
| 301 |
if isinstance(x, PILImage.Image):
|
|
|
|
| 312 |
) -> dict[str, Any]:
|
| 313 |
"""
|
| 314 |
Convert images into {"pixel_values": Tensor/ndarray}.
|
| 315 |
+
이미지를 {"pixel_values": Tensor/ndarray}로 변환.
|
| 316 |
"""
|
| 317 |
images = self._ensure_list(images)
|
| 318 |
|
| 319 |
# Rebuild runtime if needed (e.g., right after deserialization).
|
| 320 |
+
# 직렬화 복원 직후 등 런타임이 비어있을 수 있으므로 재구성.
|
| 321 |
if (self._delegate is None) and (self._timm_transform is None) and (self._torchvision_transform is None):
|
| 322 |
self._build_runtime()
|
| 323 |
|
| 324 |
# timm path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 325 |
+
# timm 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 326 |
if self._timm_transform is not None:
|
| 327 |
pv: list[torch.Tensor] = []
|
| 328 |
for im in images:
|
|
|
|
| 335 |
return self._format_return(pixel_values, return_tensors)
|
| 336 |
|
| 337 |
# torchvision path: PIL -> torch.Tensor(C,H,W) normalized float32.
|
| 338 |
+
# torchvision 경로: PIL -> torch.Tensor(C,H,W) 정규화 float32.
|
| 339 |
if self._torchvision_transform is not None:
|
| 340 |
pv: list[torch.Tensor] = []
|
| 341 |
for im in images:
|
|
|
|
| 348 |
return self._format_return(pixel_values, return_tensors)
|
| 349 |
|
| 350 |
# transformers delegate path: rely on official processor behavior.
|
| 351 |
+
# transformers 위임 경로: 공식 processor 동작을 그대로 사용.
|
| 352 |
if self._delegate is None:
|
| 353 |
raise RuntimeError("Processor runtime not built: delegate is None and no transforms are available.")
|
| 354 |
|
|
|
|
| 358 |
def _format_return(pixel_values: torch.Tensor, return_tensors: str | TensorType | None) -> dict[str, Any]:
|
| 359 |
"""
|
| 360 |
Format pixel_values according to return_tensors.
|
| 361 |
+
return_tensors에 맞춰 pixel_values 반환 포맷을 변환.
|
| 362 |
"""
|
| 363 |
if return_tensors is None or return_tensors in ("pt", TensorType.PYTORCH):
|
| 364 |
return {"pixel_values": pixel_values}
|
|
|
|
| 368 |
|
| 369 |
|
| 370 |
# Register this processor for AutoImageProcessor resolution.
|
| 371 |
+
# AutoImageProcessor 해석을 위해 이 processor를 등록.
|
| 372 |
if __name__ != "__main__":
|
| 373 |
BackboneMLPHead224ImageProcessor.register_for_auto_class("AutoImageProcessor")
|
models/torchvision__densenet121/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33394052
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3d373da2c45fdd83a13526586f67a2ccdc791505d1d5d26f878d6cb2a982e87
|
| 3 |
size 33394052
|