Update configuration_eat.py

configuration_eat.py  CHANGED  (+67 -58)
@@ -1,58 +1,67 @@
from transformers import PretrainedConfig


class EATConfig(PretrainedConfig):
    model_type = "eat"

    def __init__(
        self,
        # --- 1. Core Architecture (Dimensions) ---
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        num_classes=527,
        model_variant="pretrain",  # or "finetune"

        # --- 2. Input & Patching ---
        in_chans=1,
        patch_size=16,
        stride=16,
        fixed_positions=True,

        # --- 3. Normalization & Bias Structure ---
        qkv_bias=True,
        layer_norm_first=False,
        norm_affine=True,
        norm_eps=1e-6,

        # --- 4. Fine-Tuning Knobs (Regularization) ---
        drop_rate=0.0,
        attn_drop_rate=0.0,
        activation_dropout=0.0,
        post_mlp_drop=0.0,
        start_drop_path_rate=0.0,
        end_drop_path_rate=0.0,

        # --- 5. Hugging Face Extras ---
        **kwargs,
    ):
        super().__init__(**kwargs)

        # --- 1. Core Architecture ---
        self.embed_dim = embed_dim
        self.depth = depth
        self.num_heads = num_heads
        self.mlp_ratio = mlp_ratio
        self.num_classes = num_classes
        self.model_variant = model_variant

        # --- 2. Input & Patching ---
        self.in_chans = in_chans
        self.patch_size = patch_size
        self.stride = stride
        self.fixed_positions = fixed_positions

        # --- 3. Normalization & Bias ---
        self.qkv_bias = qkv_bias
        self.layer_norm_first = layer_norm_first
        self.norm_affine = norm_affine
        self.norm_eps = norm_eps

        # --- 4. Regularization ---
        self.drop_rate = drop_rate
        self.attn_drop_rate = attn_drop_rate
        self.activation_dropout = activation_dropout
        self.post_mlp_drop = post_mlp_drop
        self.start_drop_path_rate = start_drop_path_rate
        self.end_drop_path_rate = end_drop_path_rate
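Because EATConfig subclasses PretrainedConfig, it plugs into the standard Hugging Face config workflow. A minimal usage sketch; the "eat-config" directory name and the override values are illustrative, not part of this commit:

from transformers import AutoConfig

# Register the custom config so AutoConfig can resolve model_type="eat".
AutoConfig.register("eat", EATConfig)

# Build a fine-tuning config, overriding a couple of regularization knobs.
config = EATConfig(
    model_variant="finetune",
    drop_rate=0.1,
)

# Serialize to config.json and reload it through the standard API.
config.save_pretrained("eat-config")
reloaded = AutoConfig.from_pretrained("eat-config")
assert reloaded.model_type == "eat"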
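The paired start_drop_path_rate / end_drop_path_rate fields suggest a per-block stochastic-depth schedule. A sketch of how a model body might expand them across the depth transformer blocks; the linear ramp and the helper name are assumptions, not taken from this commit:

import numpy as np

def drop_path_schedule(config: EATConfig) -> list:
    # Hypothetical helper: linearly interpolate the drop-path probability
    # from the first to the last of the `depth` transformer blocks.
    return np.linspace(
        config.start_drop_path_rate, config.end_drop_path_rate, config.depth
    ).tolist()

rates = drop_path_schedule(EATConfig(end_drop_path_rate=0.1))
# 12 values ramping from 0.0 up to 0.1, one per block.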