Akimotorakiyu
/

mnist-cnn-classifier

+from transformers import PreTrainedConfig
+from typing import List, Optional
+class MNISTCNNConfig(PreTrainedConfig):
+    """
+    Configuration class for custom MNIST CNN model.
+    This configuration contains all the hyperparameters needed to build the model.
+    """
+    model_type = "mnist_cnn"
+    def __init__(
+        self,
+        # Convolutional layers
+        conv_channels: List[int] = [32, 64],
+        conv_kernel_size: int = 3,
+        conv_padding: int = 1,
+        pool_kernel_size: int = 2,
+        pool_stride: int = 2,
+        # Dropout rates
+        conv_dropout: float = 0.25,
+        fc_dropout: float = 0.5,
+        # Fully connected layers
+        hidden_size: int = 512,
+        # Input/output
+        input_channels: int = 1,  # MNIST is grayscale
+        num_classes: int = 10,  # Digits 0-9
+        # Image dimensions
+        image_size: int = 28,
+        # Normalization parameters
+        normalize_mean: float = 0.1307,
+        normalize_std: float = 0.3081,
+        **kwargs,
+    ):
+        # Validate parameters
+        if not isinstance(conv_channels, list) or len(conv_channels) != 2:
+            raise ValueError(
+                f"`conv_channels` must be a list of 2 integers, got {conv_channels}"
+            )
+        if conv_kernel_size <= 0:
+            raise ValueError(
+                f"`conv_kernel_size` must be positive, got {conv_kernel_size}"
+            )
+        if not (0 <= conv_dropout <= 1):
+            raise ValueError(
+                f"`conv_dropout` must be between 0 and 1, got {conv_dropout}"
+            )
+        if not (0 <= fc_dropout <= 1):
+            raise ValueError(f"`fc_dropout` must be between 0 and 1, got {fc_dropout}")
+        if num_classes <= 0:
+            raise ValueError(f"`num_classes` must be positive, got {num_classes}")
+        # Set configuration attributes
+        self.conv_channels = conv_channels
+        self.conv_kernel_size = conv_kernel_size
+        self.conv_padding = conv_padding
+        self.pool_kernel_size = pool_kernel_size
+        self.pool_stride = pool_stride
+        self.conv_dropout = conv_dropout
+        self.fc_dropout = fc_dropout
+        self.hidden_size = hidden_size
+        self.input_channels = input_channels
+        self.num_classes = num_classes
+        self.image_size = image_size
+        self.normalize_mean = normalize_mean
+        self.normalize_std = normalize_std
+        # Calculate the size of flattened features after conv layers
+        # After two 2x2 pooling operations: 28 -> 14 -> 7
+        self.flattened_size = conv_channels[-1] * (image_size // 4) * (image_size // 4)
+        super().__init__(**kwargs)
+# Example configurations for different model variants
+def create_small_config():
+    """Create a smaller CNN configuration"""
+    return MNISTCNNConfig(
+        conv_channels=[16, 32],
+        hidden_size=256,
+        conv_dropout=0.2,
+        fc_dropout=0.4,
+    )
+def create_large_config():
+    """Create a larger CNN configuration"""
+    return MNISTCNNConfig(
+        conv_channels=[64, 128],
+        hidden_size=1024,
+        conv_dropout=0.3,
+        fc_dropout=0.6,
+    )
+if __name__ == "__main__":
+    # Create and test configuration
+    config = MNISTCNNConfig()
+    print("Default configuration:")
+    print(config)
+    # Save configuration
+    config.save_pretrained("mnist-cnn-config")
+    print(f"\nConfiguration saved to 'mnist-cnn-config'")
+    # Test different configurations
+    small_config = create_small_config()
+    large_config = create_large_config()
+    print(f"\nSmall config flattened size: {small_config.flattened_size}")
+    print(f"Large config flattened size: {large_config.flattened_size}")