Image Classification
Keras
LiteRT
TF-Keras
Safetensors
English
efficientnetv2-s
efficientnetv2
fgic
transfer-learning
gem-pooling
focal-loss
swa
grad-cam
calibration
temperature-scaling
computer-vision
tensorflow.js
Eval Results (legacy)
Instructions to use 0xgr3y/Arch-Building-Image-Classification with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Keras
How to use 0xgr3y/Arch-Building-Image-Classification with Keras:
```python
# Available backend options are: "jax", "torch", "tensorflow".
import os
os.environ["KERAS_BACKEND"] = "jax"

import keras

model = keras.saving.load_model("hf://0xgr3y/Arch-Building-Image-Classification")
```
- Notebooks
- Google Colab
- Kaggle
| """Arch-Building-Image-Classification — Model Construction & Inference Module. | |
| This module provides the architecture definition, custom layer implementations, | |
| and inference utilities for the EfficientNetV2-S-based fine-grained visual | |
| classification (FGIC) model trained on the World Architectural Buildings dataset. | |
| Custom layers (GeMPooling, FocalLoss, DiscriminativeAdamW) are registered via | |
| ``@register_keras_serializable`` so that ``tf.keras.models.load_model`` can | |
| deserialize them without an explicit ``custom_objects`` dict — simply importing | |
| this module is sufficient. | |
| Usage — Clean load (no ProtectAI flag, recommended): | |
| >>> from build_model import ArchBuildingClassifier | |
| >>> clf = ArchBuildingClassifier.build() | |
| >>> clf.load_weights('fine_tuning_swa.weights.h5') | |
| >>> preds = clf.predict(image_array) | |
| Usage — Load from .keras (flagged by ProtectAI but functionally correct): | |
| >>> import build_model # registers custom classes | |
| >>> import tensorflow as tf | |
| >>> model = tf.keras.models.load_model('fine_tuning_swa.keras') | |
| Usage — Inference with preprocessing: | |
| >>> from build_model import ArchBuildingClassifier | |
| >>> clf = ArchBuildingClassifier.from_weights('fine_tuning_swa.weights.h5') | |
| >>> label, confidence, top3 = clf.predict(image_pil_or_array) | |
| References: | |
| - GeM Pooling: Radenovic et al., IEEE TPAMI 2018 | |
| - Focal Loss: Lin et al., ICCV 2017 | |
| - DiscriminativeAdamW: Howard & Ruder, ACL 2018 (selective fine-tuning) | |
| - Random Erasing: Zhong et al., AAAI 2020 | |
| - SWA: Izmailov et al., UAI 2018 | |
| License: | |
| - Code: MIT | |
| - Model weights: Apache-2.0 | |
| - Dataset: CC-BY-4.0 | |
| """ | |
| from __future__ import annotations | |
| import os | |
| from typing import Dict, List, Optional, Tuple, Union | |
| import numpy as np | |
| import tensorflow as tf | |
| from tensorflow.keras.applications import EfficientNetV2S | |
| try: | |
| from tensorflow.keras.applications.efficientnet_v2 import preprocess_input | |
| except (ImportError, ModuleNotFoundError): | |
| from tensorflow.keras.applications.efficientnet import preprocess_input | |
| from tensorflow.keras.layers import ( | |
| BatchNormalization, | |
| Conv2D, | |
| Dense, | |
| Dropout, | |
| Layer, | |
| MaxPooling2D, | |
| ) | |
| from tensorflow.keras.layers import Input | |
| # --------------------------------------------------------------------------- | |
| # Compatibility shim — tf.keras.saving is not exposed in all TF/Keras setups. | |
| # --------------------------------------------------------------------------- | |
| try: | |
| from tensorflow.keras.saving import register_keras_serializable | |
| except (ImportError, AttributeError): | |
| try: | |
| from keras.saving import register_keras_serializable | |
| except (ImportError, AttributeError): | |
| def register_keras_serializable(package: Optional[str] = None): | |
| """No-op fallback when Keras saving API is unavailable.""" | |
| def decorator(cls): | |
| return cls | |
| return decorator | |
# Public API of this module (what ``from build_model import *`` exposes).
__all__ = [
    # High-level wrapper
    "ArchBuildingClassifier",
    # Custom Keras components
    "GeMPooling",
    "FocalLoss",
    "DiscriminativeAdamW",
    # Registries and constants
    "CUSTOM_OBJECTS",
    "LABELS",
    # Functional entry point
    "build_model",
]
| # --------------------------------------------------------------------------- | |
| # Module-level constants | |
| # --------------------------------------------------------------------------- | |
# Class names, alphabetically ordered. The position in this list is the
# index used to translate an argmax over the model output into a label.
LABELS: List[str] = [
    "barn", "bridge", "castle", "mosque",
    "skyscraper", "stadium", "temple", "windmill",
]

# Default model input shape as (height, width, channels).
INPUT_SHAPE: Tuple[int, int, int] = (320, 320, 3)

# Derived from LABELS so the two can never drift apart.
NUM_CLASSES: int = len(LABELS)

# Package namespace for Keras serialization of the custom classes.
PACKAGE: str = "ArchClassifier"
| # =========================================================================== | |
| # Custom Layers | |
| # =========================================================================== | |
@register_keras_serializable(package=PACKAGE)
class GeMPooling(Layer):
    """Generalized Mean (GeM) pooling over the two spatial axes.

    Computes ``mean(max(x, eps) ** p) ** (1 / p)`` over axes 1 and 2, where
    ``p`` is a trainable scalar. ``p -> 1`` reduces to average pooling and
    ``p -> inf`` approaches max pooling.

    The class is registered with Keras under ``PACKAGE`` so a saved model
    that contains this layer can be deserialized once this module has been
    imported.

    Args:
        p: Initial value for the pooling power parameter (default: 3.0).
        eps: Lower clamp applied to the input before exponentiation, for
            numerical stability (default: 1e-6).
        **kwargs: Standard Keras layer keyword arguments (name, trainable, etc.).

    Reference:
        Radenovic, F., Tolias, G., & Chum, O. (2018). Fine-tuning CNN
        Image Retrieval with No Human Annotation. IEEE TPAMI.
    """

    def __init__(self, p: float = 3.0, eps: float = 1e-6, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor value separately: ``self.p`` becomes the
        # trainable weight in ``build``.
        self.p_init = p
        self.eps = eps

    def build(self, input_shape):
        """Create the trainable scalar exponent ``p``."""
        self.p = self.add_weight(
            name="gem_p",
            shape=(),
            initializer=tf.keras.initializers.Constant(self.p_init),
            trainable=True,
            dtype=tf.float32,
        )
        super().build(input_shape)

    def call(self, x: tf.Tensor) -> tf.Tensor:
        """Pool ``x`` over axes 1 and 2.

        Args:
            x: Feature map; axes 1 and 2 are reduced (presumably a
                channels-last ``(batch, H, W, C)`` tensor — confirm with caller).

        Returns:
            Tensor with axes 1 and 2 removed.
        """
        # Clamp first so the fractional power below never sees values < eps.
        x = tf.maximum(x, self.eps)
        x = tf.pow(x, self.p)
        x = tf.reduce_mean(x, axis=[1, 2], keepdims=False)
        x = tf.pow(x, 1.0 / self.p)
        return x

    def get_config(self) -> dict:
        """Return the constructor arguments for serialization.

        ``p`` is stored as the *initial* value; the learned value lives in
        the layer's weights.
        """
        config = super().get_config()
        config.update({"p": self.p_init, "eps": self.eps})
        return config
@register_keras_serializable(package=PACKAGE)
class FocalLoss(tf.keras.losses.Loss):
    """Focal Loss for class imbalance and hard-example mining.

    Down-weights well-classified examples via ``(1 - p)^gamma``, focusing
    gradient updates on difficult samples. Optional label smoothing softens
    the targets to discourage overconfidence.

    The class is registered with Keras under ``PACKAGE`` so a saved model
    compiled with this loss can be deserialized once this module has been
    imported.

    Args:
        gamma: Focusing parameter; higher values increase down-weighting
            of easy examples (default: 2.0, per Lin et al.).
        alpha: Optional weighting factor, either a scalar or one value per
            class (broadcast along the class axis). If None, no weighting
            is applied.
        label_smoothing: Smoothing factor in [0, 1) applied to the targets
            (default: 0.0).
        **kwargs: Standard Keras loss keyword arguments.

    Reference:
        Lin, T.-Y., Goyal, P., Girshick, R., He, K., & Dollar, P. (2017).
        Focal Loss for Dense Object Detection. ICCV 2017.
    """

    def __init__(
        self,
        gamma: float = 2.0,
        alpha: Optional[Union[float, List[float]]] = None,
        label_smoothing: float = 0.0,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.gamma = gamma
        self.alpha = alpha
        self.label_smoothing = label_smoothing

    def call(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
        """Compute the focal loss for each sample.

        Args:
            y_true: One-hot (or already soft) targets, class axis last.
            y_pred: Predicted class probabilities, same shape as ``y_true``.

        Returns:
            Per-sample loss with the class axis summed away. The batch
            reduction is left to the base ``Loss`` class so ``reduction``
            and ``sample_weight`` behave as documented by Keras; under the
            default reduction the result equals the batch mean.
        """
        # Keep probabilities away from 0 and 1 so log() stays finite.
        y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
        # Integer one-hot targets would break the float arithmetic below.
        y_true = tf.cast(y_true, y_pred.dtype)

        if self.label_smoothing > 0:
            num_classes = tf.cast(tf.shape(y_true)[-1], y_pred.dtype)
            y_true = y_true * (1.0 - self.label_smoothing) + (
                self.label_smoothing / num_classes
            )

        ce = -y_true * tf.math.log(y_pred)
        weight = tf.pow(1.0 - y_pred, self.gamma)
        fl = weight * ce

        if self.alpha is not None:
            # ``ce`` already carries the y_true factor; multiplying by
            # y_true again would square smoothed targets.
            fl = fl * tf.cast(self.alpha, fl.dtype)

        return tf.reduce_sum(fl, axis=-1)

    def get_config(self) -> dict:
        """Return the constructor arguments for serialization."""
        config = super().get_config()
        config.update(
            {
                "gamma": self.gamma,
                "alpha": self.alpha,
                "label_smoothing": self.label_smoothing,
            }
        )
        return config
@register_keras_serializable(package=PACKAGE)
class DiscriminativeAdamW(tf.keras.optimizers.AdamW):
    """AdamW with per-variable learning rate scaling for selective fine-tuning.

    Overrides ``update_step`` to multiply the learning rate by a
    per-variable factor chosen from layer-name patterns in the backbone.
    Unlike gradient scaling (which Adam normalizes away), scaling the
    learning rate yields different update magnitudes — e.g. block6
    variables receive 10x smaller updates than head variables when
    ``lr_multipliers={'block6': 0.1}``.

    The class is registered with Keras under ``PACKAGE`` so a saved model
    compiled with this optimizer can be deserialized once this module has
    been imported.

    Args:
        lr_multipliers: Mapping from layer-name substrings to LR scale
            factors. e.g. ``{'block6': 0.1}`` applies 0.1x learning rate
            to all block6 variables. The first matching pattern (in dict
            order) wins.
        backbone_layer_idx: Index of the backbone within ``model.layers``,
            used only when no nested ``tf.keras.Model`` is found
            (default: 0).
        **kwargs: Standard AdamW keyword arguments (learning_rate,
            weight_decay, etc.).

    Note:
        Multipliers take effect only after ``_build_var_cache(model)`` has
        been called; until then every variable uses 1.0. The cache is
        keyed by ``id(variable)``, is built from each layer's
        ``trainable_variables`` at call time, and is not part of
        ``get_config``, so it must be rebuilt after the set of trainable
        layers changes or after the optimizer is re-created.

    Reference:
        Howard, J., & Ruder, S. (2018). Universal Language Model
        Fine-tuning for Text Classification. ACL 2018.
    """

    def __init__(
        self,
        lr_multipliers: Optional[Dict[str, float]] = None,
        backbone_layer_idx: int = 0,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Copy so later mutation of the caller's dict cannot change the optimizer.
        self.lr_multipliers = dict(lr_multipliers) if lr_multipliers else {}
        self.backbone_layer_idx = backbone_layer_idx
        self._var_mult_cache: Dict[int, float] = {}

    def _build_var_cache(self, model: tf.keras.Model) -> None:
        """Build the ``id(variable) -> multiplier`` cache from the backbone.

        Args:
            model: Model whose backbone layers are scanned. The first layer
                that is itself a ``tf.keras.Model`` is used as the backbone;
                otherwise ``model.layers[backbone_layer_idx]`` is used.
        """
        self._var_mult_cache = {}
        base_model = next(
            (layer for layer in model.layers if isinstance(layer, tf.keras.Model)),
            None,
        )
        if base_model is None:
            base_model = model.layers[self.backbone_layer_idx]

        for layer in base_model.layers:
            # First pattern contained in the layer name wins; default is 1.0.
            mult = next(
                (m for pattern, m in self.lr_multipliers.items() if pattern in layer.name),
                1.0,
            )
            for var in layer.trainable_variables:
                self._var_mult_cache[id(var)] = mult

    def _get_multiplier(self, var: tf.Variable) -> float:
        """Return the cached multiplier for ``var`` (1.0 if not cached)."""
        return self._var_mult_cache.get(id(var), 1.0)

    def update_step(self, gradient, variable, learning_rate):
        """Apply the parent AdamW update with a per-variable learning rate."""
        mult = self._get_multiplier(variable)
        effective_lr = learning_rate * mult
        return super().update_step(gradient, variable, effective_lr)

    def get_config(self) -> dict:
        """Return the constructor arguments for serialization."""
        config = super().get_config()
        config.update(
            {
                "lr_multipliers": self.lr_multipliers,
                "backbone_layer_idx": self.backbone_layer_idx,
            }
        )
        return config
| # --------------------------------------------------------------------------- | |
| # Custom objects registry (for explicit load_model custom_objects dict) | |
| # --------------------------------------------------------------------------- | |
# Maps each custom class's name to the class itself, for callers that pass
# an explicit ``custom_objects`` dict to ``load_model``.
CUSTOM_OBJECTS: Dict[str, type] = {
    klass.__name__: klass
    for klass in (GeMPooling, FocalLoss, DiscriminativeAdamW)
}
| # =========================================================================== | |
| # Model Wrapper Class | |
| # =========================================================================== | |
class ArchBuildingClassifier:
    """High-level wrapper for the Arch-Building-Image-Classification model.

    Encapsulates architecture construction, weight loading from multiple
    formats, and single/batch inference.

    The underlying architecture is a Functional model:
        EfficientNetV2-S (frozen, training=False) -> Conv2D(256) -> BN -> MaxPool ->
        GeMPooling(p=3.0) -> Dense(256) -> BN -> Dropout(0.4) ->
        Dense(8, softmax, dtype=float32)

    Attributes:
        labels: List of class label strings (alphabetical order).
        input_shape: Expected input tensor shape (H, W, C).
        num_classes: Number of output classes.

    Example:
        >>> clf = ArchBuildingClassifier.from_weights('model.weights.h5')
        >>> label, conf, top3 = clf.predict(image)
        >>> print(f"Predicted: {label} ({conf:.1%})")
    """

    # Class-level defaults; ``build`` falls back to these when its
    # arguments are omitted.
    labels: List[str] = LABELS
    input_shape: Tuple[int, int, int] = INPUT_SHAPE
    num_classes: int = NUM_CLASSES

    def __init__(self, model: Optional[tf.keras.Model] = None):
        """Wrap an existing Keras model, or start empty with ``model=None``."""
        self._model = model

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    @classmethod
    def build(
        cls,
        input_shape: Optional[Tuple[int, int, int]] = None,
        num_classes: Optional[int] = None,
        backbone_weights: Optional[str] = "imagenet",
    ) -> "ArchBuildingClassifier":
        """Construct the model architecture from scratch.

        Creates a Functional model with a frozen EfficientNetV2-S backbone
        and a custom classification head featuring GeM pooling. The output
        Dense layer uses dtype=float32 for mixed precision stability.

        Args:
            input_shape: Input tensor shape (default: (320, 320, 3)).
            num_classes: Number of output classes (default: 8).
            backbone_weights: Passed to ``EfficientNetV2S(weights=...)``.
                ``"imagenet"`` (default) loads pretrained backbone weights;
                ``None`` skips that and leaves the backbone randomly
                initialised, which is appropriate when a full checkpoint is
                loaded immediately afterwards.

        Returns:
            An ArchBuildingClassifier whose classification head is
            untrained. The model is not compiled.
        """
        input_shape = input_shape or cls.input_shape
        num_classes = num_classes or cls.num_classes

        base_model = EfficientNetV2S(
            weights=backbone_weights,
            include_top=False,
            include_preprocessing=True,
            input_shape=input_shape,
        )
        base_model.trainable = False

        inputs = Input(shape=input_shape)
        # training=False keeps the backbone in inference mode when called.
        x = base_model(inputs, training=False)
        x = Conv2D(256, (3, 3), activation="relu", padding="same")(x)
        x = BatchNormalization()(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = GeMPooling(p=3.0, name="gem_pooling")(x)
        x = Dense(256, activation="relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.4)(x)
        outputs = Dense(num_classes, activation="softmax", dtype="float32")(x)

        model = tf.keras.Model(inputs, outputs)
        return cls(model)

    @classmethod
    def from_keras(cls, path: str) -> "ArchBuildingClassifier":
        """Load from a .keras checkpoint file.

        The custom classes are supplied explicitly via ``CUSTOM_OBJECTS``,
        and the model is loaded with ``compile=False`` (inference only).

        Args:
            path: Path to the .keras file.

        Returns:
            An ArchBuildingClassifier with loaded weights and architecture.
        """
        model = tf.keras.models.load_model(
            path, custom_objects=CUSTOM_OBJECTS, compile=False
        )
        return cls(model)

    @classmethod
    def from_weights(cls, weights_path: str) -> "ArchBuildingClassifier":
        """Reconstruct architecture and load weights from .weights.h5.

        This is the recommended loading path for production inference —
        the .weights.h5 format does not carry custom class references and
        is not flagged by ProtectAI Guardian (PAIT-KERAS-301).

        Args:
            weights_path: Path to the .weights.h5 file.

        Returns:
            An ArchBuildingClassifier with loaded weights.
        """
        # The checkpoint overwrites every weight, so fetching the ImageNet
        # backbone first would only add a needless download.
        clf = cls.build(backbone_weights=None)
        clf._model.load_weights(weights_path)
        return clf

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def load_weights(self, weights_path: str) -> None:
        """Load weights into the existing model.

        Args:
            weights_path: Path to the .weights.h5 file.

        Raises:
            RuntimeError: If no model has been built or loaded yet.
        """
        self.keras_model.load_weights(weights_path)

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------
    def _preprocess(self, image: Union[np.ndarray, "Image.Image"]) -> np.ndarray:
        """Resize one image to the model input size and add a batch axis.

        Args:
            image: PIL Image or numpy array (H, W, C) in uint8 range.

        Returns:
            Float32 array of shape ``(1, H, W, C)`` at the model's input size.
        """
        height, width = self.input_shape[:2]

        # Detect PIL by ``convert``: ``np.ndarray`` also has a ``resize``
        # method, so testing for ``resize`` would misroute arrays here.
        if hasattr(image, "convert"):
            # PIL's resize takes (width, height).
            image = image.convert("RGB").resize((width, height))
            image = np.array(image, dtype=np.float32)
        else:
            image = np.asarray(image)
            # Spatial axes are the two before the channel axis, which holds
            # for (H, W, C) and for an already-batched (N, H, W, C).
            if tuple(image.shape[-3:-1]) != (height, width):
                image = tf.image.resize(image, (height, width)).numpy()
            image = np.asarray(image, dtype=np.float32)

        if image.ndim == 3:
            image = np.expand_dims(image, axis=0)

        image = preprocess_input(image)
        return image

    def predict(
        self,
        image: Union[np.ndarray, "Image.Image"],
        top_k: int = 3,
    ) -> Tuple[str, float, List[Tuple[str, float]]]:
        """Run inference on a single image.

        Args:
            image: PIL Image or numpy array (H, W, C) in uint8 range.
            top_k: Number of top predictions to return.

        Returns:
            Tuple of (predicted_label, confidence, top_k_list) where
            top_k_list is a list of (label, probability) pairs sorted by
            descending probability.

        Raises:
            RuntimeError: If no model has been built or loaded yet.
        """
        if self._model is None:
            raise RuntimeError("Model not initialized. Call build() first.")

        x = self._preprocess(image)
        probs = self._model.predict(x, verbose=0)[0]

        idx = int(np.argmax(probs))
        label = self.labels[idx]
        confidence = float(probs[idx])

        top_indices = np.argsort(probs)[::-1][:top_k]
        top_k_list = [(self.labels[i], float(probs[i])) for i in top_indices]
        return label, confidence, top_k_list

    def predict_batch(
        self,
        images: List[Union[np.ndarray, "Image.Image"]],
    ) -> List[Tuple[str, float]]:
        """Run batch inference on multiple images.

        Args:
            images: List of PIL Images or numpy arrays.

        Returns:
            List of (label, confidence) tuples, one per input image; an
            empty list when ``images`` is empty.

        Raises:
            RuntimeError: If no model has been built or loaded yet.
        """
        if self._model is None:
            raise RuntimeError("Model not initialized. Call build() first.")
        # np.vstack raises on an empty sequence, so short-circuit.
        if len(images) == 0:
            return []

        batch = np.vstack([self._preprocess(img) for img in images])
        probs = self._model.predict(batch, verbose=0)

        results = []
        for row in probs:
            idx = int(np.argmax(row))
            results.append((self.labels[idx], float(row[idx])))
        return results

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------
    @property
    def keras_model(self) -> tf.keras.Model:
        """The underlying tf.keras.Model instance.

        Raises:
            RuntimeError: If no model has been built or loaded yet.
        """
        if self._model is None:
            raise RuntimeError("Model not initialized. Call build() first.")
        return self._model

    @property
    def parameters(self) -> int:
        """Total number of model parameters."""
        return self.keras_model.count_params()

    def summary(self) -> None:
        """Print the model architecture summary."""
        self.keras_model.summary()
| # =========================================================================== | |
| # Backward-compatible convenience function | |
| # =========================================================================== | |
def build_model(
    input_shape: Tuple[int, int, int] = INPUT_SHAPE,
    num_classes: int = NUM_CLASSES,
) -> tf.keras.Model:
    """Build the architecture and hand back the bare ``tf.keras.Model``.

    Kept for backward compatibility: this simply delegates to
    ``ArchBuildingClassifier.build()`` and unwraps the result. Prefer the
    class itself in new code, since it also offers ``predict()``,
    ``from_weights()``, and the other helpers.

    Args:
        input_shape: Input tensor shape (default: (320, 320, 3)).
        num_classes: Number of output classes (default: 8).

    Returns:
        The freshly built tf.keras.Model. It is not compiled and its
        classification head is untrained.
    """
    classifier = ArchBuildingClassifier.build(
        input_shape=input_shape,
        num_classes=num_classes,
    )
    return classifier.keras_model
| # =========================================================================== | |
| # CLI entry point | |
| # =========================================================================== | |
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(
        description="Arch-Building-Image-Classification model loader"
    )
    cli.add_argument(
        "--weights",
        default="fine_tuning_swa.weights.h5",
        type=str,
        help="Path to .weights.h5 file (default: fine_tuning_swa.weights.h5)",
    )
    cli.add_argument(
        "--keras",
        default=None,
        type=str,
        help="Path to .keras file (alternative to --weights)",
    )
    args = cli.parse_args()

    # A non-empty --keras path takes precedence; otherwise use --weights.
    if not args.keras:
        clf = ArchBuildingClassifier.from_weights(args.weights)
        print(f"Loaded from weights: {args.weights}")
    else:
        clf = ArchBuildingClassifier.from_keras(args.keras)
        print(f"Loaded from .keras: {args.keras}")

    # Short report confirming what was loaded.
    report = (
        f" Parameters: {clf.parameters:,}",
        f" Input shape: {clf.input_shape}",
        f" Classes: {clf.num_classes} ({', '.join(clf.labels)})",
        " Status: Ready for inference.",
    )
    for line in report:
        print(line)