{
  "model_type": "efficientnetv2-s",
  "base_model": "efficientnetv2-s",
  "task": "image-classification",
  "num_labels": 8,
  "id2label": {
    "0": "barn",
    "1": "bridge",
    "2": "castle",
    "3": "mosque",
    "4": "skyscraper",
    "5": "stadium",
    "6": "temple",
    "7": "windmill"
  },
  "label2id": {
    "barn": 0,
    "bridge": 1,
    "castle": 2,
    "mosque": 3,
    "skyscraper": 4,
    "stadium": 5,
    "temple": 6,
    "windmill": 7
  },
  "input_shape": [
    320,
    320,
    3
  ],
  "preprocessing": {
    "mode": "efficientnet_v2_preprocess_input",
    "mean": [
      0.0,
      0.0,
      0.0
    ],
    "std": [
      1.0,
      1.0,
      1.0
    ],
    "note": "preprocess_input is an identity (pass-through) function in TF 2.12+ because EfficientNetV2-S includes an internal Rescaling layer. The model expects raw pixel values in [0, 255] as float32.",
    "channel_order": "RGB"
  },
  "training_info": {
    "dataset_source": "Pexels Architectural Buildings (13440 images, 8 classes, balanced)",
    "dataset_size": 13440,
    "split_ratio": [
      0.8,
      0.1,
      0.1
    ],
    "split_seed": 42,
    "phases": [
      {
        "name": "Phase 1 - Feature Extraction (Head)",
        "optimizer": "AdamW",
        "lr": 0.001,
        "epochs_max": 25,
        "epochs_actual": 1,
        "val_accuracy": 0.9234,
        "val_loss": 1.0109,
        "train_accuracy": 0.5696,
        "cutmix_mixup": true,
        "label_smoothing": 0.1,
        "early_stop_reason": "myCallback val_acc >= 0.85"
      },
      {
        "name": "Phase 2 - Selective Fine-Tuning",
        "optimizer": "DiscriminativeAdamW",
        "lr": 0.0003,
        "epochs_max": 50,
        "epochs_actual": 1,
        "val_accuracy": 0.9628,
        "val_loss": 0.5655,
        "train_accuracy": 0.8496,
        "unfreeze": "block6+top_conv (BN frozen)",
        "discriminative_lr": {
          "block6": 0.1
        },
        "cutmix_mixup": false,
        "label_smoothing": 0.05,
        "early_stop_reason": "myCallback val_acc >= 0.92"
      },
      {
        "name": "SWA Post-Training",
        "epochs": 10,
        "lr": 0.0001,
        "bn_update": true,
        "bn_update_steps": 100,
        "val_accuracy": 0.9836,
        "val_loss": 0.4109,
        "method": "Izmailov et al., UAI 2018"
      }
    ],
    "metrics": {
      "train_accuracy": 0.9988,
      "val_accuracy": 0.9836,
      "test_accuracy": 0.9777,
      "test_loss": 0.4262,
      "tta_accuracy": 0.9799,
      "overfitting_gap": 0.0211,
      "test_correct": 1314,
      "test_total": 1344,
      "macro_precision": 0.9777,
      "macro_recall": 0.9777,
      "macro_f1": 0.9777,
      "per_class_f1": {
        "barn": 0.9731,
        "bridge": 0.9676,
        "castle": 0.9792,
        "mosque": 0.9792,
        "skyscraper": 0.994,
        "stadium": 0.9791,
        "temple": 0.9668,
        "windmill": 0.9822
      },
      "per_class_recall": {
        "barn": 0.9702,
        "bridge": 0.9762,
        "castle": 0.9821,
        "mosque": 0.9821,
        "skyscraper": 0.994,
        "stadium": 0.9762,
        "temple": 0.9524,
        "windmill": 0.9881
      },
      "checkpoint_comparison": {
        "fine_tuning_swa": {
          "val_accuracy": 0.9836,
          "val_loss": 0.4109,
          "rank": 0
        },
        "fine_tuning": {
          "val_accuracy": 0.9628,
          "val_loss": 0.5655,
          "rank": 1
        },
        "fine_tuning_ema": {
          "val_accuracy": 0.9353,
          "val_loss": 0.6007,
          "rank": 2
        },
        "head_training": {
          "val_accuracy": 0.9234,
          "val_loss": 1.0109,
          "rank": 3
        }
      }
    }
  },
  "version": "v6",
  "license": "apache-2.0",
  "github": "https://github.com/arcxteam/building-architectural-image-classifier",
  "author": {
    "name": "Saugani",
    "email": "team@greyscope.xyz"
  }
}