{ "model_type": "efficientnetv2-s", "base_model": "efficientnetv2-s", "task": "image-classification", "num_labels": 8, "id2label": { "0": "barn", "1": "bridge", "2": "castle", "3": "mosque", "4": "skyscraper", "5": "stadium", "6": "temple", "7": "windmill" }, "label2id": { "barn": 0, "bridge": 1, "castle": 2, "mosque": 3, "skyscraper": 4, "stadium": 5, "temple": 6, "windmill": 7 }, "input_shape": [ 320, 320, 3 ], "preprocessing": { "mode": "efficientnet_v2_preprocess_input", "mean": [ 0.0, 0.0, 0.0 ], "std": [ 1.0, 1.0, 1.0 ], "note": "preprocess_input is identity in TF 2.12+; EfficientNetV2-S includes internal Rescaling layer. Input expects raw [0, 255] float32.", "channel_order": "RGB" }, "training_info": { "dataset_source": "Pexels Architectural Buildings (13440 images, 8 classes, balanced)", "dataset_size": 13440, "split_ratio": [ 0.8, 0.1, 0.1 ], "split_seed": 42, "phases": [ { "name": "Fase 1 - Feature Extraction (Head)", "optimizer": "AdamW", "lr": 0.001, "epochs_max": 25, "epochs_actual": 1, "val_accuracy": 0.9234, "val_loss": 1.0109, "train_accuracy": 0.5696, "cutmix_mixup": true, "label_smoothing": 0.1, "early_stop_reason": "myCallback val_acc >= 0.85" }, { "name": "Fase 2 - Selective Fine-Tuning", "optimizer": "DiscriminativeAdamW", "lr": 0.0003, "epochs_max": 50, "epochs_actual": 1, "val_accuracy": 0.9628, "val_loss": 0.5655, "train_accuracy": 0.8496, "unfreeze": "block6+top_conv (BN frozen)", "discriminative_lr": { "block6": 0.1 }, "cutmix_mixup": false, "label_smoothing": 0.05, "early_stop_reason": "myCallback val_acc >= 0.92" }, { "name": "SWA Post-Training", "epochs": 10, "lr": 0.0001, "bn_update": true, "bn_update_steps": 100, "val_accuracy": 0.9836, "val_loss": 0.4109, "method": "Izmailov et al., UAI 2018" } ], "metrics": { "train_accuracy": 0.9988, "val_accuracy": 0.9836, "test_accuracy": 0.9777, "test_loss": 0.4262, "tta_accuracy": 0.9799, "overfitting_gap": 0.0211, "test_correct": 1314, "test_total": 1344, "macro_precision": 0.9777, "macro_recall": 0.9777, "macro_f1": 0.9777, "per_class_f1": { "barn": 0.9731, "bridge": 0.9676, "castle": 0.9792, "mosque": 0.9792, "skyscraper": 0.994, "stadium": 0.9791, "temple": 0.9668, "windmill": 0.9822 }, "per_class_recall": { "barn": 0.9702, "bridge": 0.9762, "castle": 0.9821, "mosque": 0.9821, "skyscraper": 0.994, "stadium": 0.9762, "temple": 0.9524, "windmill": 0.9881 }, "checkpoint_comparison": { "fine_tuning_swa": { "val_accuracy": 0.9836, "val_loss": 0.4109, "rank": 0 }, "fine_tuning": { "val_accuracy": 0.9628, "val_loss": 0.5655, "rank": 1 }, "fine_tuning_ema": { "val_accuracy": 0.9353, "val_loss": 0.6007, "rank": 2 }, "head_training": { "val_accuracy": 0.9234, "val_loss": 1.0109, "rank": 3 } } } }, "version": "v6", "license": "apache-2.0", "github": "https://github.com/arcxteam/building-architectural-image-classifier", "author": { "name": "Saugani", "email": "team@greyscope.xyz" } }