File size: 3,862 Bytes
202ce04
83bee53
 
202ce04
0fe9a57
202ce04
0fe9a57
 
 
 
 
 
 
 
202ce04
 
0fe9a57
 
 
 
 
 
 
 
202ce04
0fe9a57
 
 
 
 
202ce04
735a201
0fe9a57
735a201
 
 
0fe9a57
 
735a201
 
 
0fe9a57
735a201
0fe9a57
202ce04
 
735a201
0fe9a57
 
 
 
 
 
202ce04
 
 
735a201
202ce04
 
 
 
735a201
 
 
202ce04
 
 
 
 
735a201
202ce04
 
 
735a201
 
 
 
 
0fe9a57
735a201
0fe9a57
202ce04
 
 
 
 
 
0fe9a57
202ce04
 
 
735a201
 
202ce04
 
 
 
0fe9a57
735a201
 
 
 
 
 
0fe9a57
735a201
 
 
202ce04
735a201
 
 
 
 
 
0fe9a57
735a201
202ce04
 
0fe9a57
735a201
202ce04
 
735a201
 
0fe9a57
735a201
202ce04
 
735a201
 
 
 
 
 
 
 
0fe9a57
 
735a201
 
 
0fe9a57
 
735a201
 
 
0fe9a57
 
202ce04
 
 
0fe9a57
5de52cc
735a201
202ce04
9a41f78
5de52cc
 
0fe9a57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
{
  "model_type": "efficientnetv2-s",
  "base_model": "efficientnetv2-s",
  "task": "image-classification",
  "num_labels": 8,
  "id2label": {
    "0": "barn",
    "1": "bridge",
    "2": "castle",
    "3": "mosque",
    "4": "skyscraper",
    "5": "stadium",
    "6": "temple",
    "7": "windmill"
  },
  "label2id": {
    "barn": 0,
    "bridge": 1,
    "castle": 2,
    "mosque": 3,
    "skyscraper": 4,
    "stadium": 5,
    "temple": 6,
    "windmill": 7
  },
  "input_shape": [
    320,
    320,
    3
  ],
  "preprocessing": {
    "mode": "efficientnet_v2_preprocess_input",
    "mean": [
      0.0,
      0.0,
      0.0
    ],
    "std": [
      1.0,
      1.0,
      1.0
    ],
    "note": "preprocess_input is an identity function in TF 2.12+; EfficientNetV2-S includes an internal Rescaling layer. The model expects raw float32 input in the range [0, 255].",
    "channel_order": "RGB"
  },
  "training_info": {
    "dataset_source": "Pexels Architectural Buildings (13440 images, 8 classes, balanced)",
    "dataset_size": 13440,
    "split_ratio": [
      0.8,
      0.1,
      0.1
    ],
    "split_seed": 42,
    "phases": [
      {
        "name": "Fase 1 - Feature Extraction (Head)",
        "optimizer": "AdamW",
        "lr": 0.001,
        "epochs_max": 25,
        "epochs_actual": 1,
        "val_accuracy": 0.9234,
        "val_loss": 1.0109,
        "train_accuracy": 0.5696,
        "cutmix_mixup": true,
        "label_smoothing": 0.1,
        "early_stop_reason": "myCallback val_acc >= 0.85"
      },
      {
        "name": "Fase 2 - Selective Fine-Tuning",
        "optimizer": "DiscriminativeAdamW",
        "lr": 0.0003,
        "epochs_max": 50,
        "epochs_actual": 1,
        "val_accuracy": 0.9628,
        "val_loss": 0.5655,
        "train_accuracy": 0.8496,
        "unfreeze": "block6+top_conv (BN frozen)",
        "discriminative_lr": {
          "block6": 0.1
        },
        "cutmix_mixup": false,
        "label_smoothing": 0.05,
        "early_stop_reason": "myCallback val_acc >= 0.92"
      },
      {
        "name": "SWA Post-Training",
        "epochs": 10,
        "lr": 0.0001,
        "bn_update": true,
        "bn_update_steps": 100,
        "val_accuracy": 0.9836,
        "val_loss": 0.4109,
        "method": "Izmailov et al., UAI 2018"
      }
    ],
    "metrics": {
      "train_accuracy": 0.9988,
      "val_accuracy": 0.9836,
      "test_accuracy": 0.9777,
      "test_loss": 0.4262,
      "tta_accuracy": 0.9799,
      "overfitting_gap": 0.0211,
      "test_correct": 1314,
      "test_total": 1344,
      "macro_precision": 0.9777,
      "macro_recall": 0.9777,
      "macro_f1": 0.9777,
      "per_class_f1": {
        "barn": 0.9731,
        "bridge": 0.9676,
        "castle": 0.9792,
        "mosque": 0.9792,
        "skyscraper": 0.994,
        "stadium": 0.9791,
        "temple": 0.9668,
        "windmill": 0.9822
      },
      "per_class_recall": {
        "barn": 0.9702,
        "bridge": 0.9762,
        "castle": 0.9821,
        "mosque": 0.9821,
        "skyscraper": 0.994,
        "stadium": 0.9762,
        "temple": 0.9524,
        "windmill": 0.9881
      },
      "checkpoint_comparison": {
        "fine_tuning_swa": {
          "val_accuracy": 0.9836,
          "val_loss": 0.4109,
          "rank": 0
        },
        "fine_tuning": {
          "val_accuracy": 0.9628,
          "val_loss": 0.5655,
          "rank": 1
        },
        "fine_tuning_ema": {
          "val_accuracy": 0.9353,
          "val_loss": 0.6007,
          "rank": 2
        },
        "head_training": {
          "val_accuracy": 0.9234,
          "val_loss": 1.0109,
          "rank": 3
        }
      }
    }
  },
  "version": "v6",
  "license": "apache-2.0",
  "github": "https://github.com/arcxteam/building-architectural-image-classifier",
  "author": {
    "name": "Saugani",
    "email": "team@greyscope.xyz"
  }
}