File size: 5,024 Bytes
c31e9ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{
  "preprocessing_version": "1.0.0",
  "model_family": "car_body_shared",
  "supported_candidates": [
    {
      "name": "EfficientNet-B0",
      "timm_id": "efficientnet_b0",
      "role": "primary_baseline",
      "image_size": 224,
      "crop_size": 224,
      "notes": "Primary baseline. ~5.3M params, ~21 MB FP32. Mild augmentation sufficient."
    },
    {
      "name": "MobileNetV3-Large",
      "timm_id": "mobilenetv3_large_100",
      "role": "active_candidate",
      "image_size": 224,
      "crop_size": 224,
      "notes": "Lightweight CNN. ~5.5M params, ~22 MB FP32. Shares same preprocessing contract."
    },
    {
      "name": "EfficientNetV2-S",
      "timm_id": "tf_efficientnetv2_s",
      "role": "active_candidate",
      "image_size": 224,
      "crop_size": 224,
      "notes": "Improved EfficientNet. ~21.5M params, ~86 MB FP32. Within 95 MB limit."
    },
    {
      "name": "ConvNeXt-V2-Atto",
      "timm_id": "convnextv2_atto",
      "role": "active_candidate",
      "image_size": 224,
      "crop_size": 224,
      "notes": "Hybrid CNN/ViT macro-design. ~3.7M params, ~15 MB FP32. Ideal for small datasets."
    },
    {
      "name": "EfficientNet-B3",
      "timm_id": "efficientnet_b3",
      "role": "active_candidate",
      "image_size": 224,
      "crop_size": 224,
      "notes": "Larger EfficientNet. ~12M params, ~48 MB FP32. Within 95 MB limit."
    },
    {
      "name": "ResNet-50",
      "timm_id": "resnet50",
      "role": "fallback",
      "fallback_flag_required": true,
      "image_size": 224,
      "crop_size": 224,
      "notes": "Classic CNN baseline. ~25M params, ~100 MB FP32 – exceeds 95 MB; requires quantization or FP16."
    },
    {
      "name": "ConvNeXt-Tiny",
      "timm_id": "convnext_tiny",
      "role": "fallback",
      "fallback_flag_required": true,
      "image_size": 224,
      "crop_size": 224,
      "notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast."
    },
    {
      "name": "Swin-Transformer-Tiny",
      "timm_id": "swin_tiny_patch4_window7_224",
      "role": "fallback",
      "fallback_flag_required": true,
      "image_size": 224,
      "crop_size": 224,
      "notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast (model.half())."
    }
  ],
  "image_size": 224,
  "crop_size": 224,
  "interpolation": "bicubic",
  "interpolation_rationale": "Bicubic uses 4x4 pixel neighborhoods with cubic polynomials, preserving derivative continuity at vehicle edges (roofline, C-pillar, wheel arches). Bilinear blurs these high-frequency spatial features.",
  "normalization": {
    "type": "imagenet_zscore",
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225],
    "channels": "RGB",
    "rationale": "ImageNet Z-Score normalization conditions the Hessian matrix (kappa(H) ≈ 1), producing a near-spherical loss landscape for stable gradient descent convergence."
  },
  "augmentation": {
    "vertical_flip_enabled": false,
    "vertical_flip_rationale": "FORBIDDEN. Creates physically impossible upside-down vehicles. Neural networks use gravity direction as implicit spatial reference; vertical flip destroys the structural hierarchy (wheels-down, roof-up) that distinguishes body types.",
    "horizontal_flip": {
      "probability": 0.5,
      "rationale": "Safe. Aerodynamic symmetry means left/right views are label-invariant. Effectively doubles dataset size at zero semantic cost."
    },
    "random_resized_crop": {
      "scale_min": 0.8,
      "scale_max": 1.0,
      "aspect_ratio_min": 0.95,
      "aspect_ratio_max": 1.05,
      "rationale": "Aspect-ratio locked to [0.95, 1.05]. Vehicle body type is defined by length-to-height ratio; extreme distortion can make a Sedan appear as a MICRO or Hatchback."
    },
    "rotation": {
      "max_degrees": 10.0,
      "padding_mode": "reflect",
      "rationale": "Simulates camera tilt (±10°). Reflection padding avoids zero-pad corner artefacts that create spurious edge features in early conv layers. Rotation >15° risks same label-invariance violation as vertical flip."
    },
    "color_jitter": {
      "brightness": 0.2,
      "contrast": 0.2,
      "saturation": 0.2,
      "hue": 0.1,
      "probability": 0.5,
      "rationale": "Breaks spurious color correlations: MICRO cars tend to have bright colors; F1/Open-Wheel cars are often red (sponsor liveries). Forces model to learn chassis geometry, not color shortcuts."
    }
  },
  "transform_contract": {
    "train": "get_train_transforms() – includes all augmentation above",
    "validation": "get_val_transforms() – deterministic: Resize + CenterCrop + ToTensor + Normalize",
    "evaluation": "get_eval_transforms() – identical to validation",
    "inference_api": "get_inference_transforms() – identical to validation; Phase 5 API imports this"
  },
  "class_count": 8,
  "class_slugs": [
    "SUV",
    "VAN",
    "STATION_WAGON",
    "MICRO",
    "ACIK_TEKERLEKLI_F1_ARACLARI",
    "SEDAN",
    "HATCHBACK",
    "PICK_UP"
  ]
}