car-body-classifier / preprocessor_config.json
ryan12345441's picture
Upload folder using huggingface_hub
c31e9ec verified
Raw
History Blame Contribute Delete
5.02 kB
{
"preprocessing_version": "1.0.0",
"model_family": "car_body_shared",
"supported_candidates": [
{
"name": "EfficientNet-B0",
"timm_id": "efficientnet_b0",
"role": "primary_baseline",
"image_size": 224,
"crop_size": 224,
"notes": "Primary baseline. ~5.3M params, ~21 MB FP32. Mild augmentation sufficient."
},
{
"name": "MobileNetV3-Large",
"timm_id": "mobilenetv3_large_100",
"role": "active_candidate",
"image_size": 224,
"crop_size": 224,
"notes": "Lightweight CNN. ~5.5M params, ~22 MB FP32. Shares same preprocessing contract."
},
{
"name": "EfficientNetV2-S",
"timm_id": "tf_efficientnetv2_s",
"role": "active_candidate",
"image_size": 224,
"crop_size": 224,
"notes": "Improved EfficientNet. ~21.5M params, ~86 MB FP32. Within 95 MB limit."
},
{
"name": "ConvNeXt-V2-Atto",
"timm_id": "convnextv2_atto",
"role": "active_candidate",
"image_size": 224,
"crop_size": 224,
"notes": "Hybrid CNN/ViT macro-design. ~3.7M params, ~15 MB FP32. Ideal for small datasets."
},
{
"name": "EfficientNet-B3",
"timm_id": "efficientnet_b3",
"role": "active_candidate",
"image_size": 224,
"crop_size": 224,
"notes": "Larger EfficientNet. ~12M params, ~48 MB FP32. Within 95 MB limit."
},
{
"name": "ResNet-50",
"timm_id": "resnet50",
"role": "fallback",
"fallback_flag_required": true,
"image_size": 224,
"crop_size": 224,
"notes": "Classic CNN baseline. ~25M params, ~100 MB FP32 – exceeds 95 MB; requires quantization or FP16."
},
{
"name": "ConvNeXt-Tiny",
"timm_id": "convnext_tiny",
"role": "fallback",
"fallback_flag_required": true,
"image_size": 224,
"crop_size": 224,
"notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast."
},
{
"name": "Swin-Transformer-Tiny",
"timm_id": "swin_tiny_patch4_window7_224",
"role": "fallback",
"fallback_flag_required": true,
"image_size": 224,
"crop_size": 224,
"notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast (model.half())."
}
],
"image_size": 224,
"crop_size": 224,
"interpolation": "bicubic",
"interpolation_rationale": "Bicubic uses 4x4 pixel neighborhoods with cubic polynomials, preserving derivative continuity at vehicle edges (roofline, C-pillar, wheel arches). Bilinear blurs these high-frequency spatial features.",
"normalization": {
"type": "imagenet_zscore",
"mean": [0.485, 0.456, 0.406],
"std": [0.229, 0.224, 0.225],
"channels": "RGB",
"rationale": "ImageNet Z-Score normalization conditions the Hessian matrix (kappa(H) ≈ 1), producing a near-spherical loss landscape for stable gradient descent convergence."
},
"augmentation": {
"vertical_flip_enabled": false,
"vertical_flip_rationale": "FORBIDDEN. Creates physically impossible upside-down vehicles. Neural networks use gravity direction as implicit spatial reference; vertical flip destroys the structural hierarchy (wheels-down, roof-up) that distinguishes body types.",
"horizontal_flip": {
"probability": 0.5,
"rationale": "Safe. Aerodynamic symmetry means left/right views are label-invariant. Effectively doubles dataset size at zero semantic cost."
},
"random_resized_crop": {
"scale_min": 0.8,
"scale_max": 1.0,
"aspect_ratio_min": 0.95,
"aspect_ratio_max": 1.05,
"rationale": "Aspect-ratio locked to [0.95, 1.05]. Vehicle body type is defined by length-to-height ratio; extreme distortion can make a Sedan appear as a MICRO or Hatchback."
},
"rotation": {
"max_degrees": 10.0,
"padding_mode": "reflect",
"rationale": "Simulates camera tilt (±10°). Reflection padding avoids zero-pad corner artefacts that create spurious edge features in early conv layers. Rotation >15° risks same label-invariance violation as vertical flip."
},
"color_jitter": {
"brightness": 0.2,
"contrast": 0.2,
"saturation": 0.2,
"hue": 0.1,
"probability": 0.5,
"rationale": "Breaks spurious color correlations: MICRO cars tend to have bright colors; F1/Open-Wheel cars are often red (sponsor liveries). Forces model to learn chassis geometry, not color shortcuts."
}
},
"transform_contract": {
"train": "get_train_transforms() – includes all augmentation above",
"validation": "get_val_transforms() – deterministic: Resize + CenterCrop + ToTensor + Normalize",
"evaluation": "get_eval_transforms() – identical to validation",
"inference_api": "get_inference_transforms() – identical to validation; Phase 5 API imports this"
},
"class_count": 8,
"class_slugs": [
"SUV",
"VAN",
"STATION_WAGON",
"MICRO",
"ACIK_TEKERLEKLI_F1_ARACLARI",
"SEDAN",
"HATCHBACK",
"PICK_UP"
]
}