car-body-classifier / preprocessor_config.json

Upload folder using huggingface_hub

c31e9ec verified 8 days ago

5.02 kB

	{
	"preprocessing_version": "1.0.0",
	"model_family": "car_body_shared",
	"supported_candidates": [
	{
	"name": "EfficientNet-B0",
	"timm_id": "efficientnet_b0",
	"role": "primary_baseline",
	"image_size": 224,
	"crop_size": 224,
	"notes": "Primary baseline. ~5.3M params, ~21 MB FP32. Mild augmentation sufficient."
	},
	{
	"name": "MobileNetV3-Large",
	"timm_id": "mobilenetv3_large_100",
	"role": "active_candidate",
	"image_size": 224,
	"crop_size": 224,
	"notes": "Lightweight CNN. ~5.5M params, ~22 MB FP32. Shares same preprocessing contract."
	},
	{
	"name": "EfficientNetV2-S",
	"timm_id": "tf_efficientnetv2_s",
	"role": "active_candidate",
	"image_size": 224,
	"crop_size": 224,
	"notes": "Improved EfficientNet. ~21.5M params, ~86 MB FP32. Within 95 MB limit."
	},
	{
	"name": "ConvNeXt-V2-Atto",
	"timm_id": "convnextv2_atto",
	"role": "active_candidate",
	"image_size": 224,
	"crop_size": 224,
	"notes": "Hybrid CNN/ViT macro-design. ~3.7M params, ~15 MB FP32. Ideal for small datasets."
	},
	{
	"name": "EfficientNet-B3",
	"timm_id": "efficientnet_b3",
	"role": "active_candidate",
	"image_size": 224,
	"crop_size": 224,
	"notes": "Larger EfficientNet. ~12M params, ~48 MB FP32. Within 95 MB limit."
	},
	{
	"name": "ResNet-50",
	"timm_id": "resnet50",
	"role": "fallback",
	"fallback_flag_required": true,
	"image_size": 224,
	"crop_size": 224,
	"notes": "Classic CNN baseline. ~25M params, ~100 MB FP32 – exceeds 95 MB; requires quantization or FP16."
	},
	{
	"name": "ConvNeXt-Tiny",
	"timm_id": "convnext_tiny",
	"role": "fallback",
	"fallback_flag_required": true,
	"image_size": 224,
	"crop_size": 224,
	"notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast."
	},
	{
	"name": "Swin-Transformer-Tiny",
	"timm_id": "swin_tiny_patch4_window7_224",
	"role": "fallback",
	"fallback_flag_required": true,
	"image_size": 224,
	"crop_size": 224,
	"notes": "~28M params, ~112 MB FP32 – exceeds 95 MB; requires FP16 cast (model.half())."
	}
	],
	"image_size": 224,
	"crop_size": 224,
	"interpolation": "bicubic",
	"interpolation_rationale": "Bicubic uses 4x4 pixel neighborhoods with cubic polynomials, preserving derivative continuity at vehicle edges (roofline, C-pillar, wheel arches). Bilinear blurs these high-frequency spatial features.",
	"normalization": {
	"type": "imagenet_zscore",
	"mean": [0.485, 0.456, 0.406],
	"std": [0.229, 0.224, 0.225],
	"channels": "RGB",
	"rationale": "ImageNet Z-Score normalization conditions the Hessian matrix (kappa(H) ≈ 1), producing a near-spherical loss landscape for stable gradient descent convergence."
	},
	"augmentation": {
	"vertical_flip_enabled": false,
	"vertical_flip_rationale": "FORBIDDEN. Creates physically impossible upside-down vehicles. Neural networks use gravity direction as implicit spatial reference; vertical flip destroys the structural hierarchy (wheels-down, roof-up) that distinguishes body types.",
	"horizontal_flip": {
	"probability": 0.5,
	"rationale": "Safe. Aerodynamic symmetry means left/right views are label-invariant. Effectively doubles dataset size at zero semantic cost."
	},
	"random_resized_crop": {
	"scale_min": 0.8,
	"scale_max": 1.0,
	"aspect_ratio_min": 0.95,
	"aspect_ratio_max": 1.05,
	"rationale": "Aspect-ratio locked to [0.95, 1.05]. Vehicle body type is defined by length-to-height ratio; extreme distortion can make a Sedan appear as a MICRO or Hatchback."
	},
	"rotation": {
	"max_degrees": 10.0,
	"padding_mode": "reflect",
	"rationale": "Simulates camera tilt (±10°). Reflection padding avoids zero-pad corner artefacts that create spurious edge features in early conv layers. Rotation >15° risks same label-invariance violation as vertical flip."
	},
	"color_jitter": {
	"brightness": 0.2,
	"contrast": 0.2,
	"saturation": 0.2,
	"hue": 0.1,
	"probability": 0.5,
	"rationale": "Breaks spurious color correlations: MICRO cars tend to have bright colors; F1/Open-Wheel cars are often red (sponsor liveries). Forces model to learn chassis geometry, not color shortcuts."
	}
	},
	"transform_contract": {
	"train": "get_train_transforms() – includes all augmentation above",
	"validation": "get_val_transforms() – deterministic: Resize + CenterCrop + ToTensor + Normalize",
	"evaluation": "get_eval_transforms() – identical to validation",
	"inference_api": "get_inference_transforms() – identical to validation; Phase 5 API imports this"
	},
	"class_count": 8,
	"class_slugs": [
	"SUV",
	"VAN",
	"STATION_WAGON",
	"MICRO",
	"ACIK_TEKERLEKLI_F1_ARACLARI",
	"SEDAN",
	"HATCHBACK",
	"PICK_UP"
	]
	}