codealchemist01
/

food-image-classifier-hybrid

+# Food image classifier configuration: project identity
+project:
+  name: 'food_image_classifier'
+  version: '1.0.0'  # kept quoted so it stays a string
+  description: 'World-Class Food Image Classifier with Hybrid CNN-ViT Architecture'
+# Hardware settings
+hardware:
+  # Target device; the author's machine is an RTX 5060 Laptop GPU
+  device: 'cuda'
+  mixed_precision: true
+  compile_model: true
+  # NOTE(review): presumably data-loader options - confirm against the loader code
+  num_workers: 4
+  pin_memory: true
+# Data settings: input size, batching, sources, splits and augmentation
+data:
+  image_size: 224
+  # Reduced to avoid memory issues
+  batch_size: 32
+  # Food101 dataset: 101 classes, 1000 images per class
+  num_classes: 101
+  datasets:
+    - name: 'food101'
+      source: 'kaggle'
+      path: 'data/raw/food101'
+    # The HuggingFace dataset is temporarily disabled so only Food101 is used
+    # - name: "food_images_hf"
+    #   source: "huggingface"
+    #   path: "data/raw/food_images_hf"
+  # Split fractions for train / validation / test (0.8 + 0.15 + 0.05 = 1.0)
+  train_ratio: 0.8
+  val_ratio: 0.15
+  test_ratio: 0.05
+  # Augmentation parameters
+  augmentation:
+    horizontal_flip: 0.5
+    rotation: 15
+    color_jitter:
+      brightness: 0.2
+      contrast: 0.2
+      saturation: 0.2
+      hue: 0.1
+    # Three values each for mean and std
+    normalize:
+      mean:
+        - 0.485
+        - 0.456
+        - 0.406
+      std:
+        - 0.229
+        - 0.224
+        - 0.225
+# Model settings: two backbone branches, a fusion module and a classifier head
+model:
+  architecture: 'hybrid_cnn_vit'
+  # CNN branch (ResNet50 backbone)
+  cnn:
+    backbone: 'resnet50'
+    pretrained: true
+    freeze_early_layers: true
+    dropout: 0.3
+  # ViT branch (distilled DeiT-Base checkpoint, patch 16, 224 input)
+  vit:
+    model_name: 'facebook/deit-base-distilled-patch16-224'
+    pretrained: true
+    freeze_early_layers: true
+    dropout: 0.1
+  # Fusion module
+  fusion:
+    hidden_dim: 512
+    num_heads: 8
+    dropout: 0.2
+  # Classification head
+  head:
+    hidden_dims:
+      - 1024
+      - 512
+    dropout: 0.4
+# Training Configuration
+#
+# NOTE: every float in scientific notation below is written with an explicit
+# decimal point (1.0e-4, not 1e-4). YAML 1.1 loaders such as PyYAML only
+# resolve a scalar as a float when the mantissa contains a "."; a bare 1e-4
+# is loaded as the string "1e-4". The 1.0e-4 form is a float under both
+# YAML 1.1 and YAML 1.2.
+training:
+  # Increased for comprehensive training with 101k images
+  epochs: 100
+  learning_rate: 1.0e-4
+  weight_decay: 1.0e-5
+  # Optimizer
+  optimizer:
+    type: "adamw"
+    betas: [0.9, 0.999]
+    eps: 1.0e-8
+  # Learning Rate Scheduler
+  scheduler:
+    type: "cosine_annealing_warm_restarts"
+    T_0: 10
+    T_mult: 2
+    eta_min: 1.0e-6
+  # Loss Function
+  loss:
+    type: "label_smoothing_cross_entropy"
+    smoothing: 0.1
+  # Advanced Training Techniques
+  ema:
+    enabled: true
+    decay: 0.9999
+  gradient_clipping:
+    enabled: true
+    max_norm: 1.0
+  early_stopping:
+    enabled: true
+    patience: 10
+    min_delta: 0.001
+# Evaluation settings: metrics to report and which artifacts to save
+evaluation:
+  metrics:
+    - accuracy
+    - top5_accuracy
+    - f1_score
+    - precision
+    - recall
+  save_confusion_matrix: true
+  save_classification_report: true
+# Logging settings: TensorBoard, Weights & Biases and checkpointing
+logging:
+  tensorboard:
+    enabled: true
+    log_dir: runs
+  wandb:
+    # Set to true if you want to use wandb
+    enabled: false
+    project: food_classifier
+  checkpoint:
+    save_best: true
+    save_last: true
+    save_every_n_epochs: 10
+# API keys: placeholders meant to be filled from the environment.
+# NOTE(review): a plain YAML loader keeps "${VAR}" as a literal string -
+# confirm the config loader substitutes these from environment variables.
+api_keys:
+  kaggle_username: '${KAGGLE_USERNAME}'
+  kaggle_key: '${KAGGLE_KEY}'
+  huggingface_token: '${HF_TOKEN}'