---
# Food Image Classifier Configuration
#
# Single source of settings for the food image classifier: hardware, data
# pipeline, model architecture, training schedule, evaluation, logging and
# credential placeholders.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. Two placements are inferred and should be confirmed against the
# code that reads this config:
#   - `normalize` is placed under `data.augmentation`
#   - `checkpoint` is placed under `logging`
#
# Floats in scientific notation are written with an explicit mantissa dot
# (1.0e-4, not 1e-4): YAML 1.1 loaders such as PyYAML resolve `1e-4` as a
# string, not a number.

project:
  name: "food_image_classifier"
  version: "1.0.0"
  description: "World-Class Food Image Classifier with Hybrid CNN-ViT Architecture"

# Hardware Configuration
hardware:
  device: "cuda"  # RTX 5060 Laptop GPU
  mixed_precision: true
  compile_model: true
  num_workers: 4
  pin_memory: true

# Data Configuration
data:
  image_size: 224
  batch_size: 32  # Reduced to avoid memory issues
  num_classes: 101  # Food101 dataset: 101 classes, 1000 images per class
  datasets:
    - name: "food101"
      source: "kaggle"
      path: "data/raw/food101"
    # Temporarily disabled HuggingFace dataset to use only Food101
    # - name: "food_images_hf"
    #   source: "huggingface"
    #   path: "data/raw/food_images_hf"

  # Data splits (the three ratios sum to 1.0)
  train_ratio: 0.8
  val_ratio: 0.15
  test_ratio: 0.05

  # Augmentation
  augmentation:
    horizontal_flip: 0.5
    rotation: 15
    color_jitter:
      brightness: 0.2
      contrast: 0.2
      saturation: 0.2
      hue: 0.1
    # Per-channel statistics, one value per channel (3 entries each).
    normalize:
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]

# Model Configuration
model:
  architecture: "hybrid_cnn_vit"

  # CNN Branch (ResNet50)
  cnn:
    backbone: "resnet50"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.3

  # ViT Branch (DeiT-Base)
  vit:
    model_name: "facebook/deit-base-distilled-patch16-224"
    pretrained: true
    freeze_early_layers: true
    dropout: 0.1

  # Fusion Module
  fusion:
    hidden_dim: 512
    num_heads: 8
    dropout: 0.2

  # Classification Head
  head:
    hidden_dims: [1024, 512]
    dropout: 0.4

# Training Configuration
training:
  epochs: 100  # Increased for comprehensive training with 101k images
  learning_rate: 1.0e-4
  weight_decay: 1.0e-5

  # Optimizer
  optimizer:
    type: "adamw"
    betas: [0.9, 0.999]
    eps: 1.0e-8

  # Learning Rate Scheduler
  scheduler:
    type: "cosine_annealing_warm_restarts"
    T_0: 10
    T_mult: 2
    eta_min: 1.0e-6

  # Loss Function
  loss:
    type: "label_smoothing_cross_entropy"
    smoothing: 0.1

  # Advanced Training Techniques
  ema:
    enabled: true
    decay: 0.9999
  gradient_clipping:
    enabled: true
    max_norm: 1.0
  early_stopping:
    enabled: true
    patience: 10
    min_delta: 0.001

# Evaluation Configuration
evaluation:
  metrics:
    - "accuracy"
    - "top5_accuracy"
    - "f1_score"
    - "precision"
    - "recall"
  save_confusion_matrix: true
  save_classification_report: true

# Logging Configuration
logging:
  tensorboard:
    enabled: true
    log_dir: "runs"
  wandb:
    enabled: false  # Set to true if you want to use wandb
    project: "food_classifier"
  checkpoint:
    save_best: true
    save_last: true
    save_every_n_epochs: 10

# API Keys (will be loaded from environment)
# YAML itself performs no `${VAR}` interpolation; these are literal placeholder
# strings that the config loader is presumably expected to substitute (verify).
# Never replace them with real credentials in this file.
api_keys:
  kaggle_username: "${KAGGLE_USERNAME}"
  kaggle_key: "${KAGGLE_KEY}"
  huggingface_token: "${HF_TOKEN}"