""" Configuration management for the project """ import os from pathlib import Path from typing import Optional class Config: """ Central configuration class for the project """ def __init__(self): # Project paths self.PROJECT_ROOT = Path(__file__).parent.parent.parent self.DATA_DIR = self.PROJECT_ROOT / "data" self.RAW_DATA_DIR = self.DATA_DIR / "raw" self.PROCESSED_DATA_DIR = self.DATA_DIR / "processed" self.AUGMENTED_DATA_DIR = self.DATA_DIR / "augmented" self.MODELS_DIR = self.DATA_DIR / "models" self.LOGS_DIR = self.PROJECT_ROOT / "logs" # Create directories if they don't exist for directory in [ self.DATA_DIR, self.RAW_DATA_DIR, self.PROCESSED_DATA_DIR, self.AUGMENTED_DATA_DIR, self.MODELS_DIR, self.LOGS_DIR, ]: directory.mkdir(parents=True, exist_ok=True) # Image processing settings self.TARGET_IMAGE_SIZE = 256 # pixels (256×256) self.IMAGE_CHANNELS = 3 # RGB self.NORMALIZATION_RANGE = (0, 1) # Pixel normalization range # Data augmentation settings self.AUGMENTATION_FACTOR = 5 # Generate 5 variations per image self.ROTATION_RANGE = 10 # ±10 degrees self.BRIGHTNESS_RANGE = 0.15 # ±15% self.ZOOM_RANGE = (0.95, 1.05) # 95-105% # Dataset split settings self.TEST_SIZE = 0.2 # 20% for testing self.VAL_SIZE = 0.1 # 10% for validation self.RANDOM_STATE = 42 # For reproducibility self.CV_FOLDS = 5 # Stratified 5-fold cross-validation # Deep Learning settings self.DL_IMAGE_SIZE = 224 # ResNet50/EfficientNet input self.DL_BATCH_SIZE = 8 # Reduced for MPS memory limits with dual backbone self.DL_EPOCHS = 100 # More epochs with early stopping self.DL_LEARNING_RATE = 1e-4 # Lower LR for fine-tuning backbone self.DL_WEIGHT_DECAY = 1e-4 self.DL_PATIENCE = 15 # Early stopping patience self.DL_BACKBONE_FROZEN = True # Freeze backbone, train only heads (faster) self.DL_MODELS_DIR = self.DATA_DIR / "models" / "dl" self.DL_EXPANDED_DATA_DIR = self.DATA_DIR / "raw" / "expanded" self.DL_EXTERNAL_DATA_DIR = self.DATA_DIR / "raw" / "external" # Multi-head model settings (SVDD + classifier heads) self.DL_SVDD_EMBEDDING_DIM = 128 # Deep SVDD embedding dimension self.DL_HEAD_A_ALPHA = 0.15 # Pokemon classifier (increased: now have non-Pokemon negatives) self.DL_HEAD_B_BETA = 0.40 # Back authenticator loss weight self.DL_HEAD_C_GAMMA = 0.45 # Front SVDD loss weight (primary mechanism) # Training improvements for counterfeit detection self.DL_BACK_COUNTERFEIT_WEIGHT = 2.5 # Class weight for counterfeit backs (ratio real/fake: 300/120) self.DL_MINORITY_AUGMENT_FACTOR = 2 # Duplication factor for minority class (backs_fake) self.DL_CALIBRATION_FBETA = 2.0 # F-beta for threshold calibration (2.0 = recall-weighted) self.DL_USE_FOCAL_LOSS = True # Enable focal loss for Head A/B self.DL_FOCAL_GAMMA = 2.0 # Focal loss gamma (focus on hard examples) self.DL_SVDD_CONTRASTIVE_ETA = 1.0 # Weight for contrastive SVDD term (Deep SAD) # Ensure DL directories exist self.DL_MODELS_DIR.mkdir(parents=True, exist_ok=True) self.DL_EXPANDED_DATA_DIR.mkdir(parents=True, exist_ok=True) self.DL_EXTERNAL_DATA_DIR.mkdir(parents=True, exist_ok=True) # Logging settings self.LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO") self.LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" self.LOG_FILE = self.LOGS_DIR / "cardauth.log" def get_dataset_path(self, dataset_type: str = "raw") -> Path: """ Get path to dataset directory Args: dataset_type: One of 'raw', 'processed', 'augmented' Returns: Path to the dataset directory """ dataset_map = { "raw": self.RAW_DATA_DIR, "processed": self.PROCESSED_DATA_DIR, "augmented": 
self.AUGMENTED_DATA_DIR, } if dataset_type not in dataset_map: raise ValueError( f"Unknown dataset type: {dataset_type}. " f"Choose from: {list(dataset_map.keys())}" ) return dataset_map[dataset_type] def get_model_path(self, model_name: str) -> Path: """ Get path to save/load a model Args: model_name: Name of the model file Returns: Path to the model file """ return self.MODELS_DIR / model_name def __repr__(self) -> str: return f"Config(root={self.PROJECT_ROOT})" # Global config instance config = Config()
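

# Illustrative usage sketch: downstream modules are expected to import the shared
# `config` instance above rather than constructing Config themselves. Running this
# file directly prints a few resolved paths as a quick smoke check; the model
# filename below is a hypothetical example, not a file the project guarantees.
if __name__ == "__main__":
    print(config)
    print("Raw dataset dir:      ", config.get_dataset_path("raw"))
    print("Processed dataset dir:", config.get_dataset_path("processed"))
    print("Example model path:   ", config.get_model_path("example_model.pt"))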