Spaces:
Starting
Starting
"""
Configuration management for the project.
"""
| import os | |
| from pathlib import Path | |
| from typing import Optional | |
class Config:
    """
    Central configuration class for the project

    Instantiating the class resolves every project path relative to this
    file, creates the managed data/log directories on disk when they are
    missing, and exposes each setting as an upper-case instance attribute.
    """

    def __init__(self):
        # Project paths (root is three ``.parent`` hops up from this file)
        self.PROJECT_ROOT = Path(__file__).parent.parent.parent
        self.DATA_DIR = self.PROJECT_ROOT / "data"
        self.RAW_DATA_DIR = self.DATA_DIR / "raw"
        self.PROCESSED_DATA_DIR = self.DATA_DIR / "processed"
        self.AUGMENTED_DATA_DIR = self.DATA_DIR / "augmented"
        self.MODELS_DIR = self.DATA_DIR / "models"
        self.LOGS_DIR = self.PROJECT_ROOT / "logs"

        # Deep Learning paths, derived from the base directories above so the
        # two can never drift apart (same locations as data/models/dl,
        # data/raw/expanded and data/raw/external).
        self.DL_MODELS_DIR = self.MODELS_DIR / "dl"
        self.DL_EXPANDED_DATA_DIR = self.RAW_DATA_DIR / "expanded"
        self.DL_EXTERNAL_DATA_DIR = self.RAW_DATA_DIR / "external"

        # Create directories if they don't exist
        self._ensure_directories()

        # Image processing settings
        self.TARGET_IMAGE_SIZE = 256  # pixels (256×256)
        self.IMAGE_CHANNELS = 3  # RGB
        self.NORMALIZATION_RANGE = (0, 1)  # Pixel normalization range

        # Data augmentation settings
        self.AUGMENTATION_FACTOR = 5  # Generate 5 variations per image
        self.ROTATION_RANGE = 10  # ±10 degrees
        self.BRIGHTNESS_RANGE = 0.15  # ±15%
        self.ZOOM_RANGE = (0.95, 1.05)  # 95-105%

        # Dataset split settings
        self.TEST_SIZE = 0.2  # 20% for testing
        self.VAL_SIZE = 0.1  # 10% for validation
        self.RANDOM_STATE = 42  # For reproducibility
        self.CV_FOLDS = 5  # Stratified 5-fold cross-validation

        # Deep Learning settings
        self.DL_IMAGE_SIZE = 224  # ResNet50/EfficientNet input
        self.DL_BATCH_SIZE = 8  # Reduced for MPS memory limits with dual backbone
        self.DL_EPOCHS = 100  # More epochs with early stopping
        self.DL_LEARNING_RATE = 1e-4  # Lower LR for fine-tuning backbone
        self.DL_WEIGHT_DECAY = 1e-4
        self.DL_PATIENCE = 15  # Early stopping patience
        self.DL_BACKBONE_FROZEN = True  # Freeze backbone, train only heads (faster)

        # Multi-head model settings (SVDD + classifier heads)
        self.DL_SVDD_EMBEDDING_DIM = 128  # Deep SVDD embedding dimension
        self.DL_HEAD_A_ALPHA = 0.15  # Pokemon classifier (increased: now have non-Pokemon negatives)
        self.DL_HEAD_B_BETA = 0.40  # Back authenticator loss weight
        self.DL_HEAD_C_GAMMA = 0.45  # Front SVDD loss weight (primary mechanism)

        # Training improvements for counterfeit detection
        self.DL_BACK_COUNTERFEIT_WEIGHT = 2.5  # Class weight for counterfeit backs (ratio real/fake: 300/120)
        self.DL_MINORITY_AUGMENT_FACTOR = 2  # Duplication factor for minority class (backs_fake)
        self.DL_CALIBRATION_FBETA = 2.0  # F-beta for threshold calibration (2.0 = recall-weighted)
        self.DL_USE_FOCAL_LOSS = True  # Enable focal loss for Head A/B
        self.DL_FOCAL_GAMMA = 2.0  # Focal loss gamma (focus on hard examples)
        self.DL_SVDD_CONTRASTIVE_ETA = 1.0  # Weight for contrastive SVDD term (Deep SAD)

        # Logging settings
        # The level comes from the environment, so normalise it: the logging
        # module only recognises upper-case level names.
        self.LOG_LEVEL = self._normalize_log_level(os.getenv("LOG_LEVEL"))
        self.LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        self.LOG_FILE = self.LOGS_DIR / "cardauth.log"

    @staticmethod
    def _normalize_log_level(raw_level: Optional[str]) -> str:
        """
        Turn a raw LOG_LEVEL value into a usable level name

        Args:
            raw_level: Value read from the environment; may be None, empty,
                lower-case or padded with whitespace

        Returns:
            The stripped, upper-cased level name, or "INFO" when the input
            is None or blank
        """
        cleaned = (raw_level or "").strip().upper()
        return cleaned or "INFO"

    def _ensure_directories(self) -> None:
        """
        Create every managed directory that does not exist yet

        Parents are created as needed and existing directories are left
        untouched.
        """
        for directory in (
            self.DATA_DIR,
            self.RAW_DATA_DIR,
            self.PROCESSED_DATA_DIR,
            self.AUGMENTED_DATA_DIR,
            self.MODELS_DIR,
            self.LOGS_DIR,
            self.DL_MODELS_DIR,
            self.DL_EXPANDED_DATA_DIR,
            self.DL_EXTERNAL_DATA_DIR,
        ):
            directory.mkdir(parents=True, exist_ok=True)

    def get_dataset_path(self, dataset_type: str = "raw") -> Path:
        """
        Get path to dataset directory

        Args:
            dataset_type: One of 'raw', 'processed', 'augmented'

        Returns:
            Path to the dataset directory

        Raises:
            ValueError: If dataset_type is not one of the known keys
        """
        dataset_map = {
            "raw": self.RAW_DATA_DIR,
            "processed": self.PROCESSED_DATA_DIR,
            "augmented": self.AUGMENTED_DATA_DIR,
        }
        try:
            return dataset_map[dataset_type]
        except KeyError:
            # Report the valid choices instead of a bare KeyError.
            raise ValueError(
                f"Unknown dataset type: {dataset_type}. "
                f"Choose from: {list(dataset_map)}"
            ) from None

    def get_model_path(self, model_name: str) -> Path:
        """
        Get path to save/load a model

        Args:
            model_name: Name of the model file

        Returns:
            Path to the model file inside MODELS_DIR
        """
        return self.MODELS_DIR / model_name

    def __repr__(self) -> str:
        return f"Config(root={self.PROJECT_ROOT})"
# Global config instance, built once when this module is imported.
# Constructing Config runs its __init__, so importing this module also
# creates any missing data/log directories on disk.
config = Config()