File size: 4,977 Bytes
0ba6002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f392f42
0ba6002
 
 
 
f392f42
0ba6002
 
 
 
 
 
f392f42
 
 
0ba6002
c61ba70
 
 
 
 
 
 
 
0ba6002
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Configuration management for the project
"""

import os
from pathlib import Path
from typing import Optional


class Config:
    """
    Central configuration class for the project.

    Groups filesystem locations together with image-processing, augmentation,
    dataset-split, deep-learning and logging settings as plain instance
    attributes.

    Note:
        Constructing an instance has a filesystem side effect: every
        configured directory is created (``parents=True, exist_ok=True``)
        if it does not already exist.
    """

    def __init__(self, project_root: Optional[Path] = None):
        """
        Populate all settings and create the directory layout.

        Args:
            project_root: Directory under which ``data/`` and ``logs/`` are
                placed. When omitted, the directory three levels above this
                file is used. String paths are accepted and coerced to
                ``Path``.
        """
        # Project paths
        if project_root is None:
            project_root = Path(__file__).parent.parent.parent
        self.PROJECT_ROOT = Path(project_root)
        self.DATA_DIR = self.PROJECT_ROOT / "data"
        self.RAW_DATA_DIR = self.DATA_DIR / "raw"
        self.PROCESSED_DATA_DIR = self.DATA_DIR / "processed"
        self.AUGMENTED_DATA_DIR = self.DATA_DIR / "augmented"
        self.MODELS_DIR = self.DATA_DIR / "models"
        self.LOGS_DIR = self.PROJECT_ROOT / "logs"

        # Create directories if they don't exist
        for directory in (
            self.DATA_DIR,
            self.RAW_DATA_DIR,
            self.PROCESSED_DATA_DIR,
            self.AUGMENTED_DATA_DIR,
            self.MODELS_DIR,
            self.LOGS_DIR,
        ):
            directory.mkdir(parents=True, exist_ok=True)

        # Image processing settings
        self.TARGET_IMAGE_SIZE = 256  # pixels (256×256)
        self.IMAGE_CHANNELS = 3  # RGB
        self.NORMALIZATION_RANGE = (0, 1)  # Pixel normalization range

        # Data augmentation settings
        self.AUGMENTATION_FACTOR = 5  # Generate 5 variations per image
        self.ROTATION_RANGE = 10  # ±10 degrees
        self.BRIGHTNESS_RANGE = 0.15  # ±15%
        self.ZOOM_RANGE = (0.95, 1.05)  # 95-105%

        # Dataset split settings
        self.TEST_SIZE = 0.2  # 20% for testing
        self.VAL_SIZE = 0.1  # 10% for validation
        self.RANDOM_STATE = 42  # For reproducibility
        self.CV_FOLDS = 5  # Stratified 5-fold cross-validation

        # Deep Learning settings
        self.DL_IMAGE_SIZE = 224  # ResNet50/EfficientNet input
        self.DL_BATCH_SIZE = 8  # Reduced for MPS memory limits with dual backbone
        self.DL_EPOCHS = 100  # More epochs with early stopping
        self.DL_LEARNING_RATE = 1e-4  # Lower LR for fine-tuning backbone
        self.DL_WEIGHT_DECAY = 1e-4
        self.DL_PATIENCE = 15  # Early stopping patience
        self.DL_BACKBONE_FROZEN = True  # Freeze backbone, train only heads (faster)
        # Derived from MODELS_DIR / RAW_DATA_DIR so the DL locations cannot
        # drift away from the base directories defined above.
        self.DL_MODELS_DIR = self.MODELS_DIR / "dl"
        self.DL_EXPANDED_DATA_DIR = self.RAW_DATA_DIR / "expanded"
        self.DL_EXTERNAL_DATA_DIR = self.RAW_DATA_DIR / "external"

        # Multi-head model settings (SVDD + classifier heads)
        self.DL_SVDD_EMBEDDING_DIM = 128  # Deep SVDD embedding dimension
        self.DL_HEAD_A_ALPHA = 0.15  # Pokemon classifier (increased: now have non-Pokemon negatives)
        self.DL_HEAD_B_BETA = 0.40  # Back authenticator loss weight
        self.DL_HEAD_C_GAMMA = 0.45  # Front SVDD loss weight (primary mechanism)

        # Training improvements for counterfeit detection
        self.DL_BACK_COUNTERFEIT_WEIGHT = 2.5  # Class weight for counterfeit backs (ratio real/fake: 300/120)
        self.DL_MINORITY_AUGMENT_FACTOR = 2  # Duplication factor for minority class (backs_fake)
        self.DL_CALIBRATION_FBETA = 2.0  # F-beta for threshold calibration (2.0 = recall-weighted)
        self.DL_USE_FOCAL_LOSS = True  # Enable focal loss for Head A/B
        self.DL_FOCAL_GAMMA = 2.0  # Focal loss gamma (focus on hard examples)
        self.DL_SVDD_CONTRASTIVE_ETA = 1.0  # Weight for contrastive SVDD term (Deep SAD)

        # Ensure DL directories exist
        for directory in (
            self.DL_MODELS_DIR,
            self.DL_EXPANDED_DATA_DIR,
            self.DL_EXTERNAL_DATA_DIR,
        ):
            directory.mkdir(parents=True, exist_ok=True)

        # Logging settings
        # The standard ``logging`` module only recognizes upper-case level
        # names, so normalize the environment value and fall back to INFO
        # when the variable is unset or blank.
        env_level = os.getenv("LOG_LEVEL", "").strip()
        self.LOG_LEVEL = env_level.upper() if env_level else "INFO"
        self.LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
        self.LOG_FILE = self.LOGS_DIR / "cardauth.log"

    def get_dataset_path(self, dataset_type: str = "raw") -> Path:
        """
        Get path to dataset directory

        Args:
            dataset_type: One of 'raw', 'processed', 'augmented'

        Returns:
            Path to the dataset directory

        Raises:
            ValueError: If ``dataset_type`` is not one of the known names.
        """
        dataset_map = {
            "raw": self.RAW_DATA_DIR,
            "processed": self.PROCESSED_DATA_DIR,
            "augmented": self.AUGMENTED_DATA_DIR,
        }

        if dataset_type not in dataset_map:
            raise ValueError(
                f"Unknown dataset type: {dataset_type}. "
                f"Choose from: {list(dataset_map.keys())}"
            )

        return dataset_map[dataset_type]

    def get_model_path(self, model_name: str) -> Path:
        """
        Get path to save/load a model

        The name is joined onto ``MODELS_DIR`` as given; no existence check
        or validation is performed.

        Args:
            model_name: Name of the model file

        Returns:
            Path to the model file
        """
        return self.MODELS_DIR / model_name

    def __repr__(self) -> str:
        """Return a short representation showing the project root."""
        return f"Config(root={self.PROJECT_ROOT})"


# Global config instance shared by importers of this module.
# NOTE: Config.__init__ creates the data/log directories, so this line runs
# at import time and gives importing the module a filesystem side effect.
config = Config()