File size: 5,678 Bytes
f381be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3996f2
 
f381be8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3996f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f381be8
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
src.utils.config
================
Central project configuration — paths, constants, hyperparameters.

Artifact Versioning
-------------------
Trained models, scalers, figures, and result files live under
``artifacts/<version>/`` (e.g. ``artifacts/v1/``, ``artifacts/v2/``).

Use :func:`get_version_paths` and :func:`ensure_version_dirs` to work
with versioned artifact directories consistently.  The module-level
``MODELS_DIR``, ``SCALERS_DIR``, ``FIGURES_DIR`` and ``LOGS_DIR`` variables
point to the unversioned folders directly under ``artifacts/`` at the
repository root and are kept for backward compatibility.
"""

from __future__ import annotations

from pathlib import Path
from typing import Dict

# ── Paths ────────────────────────────────────────────────────────────────────
# ``parents[2]`` is the grandparent of the directory holding this file.  The
# module docstring names this module ``src.utils.config``, so for
# ``<repo>/src/utils/config.py`` that resolves to ``<repo>``.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
# Input data locations, all relative to the repository root.
DATASET_DIR = PROJECT_ROOT / "cleaned_dataset"
DATA_DIR = DATASET_DIR / "data"
METADATA_PATH = DATASET_DIR / "metadata.csv"
# Unversioned ("legacy") artifact folders; versioned ones are built on demand
# as ``ARTIFACTS_DIR / <version> / ...``.
ARTIFACTS_DIR = PROJECT_ROOT / "artifacts"
MODELS_DIR = ARTIFACTS_DIR / "models"
SCALERS_DIR = ARTIFACTS_DIR / "scalers"
FIGURES_DIR = ARTIFACTS_DIR / "figures"
LOGS_DIR = ARTIFACTS_DIR / "logs"

# Currently active artifact version (changed when v3 is validated).
# This value is captured as a default argument when the versioning helpers in
# this module are defined, so rebinding ACTIVE_VERSION at runtime does not
# change those defaults; pass ``version=`` explicitly instead.
ACTIVE_VERSION: str = "v3"

# Create the legacy (unversioned) artifact folders eagerly at import time,
# for backward compatibility.  ``exist_ok=True`` makes repeated imports a
# no-op once the folders exist.
for _d in (
    MODELS_DIR,
    SCALERS_DIR,
    FIGURES_DIR,
    LOGS_DIR,
    # One sub-folder per model family, created after the top-level folders.
    *(MODELS_DIR / family for family in ("classical", "deep", "ensemble")),
):
    _d.mkdir(parents=True, exist_ok=True)


# ── Artifact versioning helpers ──────────────────────────────────────────────
def get_version_paths(version: str = ACTIVE_VERSION) -> Dict[str, Path]:
    """Build the directory layout of one artifact version.

    No directory is created or checked here; the function only composes
    paths under ``ARTIFACTS_DIR / version``.

    Parameters
    ----------
    version
        Name of the version folder (e.g. ``"v2"``).  Defaults to the value
        ``ACTIVE_VERSION`` had when this function was defined.

    Returns
    -------
    dict
        Maps, in this order, ``root``, ``models_classical``, ``models_deep``,
        ``models_ensemble``, ``scalers``, ``figures``, ``results`` and
        ``logs`` to the corresponding :class:`pathlib.Path`.

    Example
    -------
    >>> v2 = get_version_paths("v2")
    >>> joblib.dump(model, v2["models_classical"] / "rf.joblib")
    """
    version_root = ARTIFACTS_DIR / version

    # Location of every non-root entry, as path segments below the version
    # root.  Tuple order fixes the key order of the returned dict.
    relative_layout = (
        ("models_classical", ("models", "classical")),
        ("models_deep", ("models", "deep")),
        ("models_ensemble", ("models", "ensemble")),
        ("scalers", ("scalers",)),
        ("figures", ("figures",)),
        ("results", ("results",)),
        ("logs", ("logs",)),
    )

    layout: Dict[str, Path] = {"root": version_root}
    layout.update(
        (key, version_root.joinpath(*segments))
        for key, segments in relative_layout
    )
    return layout


def ensure_version_dirs(version: str = ACTIVE_VERSION) -> Dict[str, Path]:
    """Make sure every directory of an artifact version exists.

    Looks up the layout with :func:`get_version_paths` and creates each
    entry, including missing parents.  Directories that already exist are
    left alone.

    Parameters
    ----------
    version
        Name of the version folder.  Defaults to the value ``ACTIVE_VERSION``
        had when this function was defined.

    Returns
    -------
    dict
        The same mapping :func:`get_version_paths` returns for ``version``.
    """
    version_paths = get_version_paths(version)
    for directory in version_paths.values():
        directory.mkdir(parents=True, exist_ok=True)
    return version_paths

# ── Battery dataset constants ────────────────────────────────────────────────
# Capacities are in ampere-hours, going by the ``_AH`` suffix.
NOMINAL_CAPACITY_AH = 2.0
# 1.4 and 1.6 are 70 % and 80 % of NOMINAL_CAPACITY_AH, i.e. the capacity left
# after a 30 % / 20 % fade.  Presumably these are end-of-life thresholds;
# confirm against the code that consumes them.
EOL_30PCT_AH = 1.4
EOL_20PCT_AH = 1.6
# Battery IDs to leave out.  NOTE(review): the reason for excluding these four
# is not recorded in this module.
EXCLUDED_BATTERIES = {"B0049", "B0050", "B0051", "B0052"}

# ── Training defaults ────────────────────────────────────────────────────────
RANDOM_STATE = 42         # presumably the shared RNG seed — confirm at call sites
TRAIN_RATIO = 0.8         # presumably the training share of a train/test split
WINDOW_SIZE = 32
N_BINS = 20               # For fixed-length downsample of within-cycle data
BATCH_SIZE = 32
MAX_EPOCHS = 150
EARLY_STOP_PATIENCE = 20
MC_DROPOUT_SAMPLES = 50   # For uncertainty estimation

# ── Classical ML ─────────────────────────────────────────────────────────────
N_OPTUNA_TRIALS = 100
CV_FOLDS = 5

# ── Deep learning ────────────────────────────────────────────────────────────
LEARNING_RATE = 1e-3
LSTM_HIDDEN = 128
LSTM_LAYERS = 2
TRANSFORMER_D_MODEL = 64
TRANSFORMER_NHEAD = 4
TRANSFORMER_NLAYERS = 2
# Second name for the same value.  Both names are bound at import, so rebinding
# one of them later does not update the other.
TRANSFORMER_LAYERS = TRANSFORMER_NLAYERS  # alias for convenience
DROPOUT = 0.2
LATENT_DIM = 16            # For VAE

# ── Feature col lists (duplicated from preprocessing for easy import) ────────
# The 12 v2 feature columns; order matters to anything indexing by position.
FEATURE_COLS_V2 = [
    "cycle_number",
    "ambient_temperature",
    "peak_voltage",
    "min_voltage",
    "voltage_range",
    "avg_current",
    "avg_temp",
    "temp_rise",
    "cycle_duration",
    "Re",
    "Rct",
    "delta_capacity",
]

# v3 = the 12 v2 columns followed by 6 physics-informed features (18 total).
FEATURE_COLS_V3 = [
    *FEATURE_COLS_V2,
    "capacity_retention",  # Q_n / Q_1 per battery (0-1 ratio)
    "cumulative_energy",   # cumulative Ah throughput
    "dRe_dn",              # impedance growth rate (ΔRe per cycle)
    "dRct_dn",             # impedance growth rate (ΔRct per cycle)
    "soh_rolling_mean",    # 5-cycle rolling mean SOH (smoothed)
    "voltage_slope",       # cycle-over-cycle voltage midpoint slope
]

# Currently the same columns as FEATURE_COLS_V2, but kept as its own list
# object so the two can be edited (or mutated) independently.
FEATURE_COLS_SCALAR = [
    "cycle_number",
    "ambient_temperature",
    "peak_voltage",
    "min_voltage",
    "voltage_range",
    "avg_current",
    "avg_temp",
    "temp_rise",
    "cycle_duration",
    "Re",
    "Rct",
    "delta_capacity",
]

# ── Visualization ────────────────────────────────────────────────────────────
# NOTE(review): the ``seaborn-v0_8-*`` style names exist only in newer
# Matplotlib releases (3.6+, as far as I know) — confirm the pinned version.
MATPLOTLIB_STYLE = "seaborn-v0_8-whitegrid"
FIG_DPI = 150
FIG_SIZE = (12, 7)  # presumably (width, height) in inches, as Matplotlib's figsize
# Colormap names; presumably passed to Matplotlib as ``cmap=`` — confirm.
CMAP_DIVERGING = "RdYlGn"
CMAP_SEQUENTIAL = "viridis"