"""
src.utils.config
================
Central project configuration — paths, constants, hyperparameters.
Artifact Versioning
-------------------
Trained models, scalers, figures, and result files live under
``artifacts/<version>/`` (e.g. ``artifacts/v1/``, ``artifacts/v2/``).
Use :func:`get_version_paths` and :func:`ensure_version_dirs` to work
with versioned artifact directories consistently. The module-level
``MODELS_DIR``, ``SCALERS_DIR``, ``FIGURES_DIR`` variables point to the
repository-root artifacts folder and are kept for backward compatibility.
"""
from __future__ import annotations
from pathlib import Path
from typing import Dict
# ── Paths ────────────────────────────────────────────────────────────────────
# Repository root: two directory levels above the folder containing this file.
PROJECT_ROOT: Path = Path(__file__).resolve().parents[2]

# Input data locations.
DATASET_DIR: Path = PROJECT_ROOT / "cleaned_dataset"
DATA_DIR: Path = DATASET_DIR / "data"
METADATA_PATH: Path = DATASET_DIR / "metadata.csv"

# Legacy (un-versioned) artifact locations directly under ``artifacts/``.
ARTIFACTS_DIR: Path = PROJECT_ROOT / "artifacts"
MODELS_DIR: Path = ARTIFACTS_DIR / "models"
SCALERS_DIR: Path = ARTIFACTS_DIR / "scalers"
FIGURES_DIR: Path = ARTIFACTS_DIR / "figures"
LOGS_DIR: Path = ARTIFACTS_DIR / "logs"

# Default artifact version for get_version_paths() / ensure_version_dirs().
# NOTE(review): the previous comment read "changed when v3 is validated",
# yet the value is already "v3" — confirm whether v3 has been validated.
ACTIVE_VERSION: str = "v3"

# Import-time side effect (kept for backward compatibility): make sure every
# legacy artifact directory exists, including the three model sub-folders.
for _d in (
    MODELS_DIR,
    SCALERS_DIR,
    FIGURES_DIR,
    LOGS_DIR,
    *(MODELS_DIR / _sub for _sub in ("classical", "deep", "ensemble")),
):
    _d.mkdir(parents=True, exist_ok=True)
# ── Artifact versioning helpers ──────────────────────────────────────────────
def get_version_paths(version: str = ACTIVE_VERSION) -> Dict[str, Path]:
    """Build the directory layout of one artifact version.

    The paths are only computed; nothing is created on disk.

    Parameters
    ----------
    version
        Name of the folder under ``ARTIFACTS_DIR`` (e.g. ``"v2"``).
        Defaults to the value ``ACTIVE_VERSION`` had when this function
        was defined.

    Returns
    -------
    dict
        Maps each of the keys below to a :class:`pathlib.Path`.

    Keys
    ----
    root, models_classical, models_deep, models_ensemble,
    scalers, figures, results, logs

    Example
    -------
    >>> v2 = get_version_paths("v2")
    >>> joblib.dump(model, v2["models_classical"] / "rf.joblib")
    """
    version_root = ARTIFACTS_DIR / version
    models_root = version_root / "models"

    layout: Dict[str, Path] = {
        "root": version_root,
        "models_classical": models_root / "classical",
        "models_deep": models_root / "deep",
        "models_ensemble": models_root / "ensemble",
    }
    # For the remaining entries the dict key equals the folder name.
    layout.update(
        (leaf, version_root / leaf)
        for leaf in ("scalers", "figures", "results", "logs")
    )
    return layout
def ensure_version_dirs(version: str = ACTIVE_VERSION) -> Dict[str, Path]:
    """Create every directory of an artifact version and return the layout.

    Parameters
    ----------
    version
        Artifact version name, forwarded to :func:`get_version_paths`.

    Returns
    -------
    dict
        The mapping produced by :func:`get_version_paths`; each path in it
        exists as a directory once this function returns.
    """
    version_paths = get_version_paths(version)
    for directory in version_paths.values():
        # parents=True builds missing ancestors; exist_ok=True makes
        # repeated calls harmless.
        directory.mkdir(parents=True, exist_ok=True)
    return version_paths
# ── Battery dataset constants ────────────────────────────────────────────────
NOMINAL_CAPACITY_AH: float = 2.0

# Capacity thresholds expressed as a fraction of the nominal capacity:
# 70 % of 2.0 Ah is 1.4 Ah (30 % below nominal) and 80 % is 1.6 Ah
# (20 % below nominal).  "EOL" presumably means end-of-life — confirm.
EOL_30PCT_AH: float = 0.70 * NOMINAL_CAPACITY_AH
EOL_20PCT_AH: float = 0.80 * NOMINAL_CAPACITY_AH

# Battery IDs left out of the dataset (reason not recorded here).
EXCLUDED_BATTERIES = {
    "B0049",
    "B0050",
    "B0051",
    "B0052",
}
# ── Training defaults ────────────────────────────────────────────────────────
RANDOM_STATE: int = 42
TRAIN_RATIO: float = 0.8
WINDOW_SIZE: int = 32

# For fixed-length downsample of within-cycle data
N_BINS: int = 20

BATCH_SIZE: int = 32
MAX_EPOCHS: int = 150
EARLY_STOP_PATIENCE: int = 20

# For uncertainty estimation
MC_DROPOUT_SAMPLES: int = 50
# ── Classical ML ─────────────────────────────────────────────────────────────
# Hyperparameter-search budget and cross-validation fold count.
N_OPTUNA_TRIALS: int = 100
CV_FOLDS: int = 5
# ── Deep learning ────────────────────────────────────────────────────────────
LEARNING_RATE: float = 0.001
DROPOUT: float = 0.2

# LSTM
LSTM_HIDDEN: int = 128
LSTM_LAYERS: int = 2

# Transformer
TRANSFORMER_D_MODEL: int = 64
TRANSFORMER_NHEAD: int = 4
TRANSFORMER_NLAYERS: int = 2
# alias for convenience
TRANSFORMER_LAYERS: int = TRANSFORMER_NLAYERS

# For VAE
LATENT_DIM: int = 16
# ── Feature col lists (duplicated from preprocessing for easy import) ────────
# v2: 12 columns.
FEATURE_COLS_V2 = [
    "cycle_number",
    "ambient_temperature",
    "peak_voltage",
    "min_voltage",
    "voltage_range",
    "avg_current",
    "avg_temp",
    "temp_rise",
    "cycle_duration",
    "Re",
    "Rct",
    "delta_capacity",
]

# v3 adds 6 physics-informed features on top of v2's 12 (18 in total).
# Unpacking into a new list leaves FEATURE_COLS_V2 itself untouched.
FEATURE_COLS_V3 = [
    *FEATURE_COLS_V2,
    "capacity_retention",  # Q_n / Q_1 per battery (0-1 ratio)
    "cumulative_energy",   # cumulative Ah throughput
    "dRe_dn",              # impedance growth rate (ΔRe per cycle)
    "dRct_dn",             # impedance growth rate (ΔRct per cycle)
    "soh_rolling_mean",    # 5-cycle rolling mean SOH (smoothed)
    "voltage_slope",       # cycle-over-cycle voltage midpoint slope
]

# Currently the same twelve names as FEATURE_COLS_V2, spelled out as an
# independent list so the two can diverge without affecting each other.
FEATURE_COLS_SCALAR = [
    "cycle_number",
    "ambient_temperature",
    "peak_voltage",
    "min_voltage",
    "voltage_range",
    "avg_current",
    "avg_temp",
    "temp_rise",
    "cycle_duration",
    "Re",
    "Rct",
    "delta_capacity",
]
# ── Visualization ────────────────────────────────────────────────────────────
# Style-sheet name; presumably passed to matplotlib.style.use() — confirm.
MATPLOTLIB_STYLE: str = "seaborn-v0_8-whitegrid"

# Default figure resolution and size.
# NOTE(review): FIG_SIZE is presumably (width, height) in inches — confirm.
FIG_DPI: int = 150
FIG_SIZE = (12, 7)

# Colormap names for diverging and sequential data.
CMAP_DIVERGING: str = "RdYlGn"
CMAP_SEQUENTIAL: str = "viridis"
|