"""
General project settings: visualisation, paths, constants
"""

import json
import os
import warnings
from pathlib import Path
from typing import Any, Dict, Optional, Union

import matplotlib.pyplot as plt
import seaborn as sns
import yaml

# ============================================================================
# PATHS AND DIRECTORIES
# ============================================================================

PROJECT_ROOT = Path(__file__).parent.parent.parent
DATA_DIR = PROJECT_ROOT / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
EXTERNAL_DATA_DIR = DATA_DIR / "external"

RESULTS_DIR = PROJECT_ROOT / "results"
PLOTS_DIR = RESULTS_DIR / "plots"
MODELS_DIR = RESULTS_DIR / "models"
REPORTS_DIR = RESULTS_DIR / "reports"
LOGS_DIR = RESULTS_DIR / "logs"

CONFIGS_DIR = PROJECT_ROOT / "configs"
NOTEBOOKS_DIR = PROJECT_ROOT / "notebooks"
TESTS_DIR = PROJECT_ROOT / "tests"

# Create directories on import
for directory in [
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    EXTERNAL_DATA_DIR,
    PLOTS_DIR,
    MODELS_DIR,
    REPORTS_DIR,
    LOGS_DIR,
]:
    directory.mkdir(parents=True, exist_ok=True)

# ============================================================================
# VISUALISATION SETTINGS
# ============================================================================

# Style presets accepted by ``seaborn.set_style``.
_SEABORN_STYLES = ("darkgrid", "whitegrid", "dark", "white", "ticks")

# Matplotlib renamed its bundled seaborn styles from "seaborn-*" to
# "seaborn-v0_8-*"; both spellings are handled so the module imports on
# old and new matplotlib releases alike.
_LEGACY_SEABORN_STEM = "seaborn"
_MODERN_SEABORN_STEM = "seaborn-v0_8"


def _resolve_matplotlib_style(style: str) -> str:
    """
    Map a seaborn-flavoured matplotlib style name onto the spelling that the
    installed matplotlib actually provides.

    Names that are already available, or that are not seaborn styles, are
    returned unchanged so matplotlib reports any error itself.
    """
    available = plt.style.available
    if style in available:
        return style

    if style.startswith(_MODERN_SEABORN_STEM):
        candidate = _LEGACY_SEABORN_STEM + style[len(_MODERN_SEABORN_STEM):]
    elif style == _LEGACY_SEABORN_STEM or style.startswith(_LEGACY_SEABORN_STEM + "-"):
        candidate = _MODERN_SEABORN_STEM + style[len(_LEGACY_SEABORN_STEM):]
    else:
        return style

    return candidate if candidate in available else style


def _seaborn_style_name(style: str) -> Optional[str]:
    """
    Return the seaborn preset matching a matplotlib style name, or None when
    the style has no seaborn equivalent (e.g. 'ggplot').
    """
    name = style
    for prefix in (_MODERN_SEABORN_STEM + "-", _LEGACY_SEABORN_STEM + "-"):
        if name.startswith(prefix):
            name = name[len(prefix):]
            break
    return name if name in _SEABORN_STYLES else None


def setup_visualization(
    style: str = "seaborn-whitegrid",
    palette: str = "husl",
    context: str = "notebook",
    font_scale: float = 1.0,
    dpi: int = 150,
    figsize: tuple = (12, 6),
    **kwargs
):
    """
    Configure visualisation parameters for matplotlib and seaborn

    Note: this function installs a global ``warnings`` filter that ignores
    all warnings.

    Parameters:
    -----------
    style : str
        Matplotlib style: 'seaborn-whitegrid', 'ggplot', 'bmh', 'dark_background'.
        Seaborn style names are accepted in both the 'seaborn-*' and the
        'seaborn-v0_8-*' spelling.
    palette : str
        Seaborn palette: 'husl', 'Set2', 'viridis', 'mako'
    context : str
        Seaborn context: 'paper', 'notebook', 'talk', 'poster'
    font_scale : float
        Font scale
    dpi : int
        Plot resolution
    figsize : tuple
        Default figure size
    **kwargs
        Additional matplotlib rc parameters; they take precedence over every
        default applied here.
    """
    # Ignore warnings
    warnings.filterwarnings('ignore')

    # Matplotlib settings
    plt.style.use(_resolve_matplotlib_style(style))

    # RC parameters
    rc_params = {
        'font.size': 10,
        'figure.figsize': figsize,
        'figure.dpi': dpi,
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.format': 'png',
        'axes.titlesize': 12,
        'axes.labelsize': 10,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,
        'font.family': ['DejaVu Sans', 'Arial', 'sans-serif'],
        'figure.titlesize': 14,
        'axes.grid': True,
        'grid.alpha': 0.3,
        'lines.linewidth': 1.5,
        'lines.markersize': 6,
        'patch.edgecolor': 'black',
        'patch.force_edgecolor': True,
        'xtick.top': False,
        'ytick.right': False,
        'axes.spines.top': False,
        'axes.spines.right': False,
    }

    # Update additional parameters
    rc_params.update(kwargs)
    plt.rcParams.update(rc_params)

    # Seaborn settings. ``sns.set_style`` only knows its own presets, so it
    # is skipped for plain matplotlib styles such as 'ggplot' or 'bmh'.
    seaborn_style = _seaborn_style_name(style)
    if seaborn_style is not None:
        sns.set_style(seaborn_style)
    sns.set_palette(palette)
    sns.set_context(context, font_scale=font_scale)

    # Seaborn rewrites some rc parameters; re-apply the caller's explicit
    # overrides so they always win.
    if kwargs:
        plt.rcParams.update(kwargs)

    print(f"✓ Visualisation settings applied: style={style}, palette={palette}")


# Palette names recognised by ``get_color_palette``.
_SUPPORTED_PALETTES = (
    "husl",
    "Set2",
    "Set3",
    "viridis",
    "plasma",
    "coolwarm",
    "RdYlBu",
    "Spectral",
    "tab10",
    "tab20",
)


def get_color_palette(name: str = "husl", n_colors: int = 8) -> list:
    """
    Get colour palette

    Parameters:
    -----------
    name : str
        Palette name. Names outside the supported set fall back to 'husl'.
    n_colors : int
        Number of colours

    Returns:
    --------
    list
        List of colours in HEX format
    """
    # Only the requested palette is built (unknown names use 'husl').
    palette_name = name if name in _SUPPORTED_PALETTES else "husl"
    palette = sns.color_palette(palette_name, n_colors)

    return [
        f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}"
        for r, g, b in palette
    ]


# ============================================================================
# CONSTANTS
# ============================================================================

# Data types
DATETIME_FORMATS = [
    "%Y-%m-%d",
    "%Y/%m/%d",
    "%d.%m.%Y",
    "%d/%m/%Y",
    "%Y-%m-%d %H:%M:%S",
    "%Y/%m/%d %H:%M:%S",
    "%d.%m.%Y %H:%M:%S",
    "%d/%m/%Y %H:%M:%S",
]

# Metrics
METRICS = {
    "regression": ["mse", "rmse", "mae", "mape", "r2", "explained_variance"],
    "classification": ["accuracy", "precision", "recall", "f1", "roc_auc"],
}

# Statistical constants
STATS_CONSTANTS = {
    "confidence_levels": [0.9, 0.95, 0.99],
    "z_scores": {0.9: 1.645, 0.95: 1.96, 0.99: 2.576},
    "outlier_multipliers": {"mild": 1.5, "extreme": 3.0},
}

# Time series parameters
TIME_SERIES_CONSTANTS = {
    "frequencies": {
        "H": "hourly",
        "D": "daily",
        "W": "weekly",
        "M": "monthly",
        "Q": "quarterly",
        "Y": "yearly",
    },
    "seasonal_periods": {
        "hourly": 24,
        "daily": 7,
        "weekly": 52,
        "monthly": 12,
        "quarterly": 4,
        "yearly": 1,
    },
}

# ============================================================================
# CONFIGURATION UTILITIES
# ============================================================================

def load_config(config_path: Optional[Union[str, Path]] = None) -> Dict[str, Any]:
    """
    Load configuration from file

    Parameters:
    -----------
    config_path : str or Path, optional
        Path to configuration file (.json, .yaml or .yml). Defaults to
        ``CONFIGS_DIR / "default_config.json"``.

    Returns:
    --------
    Dict[str, Any]
        Configuration dictionary. Empty when the file does not exist or
        contains no data.

    Raises:
    -------
    ValueError
        If the file exists but its extension is not supported.
    """
    if config_path is None:
        config_path = CONFIGS_DIR / "default_config.json"

    config_path = Path(config_path)

    if not config_path.exists():
        print(f"⚠ Configuration file not found: {config_path}")
        return {}

    # Determine file format
    suffix = config_path.suffix.lower()
    if suffix == '.json':
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
    elif suffix in ('.yaml', '.yml'):
        with open(config_path, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    else:
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    # An empty YAML document (or a JSON ``null``) parses to None; keep the
    # documented dict return type.
    if config is None:
        config = {}

    print(f"✓ Configuration loaded from: {config_path}")
    return config


def save_config(config: Dict[str, Any], config_path: Union[str, Path]) -> None:
    """
    Save configuration to file

    Parent directories are created when missing.

    Parameters:
    -----------
    config : Dict[str, Any]
        Configuration to save
    config_path : str or Path
        Save path (.json, .yaml or .yml)

    Raises:
    -------
    ValueError
        If the file extension is not supported.
    """
    config_path = Path(config_path)
    config_path.parent.mkdir(parents=True, exist_ok=True)

    # Determine format
    suffix = config_path.suffix.lower()
    if suffix == '.json':
        with open(config_path, 'w', encoding='utf-8') as f:
            json.dump(config, f, indent=2, ensure_ascii=False)
    elif suffix in ('.yaml', '.yml'):
        with open(config_path, 'w', encoding='utf-8') as f:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True)
    else:
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    print(f"✓ Configuration saved to: {config_path}")


def _clone_config_value(value: Any) -> Any:
    """Copy nested dicts and lists; return every other value as-is."""
    if isinstance(value, dict):
        return {key: _clone_config_value(item) for key, item in value.items()}
    if isinstance(value, list):
        return [_clone_config_value(item) for item in value]
    return value


def merge_configs(
    base_config: Dict[str, Any],
    override_config: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Recursive configuration merging

    Keys present in both inputs are merged recursively when both values are
    dicts; otherwise the override value replaces the base value. Neither
    input is modified, and nested dicts/lists in the result are copies, so
    mutating the result does not affect the inputs.

    Parameters:
    -----------
    base_config : Dict[str, Any]
        Base configuration (None is treated as empty)
    override_config : Dict[str, Any]
        Override configuration (None is treated as empty)

    Returns:
    --------
    Dict[str, Any]
        Merged configuration
    """
    result = {
        key: _clone_config_value(value)
        for key, value in (base_config or {}).items()
    }

    for key, value in (override_config or {}).items():
        if isinstance(result.get(key), dict) and isinstance(value, dict):
            result[key] = merge_configs(result[key], value)
        else:
            result[key] = _clone_config_value(value)

    return result


# ============================================================================
# ENVIRONMENT SETUP
# ============================================================================

def setup_environment(
    log_level: str = "INFO",
    random_seed: int = 42,
    enable_warnings: bool = False,
    memory_limit_gb: Optional[int] = None
) -> None:
    """
    Set up environment for reproducibility

    Seeds ``numpy`` and ``random``; ``torch`` and ``tensorflow`` are seeded
    only when they can be imported.

    Parameters:
    -----------
    log_level : str
        Logging level. Accepted for interface compatibility; this function
        does not currently configure logging.
    random_seed : int
        Seed for random generators
    enable_warnings : bool
        Enable warnings
    memory_limit_gb : int, optional
        Memory limit in GB (applied via ``resource.RLIMIT_AS``; skipped with
        a warning where the ``resource`` module is unavailable)
    """
    import random

    import numpy as np

    # Set seeds
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Deep-learning frameworks are optional: a missing package must not
    # prevent the rest of the setup from running.
    try:
        import torch
    except ImportError:
        torch = None
    if torch is not None:
        try:
            torch.manual_seed(random_seed)
        except Exception as exc:  # best effort, but do not hide the cause
            print(f"⚠ Could not seed torch: {exc}")

    try:
        import tensorflow as tf
    except ImportError:
        tf = None
    if tf is not None:
        try:
            tf.random.set_seed(random_seed)
        except Exception as exc:  # best effort, but do not hide the cause
            print(f"⚠ Could not seed tensorflow: {exc}")

    # Configure warnings
    if enable_warnings:
        warnings.filterwarnings('default')
    else:
        warnings.filterwarnings('ignore')

    # Memory limit (if specified)
    if memory_limit_gb:
        try:
            import resource  # Unix-only module
        except ImportError:
            print("⚠ Memory limit not supported on this platform")
        else:
            _, hard = resource.getrlimit(resource.RLIMIT_AS)
            memory_limit = int(memory_limit_gb * 1024**3)  # GB to bytes
            # The soft limit may not exceed a finite hard limit.
            if hard != resource.RLIM_INFINITY and memory_limit > hard:
                print("⚠ Requested memory limit exceeds the hard limit; using the hard limit")
                memory_limit = hard
            resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
            print(f"✓ Memory limit set: {memory_limit_gb} GB")

    print(f"✓ Environment configured. Random seed: {random_seed}")


# ============================================================================
# AUTOMATIC SETUP ON IMPORT
# ============================================================================

# Automatically apply visualisation settings
setup_visualization()

# Export useful variables
__all__ = [
    'setup_visualization',
    'get_color_palette',
    'load_config',
    'save_config',
    'merge_configs',
    'setup_environment',
    'PROJECT_ROOT',
    'DATA_DIR',
    'RAW_DATA_DIR',
    'PROCESSED_DATA_DIR',
    'EXTERNAL_DATA_DIR',
    'RESULTS_DIR',
    'PLOTS_DIR',
    'MODELS_DIR',
    'REPORTS_DIR',
    'LOGS_DIR',
    'CONFIGS_DIR',
    'NOTEBOOKS_DIR',
    'TESTS_DIR',
    'DATETIME_FORMATS',
    'METRICS',
    'STATS_CONSTANTS',
    'TIME_SERIES_CONSTANTS',
]