Spaces:
Sleeping
Sleeping
| """ | |
| General project settings: visualisation, paths, constants | |
| """ | |
| import warnings | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| from pathlib import Path | |
| from typing import Dict, Any, Optional | |
| import yaml | |
| import json | |
| import os | |
| # ============================================================================ | |
| # PATHS AND DIRECTORIES | |
| # ============================================================================ | |
# Project root: three directory levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent.parent

# Data tree
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DATA_DIR, PROCESSED_DATA_DIR, EXTERNAL_DATA_DIR = (
    DATA_DIR.joinpath(leaf) for leaf in ("raw", "processed", "external")
)

# Results tree
RESULTS_DIR = PROJECT_ROOT.joinpath("results")
PLOTS_DIR, MODELS_DIR, REPORTS_DIR, LOGS_DIR = (
    RESULTS_DIR.joinpath(leaf) for leaf in ("plots", "models", "reports", "logs")
)

# Remaining top-level project folders (not created automatically)
CONFIGS_DIR = PROJECT_ROOT.joinpath("configs")
NOTEBOOKS_DIR = PROJECT_ROOT.joinpath("notebooks")
TESTS_DIR = PROJECT_ROOT.joinpath("tests")

# Create the data and results directories on import; existing ones are left
# untouched and missing parents are created as needed.
for directory in (
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    EXTERNAL_DATA_DIR,
    PLOTS_DIR,
    MODELS_DIR,
    REPORTS_DIR,
    LOGS_DIR,
):
    directory.mkdir(parents=True, exist_ok=True)
| # ============================================================================ | |
| # VISUALISATION SETTINGS | |
| # ============================================================================ | |
def setup_visualization(
    style: str = "seaborn-whitegrid",
    palette: str = "husl",
    context: str = "notebook",
    font_scale: float = 1.0,
    dpi: int = 150,
    figsize: tuple = (12, 6),
    **kwargs
):
    """
    Configure visualisation parameters for matplotlib and seaborn

    Parameters:
    -----------
    style : str
        Matplotlib style: 'seaborn-whitegrid', 'ggplot', 'bmh', 'dark_background'.
        Seaborn styles may be given with either the legacy 'seaborn-' prefix
        or the newer 'seaborn-v0_8-' prefix; whichever spelling the installed
        matplotlib provides is used.
    palette : str
        Seaborn palette: 'husl', 'Set2', 'viridis', 'mako'
    context : str
        Seaborn context: 'paper', 'notebook', 'talk', 'poster'
    font_scale : float
        Font scale
    dpi : int
        Plot resolution
    figsize : tuple
        Default figure size
    **kwargs
        Additional matplotlib rcParams; they override the defaults set here
        and are re-applied after the seaborn calls so they always win.
    """
    # Ignore warnings (process-wide filter, as before)
    warnings.filterwarnings('ignore')

    seaborn_prefixes = ("seaborn-v0_8-", "seaborn-")
    seaborn_styles = ("darkgrid", "whitegrid", "dark", "white", "ticks")

    # Split "seaborn-<base>" / "seaborn-v0_8-<base>" into its base name.
    base_style = None
    for prefix in seaborn_prefixes:
        if style.startswith(prefix):
            base_style = style[len(prefix):]
            break

    # Newer matplotlib releases only ship the seaborn style sheets under the
    # "seaborn-v0_8-" prefix, older ones only under "seaborn-". Pick whichever
    # spelling is actually installed; fall back to the name as given.
    mpl_style = style
    if base_style is not None and style not in plt.style.available:
        for prefix in seaborn_prefixes:
            if prefix + base_style in plt.style.available:
                mpl_style = prefix + base_style
                break

    # Matplotlib settings
    plt.style.use(mpl_style)

    # RC parameters
    rc_params = {
        'font.size': 10,
        'figure.figsize': figsize,
        'figure.dpi': dpi,
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.format': 'png',
        'axes.titlesize': 12,
        'axes.labelsize': 10,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,
        'font.family': ['DejaVu Sans', 'Arial', 'sans-serif'],
        'figure.titlesize': 14,
        'axes.grid': True,
        'grid.alpha': 0.3,
        'lines.linewidth': 1.5,
        'lines.markersize': 6,
        'patch.edgecolor': 'black',
        'patch.force_edgecolor': True,
        'xtick.top': False,
        'ytick.right': False,
        'axes.spines.top': False,
        'axes.spines.right': False
    }
    # Update additional parameters
    rc_params.update(kwargs)
    plt.rcParams.update(rc_params)

    # Seaborn settings. sns.set_style only accepts seaborn's own style names,
    # so it is skipped for plain matplotlib styles such as 'ggplot' or 'bmh'
    # instead of raising ValueError.
    sns_style = base_style if base_style is not None else style
    if sns_style in seaborn_styles:
        sns.set_style(sns_style)
    sns.set_palette(palette)
    sns.set_context(context, font_scale=font_scale)

    # The seaborn calls above write to rcParams as well; re-apply the
    # caller's explicit overrides so they are not silently discarded.
    if kwargs:
        plt.rcParams.update(kwargs)

    print(f"✓ Visualisation settings applied: style={style}, palette={palette}")
def get_color_palette(name: str = "husl", n_colors: int = 8) -> list:
    """
    Get colour palette

    Parameters:
    -----------
    name : str
        Palette name. One of 'husl', 'Set2', 'Set3', 'viridis', 'plasma',
        'coolwarm', 'RdYlBu', 'Spectral', 'tab10', 'tab20'; any other value
        falls back to 'husl'.
    n_colors : int
        Number of colours

    Returns:
    --------
    list
        List of colours in HEX format ('#rrggbb')
    """
    known_palettes = (
        "husl", "Set2", "Set3", "viridis", "plasma",
        "coolwarm", "RdYlBu", "Spectral", "tab10", "tab20",
    )
    # Build only the palette that was asked for rather than every known one.
    palette_name = name if name in known_palettes else "husl"
    palette = sns.color_palette(palette_name, n_colors)

    def to_byte(channel):
        # Round rather than truncate: channels defined as k/255 can come back
        # as k - 1e-15 after float arithmetic, which int() would turn into k-1.
        return min(255, max(0, int(round(float(channel) * 255))))

    return [
        f"#{to_byte(r):02x}{to_byte(g):02x}{to_byte(b):02x}"
        for r, g, b in palette
    ]
| # ============================================================================ | |
| # CONSTANTS | |
| # ============================================================================ | |
# Data types: every supported date layout, first date-only, then with a
# trailing 24-hour time component.
DATETIME_FORMATS = [
    date_part + time_part
    for time_part in ("", " %H:%M:%S")
    for date_part in ("%Y-%m-%d", "%Y/%m/%d", "%d.%m.%Y", "%d/%m/%Y")
]

# Metric names grouped by task type
METRICS = dict(
    regression=["mse", "rmse", "mae", "mape", "r2", "explained_variance"],
    classification=["accuracy", "precision", "recall", "f1", "roc_auc"],
)

# Statistical constants: supported confidence levels, their two-sided normal
# z-scores, and the multipliers stored for mild/extreme outliers.
STATS_CONSTANTS = dict(
    confidence_levels=[0.9, 0.95, 0.99],
    z_scores={0.9: 1.645, 0.95: 1.96, 0.99: 2.576},
    outlier_multipliers=dict(mild=1.5, extreme=3.0),
)

# Time series parameters: frequency codes mapped to readable names, and the
# seasonal period associated with each named frequency.
TIME_SERIES_CONSTANTS = dict(
    frequencies=dict(
        H="hourly",
        D="daily",
        W="weekly",
        M="monthly",
        Q="quarterly",
        Y="yearly",
    ),
    seasonal_periods=dict(
        hourly=24,
        daily=7,
        weekly=52,
        monthly=12,
        quarterly=4,
        yearly=1,
    ),
)
| # ============================================================================ | |
| # CONFIGURATION UTILITIES | |
| # ============================================================================ | |
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Load configuration from file

    Parameters:
    -----------
    config_path : str, optional
        Path to a '.json', '.yaml' or '.yml' configuration file. When
        omitted, 'default_config.json' inside CONFIGS_DIR is used.

    Returns:
    --------
    Dict[str, Any]
        Configuration dictionary. An empty dictionary is returned when the
        file does not exist or when it parses to nothing (empty YAML
        document, JSON ``null``).

    Raises:
    -------
    ValueError
        If the file exists but its extension is not supported.
    """
    if config_path is None:
        config_path = CONFIGS_DIR / "default_config.json"
    config_path = Path(config_path)

    if not config_path.exists():
        print(f"⚠ Configuration file not found: {config_path}")
        return {}

    # Determine file format
    suffix = config_path.suffix.lower()
    if suffix == '.json':
        parse = json.load
    elif suffix in ('.yaml', '.yml'):
        parse = yaml.safe_load
    else:
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    with open(config_path, 'r', encoding='utf-8') as f:
        config = parse(f)

    # An empty YAML file (or a literal null) parses to None; callers expect
    # a dictionary, so normalise it.
    if config is None:
        config = {}

    print(f"✓ Configuration loaded from: {config_path}")
    return config
def save_config(config: Dict[str, Any], config_path: str) -> None:
    """
    Save configuration to file

    Parameters:
    -----------
    config : Dict[str, Any]
        Configuration to save
    config_path : str
        Save path; the extension ('.json', '.yaml' or '.yml') selects the
        output format. Missing parent directories are created.

    Raises:
    -------
    ValueError
        If the extension is not supported. Nothing is written or created on
        disk in that case.
    """
    config_path = Path(config_path)

    # Validate the format first so an unsupported path leaves no stray
    # directories behind.
    suffix = config_path.suffix.lower()
    if suffix not in ('.json', '.yaml', '.yml'):
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    config_path.parent.mkdir(parents=True, exist_ok=True)

    with open(config_path, 'w', encoding='utf-8') as f:
        if suffix == '.json':
            json.dump(config, f, indent=2, ensure_ascii=False)
        else:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True)

    print(f"✓ Configuration saved to: {config_path}")
def merge_configs(base_config: Dict[str, Any],
                  override_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Recursive configuration merging

    Keys present in both configurations are merged recursively when both
    values are dictionaries; otherwise the override value replaces the base
    value. Neither input is modified, and the result shares no mutable
    objects with them, so it can be edited freely afterwards.

    Parameters:
    -----------
    base_config : Dict[str, Any]
        Base configuration
    override_config : Dict[str, Any]
        Override configuration

    Returns:
    --------
    Dict[str, Any]
        Merged configuration
    """
    import copy

    # Deep copy up front: a shallow copy would leave nested dicts/lists
    # aliased to the caller's base configuration.
    merged = copy.deepcopy(base_config)
    _merge_into(merged, override_config)
    return merged


def _merge_into(target: Dict[str, Any], override: Dict[str, Any]) -> None:
    """Merge *override* into *target* in place, copying override values."""
    import copy

    for key, value in override.items():
        current = target.get(key)
        if isinstance(current, dict) and isinstance(value, dict):
            _merge_into(current, value)
        else:
            target[key] = copy.deepcopy(value)
| # ============================================================================ | |
| # ENVIRONMENT SETUP | |
| # ============================================================================ | |
def setup_environment(
    log_level: str = "INFO",
    random_seed: int = 42,
    enable_warnings: bool = False,
    memory_limit_gb: Optional[int] = None
) -> None:
    """
    Set up environment for reproducibility

    Seeds Python's ``random`` and NumPy, and additionally PyTorch and
    TensorFlow when those optional packages are installed.

    Parameters:
    -----------
    log_level : str
        Logging level. NOTE: accepted for interface compatibility; this
        function does not currently read it.
    random_seed : int
        Seed for random generators
    enable_warnings : bool
        Enable warnings ('default' filter) or silence them ('ignore')
    memory_limit_gb : int, optional
        Soft address-space limit in GB, applied through the ``resource``
        module where that module exists; skipped with a message elsewhere.
    """
    import importlib
    import random

    import numpy as np

    # Set seeds
    np.random.seed(random_seed)
    random.seed(random_seed)

    # Deep-learning frameworks are optional: import them lazily so that a
    # missing package does not abort the whole setup, and keep seeding
    # best-effort while still reporting unexpected failures.
    optional_seeders = (
        ("torch", lambda module: module.manual_seed(random_seed)),
        ("tensorflow", lambda module: module.random.set_seed(random_seed)),
    )
    for module_name, apply_seed in optional_seeders:
        try:
            apply_seed(importlib.import_module(module_name))
        except ImportError:
            continue  # framework not installed; nothing to seed
        except Exception as exc:
            print(f"⚠ Could not set random seed for {module_name}: {exc}")

    # Configure warnings
    if enable_warnings:
        warnings.filterwarnings('default')
    else:
        warnings.filterwarnings('ignore')

    # Memory limit (if specified)
    if memory_limit_gb:
        try:
            import resource
        except ImportError:
            # The resource module is not available on every platform.
            print("⚠ Memory limit not applied: 'resource' module unavailable")
        else:
            _, hard = resource.getrlimit(resource.RLIMIT_AS)
            # GB to bytes; setrlimit needs an integer
            memory_limit = int(memory_limit_gb * 1024**3)
            resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard))
            print(f"✓ Memory limit set: {memory_limit_gb} GB")

    print(f"✓ Environment configured. Random seed: {random_seed}")
| # ============================================================================ | |
| # AUTOMATIC SETUP ON IMPORT | |
| # ============================================================================ | |
# Automatically apply visualisation settings with the defaults as soon as the
# module is imported.
setup_visualization()

# Export useful variables: the public helpers, every project directory
# constant defined in this module, and the shared constant tables.
__all__ = [
    'setup_visualization',
    'get_color_palette',
    'load_config',
    'save_config',
    'merge_configs',
    'setup_environment',
    'PROJECT_ROOT',
    'DATA_DIR',
    'RAW_DATA_DIR',
    'PROCESSED_DATA_DIR',
    'EXTERNAL_DATA_DIR',
    'RESULTS_DIR',
    'PLOTS_DIR',
    'MODELS_DIR',
    'REPORTS_DIR',
    'LOGS_DIR',
    'CONFIGS_DIR',
    'NOTEBOOKS_DIR',
    'TESTS_DIR',
    'DATETIME_FORMATS',
    'METRICS',
    'STATS_CONSTANTS',
    'TIME_SERIES_CONSTANTS',
]