TimeFlowPro / config /settings.py
ArabovMK's picture
Update all files
d8f69a9
"""
General project settings: visualisation, paths, constants
"""
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Dict, Any, Optional
import yaml
import json
import os
# ============================================================================
# PATHS AND DIRECTORIES
# ============================================================================
# Project root: three directory levels above this file.
# NOTE(review): confirm the three-level hop matches where this module
# actually lives in the checkout.
PROJECT_ROOT = Path(__file__).parent.parent.parent

# Data directories
DATA_DIR = PROJECT_ROOT.joinpath("data")
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")
EXTERNAL_DATA_DIR = DATA_DIR.joinpath("external")

# Output directories
RESULTS_DIR = PROJECT_ROOT.joinpath("results")
PLOTS_DIR = RESULTS_DIR.joinpath("plots")
MODELS_DIR = RESULTS_DIR.joinpath("models")
REPORTS_DIR = RESULTS_DIR.joinpath("reports")
LOGS_DIR = RESULTS_DIR.joinpath("logs")

# Other project directories (not created automatically below)
CONFIGS_DIR = PROJECT_ROOT.joinpath("configs")
NOTEBOOKS_DIR = PROJECT_ROOT.joinpath("notebooks")
TESTS_DIR = PROJECT_ROOT.joinpath("tests")

# Importing this module creates the data and results sub-directories
# (including any missing parents); already existing ones are left alone.
for directory in (
    RAW_DATA_DIR,
    PROCESSED_DATA_DIR,
    EXTERNAL_DATA_DIR,
    PLOTS_DIR,
    MODELS_DIR,
    REPORTS_DIR,
    LOGS_DIR,
):
    directory.mkdir(parents=True, exist_ok=True)
# ============================================================================
# VISUALISATION SETTINGS
# ============================================================================
def setup_visualization(
    style: str = "seaborn-whitegrid",
    palette: str = "husl",
    context: str = "notebook",
    font_scale: float = 1.0,
    dpi: int = 150,
    figsize: tuple = (12, 6),
    **kwargs
) -> None:
    """
    Configure visualisation parameters for matplotlib and seaborn

    Parameters:
    -----------
    style : str
        Matplotlib style: 'seaborn-whitegrid', 'ggplot', 'bmh', 'dark_background'.
        Legacy 'seaborn-*' names are retried as 'seaborn-v0_8-*' when the
        installed matplotlib no longer knows the old spelling.
    palette : str
        Seaborn palette: 'husl', 'Set2', 'viridis', 'mako'
    context : str
        Seaborn context: 'paper', 'notebook', 'talk', 'poster'
    font_scale : float
        Font scale
    dpi : int
        Plot resolution
    figsize : tuple
        Default figure size
    **kwargs
        Additional matplotlib rcParams; they take precedence over every
        other setting applied by this function.

    Raises:
    -------
    OSError
        If matplotlib does not recognise ``style`` (even after the
        'seaborn-v0_8-*' retry)
    """
    # Ignore warnings (this silences all warnings for the whole process)
    warnings.filterwarnings('ignore')

    # Matplotlib settings
    try:
        plt.style.use(style)
    except OSError:
        # Newer matplotlib releases only ship the seaborn styles under the
        # 'seaborn-v0_8*' names; retry with that spelling before giving up.
        is_legacy_seaborn = style == 'seaborn' or (
            style.startswith('seaborn-')
            and not style.startswith('seaborn-v0_8')
        )
        if not is_legacy_seaborn:
            raise
        plt.style.use('seaborn-v0_8' + style[len('seaborn'):])

    # RC parameters
    rc_params = {
        'font.size': 10,
        'figure.figsize': figsize,
        'figure.dpi': dpi,
        'savefig.dpi': 300,
        'savefig.bbox': 'tight',
        'savefig.format': 'png',
        'axes.titlesize': 12,
        'axes.labelsize': 10,
        'xtick.labelsize': 9,
        'ytick.labelsize': 9,
        'legend.fontsize': 9,
        'font.family': ['DejaVu Sans', 'Arial', 'sans-serif'],
        'figure.titlesize': 14,
        'axes.grid': True,
        'grid.alpha': 0.3,
        'lines.linewidth': 1.5,
        'lines.markersize': 6,
        'patch.edgecolor': 'black',
        'patch.force_edgecolor': True,
        'xtick.top': False,
        'ytick.right': False,
        'axes.spines.top': False,
        'axes.spines.right': False
    }
    # Update additional parameters
    rc_params.update(kwargs)
    plt.rcParams.update(rc_params)

    # Seaborn settings. Only the five seaborn style names are forwarded;
    # pure matplotlib styles such as 'ggplot' would make set_style() raise.
    seaborn_style = style
    for prefix in ('seaborn-v0_8-', 'seaborn-'):
        if seaborn_style.startswith(prefix):
            seaborn_style = seaborn_style[len(prefix):]
            break
    if seaborn_style in {'darkgrid', 'whitegrid', 'dark', 'white', 'ticks'}:
        sns.set_style(seaborn_style)
    sns.set_palette(palette)
    sns.set_context(context, font_scale=font_scale)

    # seaborn's set_style/set_context write to rcParams as well, so the
    # caller's explicit overrides are re-applied last to make sure they win.
    if kwargs:
        plt.rcParams.update(kwargs)

    print(f"✓ Visualisation settings applied: style={style}, palette={palette}")
def get_color_palette(name: str = "husl", n_colors: int = 8) -> list:
    """
    Get colour palette

    Parameters:
    -----------
    name : str
        Palette name. Supported: 'husl', 'Set2', 'Set3', 'viridis',
        'plasma', 'coolwarm', 'RdYlBu', 'Spectral', 'tab10', 'tab20'.
        Any other name falls back to 'husl'.
    n_colors : int
        Number of colours

    Returns:
    --------
    list
        List of colours in HEX format ('#rrggbb')
    """
    supported = {
        "husl", "Set2", "Set3", "viridis", "plasma",
        "coolwarm", "RdYlBu", "Spectral", "tab10", "tab20",
    }
    # Build only the requested palette instead of materialising all of them.
    palette_name = name if name in supported else "husl"
    palette = sns.color_palette(palette_name, n_colors)

    def to_byte(channel) -> int:
        # Round to the nearest 8-bit level; plain int() truncation can land
        # one step too low because of floating point representation error.
        return int(round(channel * 255))

    return [
        f"#{to_byte(r):02x}{to_byte(g):02x}{to_byte(b):02x}"
        for r, g, b in palette
    ]
# ============================================================================
# CONSTANTS
# ============================================================================
# Date / datetime string layouts: date-only patterns first, then the same
# patterns followed by an "%H:%M:%S" time component
DATETIME_FORMATS = [
    # date only
    "%Y-%m-%d",
    "%Y/%m/%d",
    "%d.%m.%Y",
    "%d/%m/%Y",
    # date and time
    "%Y-%m-%d %H:%M:%S",
    "%Y/%m/%d %H:%M:%S",
    "%d.%m.%Y %H:%M:%S",
    "%d/%m/%Y %H:%M:%S",
]

# Metric names grouped by task type
METRICS = dict(
    regression=["mse", "rmse", "mae", "mape", "r2", "explained_variance"],
    classification=["accuracy", "precision", "recall", "f1", "roc_auc"],
)

# Statistical constants
STATS_CONSTANTS = dict(
    confidence_levels=[0.9, 0.95, 0.99],
    # keyed by the confidence levels listed above
    z_scores={
        0.9: 1.645,
        0.95: 1.96,
        0.99: 2.576,
    },
    outlier_multipliers=dict(mild=1.5, extreme=3.0),
)

# Time series parameters
TIME_SERIES_CONSTANTS = dict(
    # frequency code -> frequency name
    frequencies=dict(
        H="hourly",
        D="daily",
        W="weekly",
        M="monthly",
        Q="quarterly",
        Y="yearly",
    ),
    # frequency name -> seasonal period
    seasonal_periods=dict(
        hourly=24,
        daily=7,
        weekly=52,
        monthly=12,
        quarterly=4,
        yearly=1,
    ),
)
# ============================================================================
# CONFIGURATION UTILITIES
# ============================================================================
def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
    """
    Load configuration from file

    Parameters:
    -----------
    config_path : str, optional
        Path to a '.json', '.yaml' or '.yml' configuration file (the
        extension check is case-insensitive). When omitted,
        ``CONFIGS_DIR / "default_config.json"`` is used.

    Returns:
    --------
    Dict[str, Any]
        Configuration dictionary. An empty dictionary is returned when the
        file does not exist or contains no data.

    Raises:
    -------
    ValueError
        If the file exists but has an unsupported extension
    """
    if config_path is None:
        config_path = CONFIGS_DIR / "default_config.json"
    config_path = Path(config_path)

    # A missing file is reported but deliberately not treated as an error.
    if not config_path.exists():
        print(f"⚠ Configuration file not found: {config_path}")
        return {}

    # Determine file format
    suffix = config_path.suffix.lower()
    if suffix == '.json':
        parse = json.load
    elif suffix in ('.yaml', '.yml'):
        parse = yaml.safe_load
    else:
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    with open(config_path, 'r', encoding='utf-8') as f:
        config = parse(f)

    # An empty YAML document (or a JSON ``null``) parses to None; normalise
    # it so callers always receive a dictionary they can index or merge.
    if config is None:
        config = {}

    print(f"✓ Configuration loaded from: {config_path}")
    return config
def save_config(config: Dict[str, Any], config_path: str) -> None:
    """
    Save configuration to file

    Parameters:
    -----------
    config : Dict[str, Any]
        Configuration to save
    config_path : str
        Save path; the extension selects the format ('.json', '.yaml' or
        '.yml', case-insensitive). Missing parent directories are created.

    Raises:
    -------
    ValueError
        If the extension is not supported. Nothing is created on disk in
        that case.
    """
    config_path = Path(config_path)
    suffix = config_path.suffix.lower()

    # Validate the format before touching the filesystem so an unsupported
    # path does not leave freshly created directories behind.
    if suffix not in ('.json', '.yaml', '.yml'):
        raise ValueError(f"Unsupported file format: {config_path.suffix}")

    config_path.parent.mkdir(parents=True, exist_ok=True)

    with open(config_path, 'w', encoding='utf-8') as f:
        if suffix == '.json':
            json.dump(config, f, indent=2, ensure_ascii=False)
        else:
            yaml.dump(config, f, default_flow_style=False, allow_unicode=True)

    print(f"✓ Configuration saved to: {config_path}")
def merge_configs(base_config: Dict[str, Any],
                  override_config: Dict[str, Any]) -> Dict[str, Any]:
    """
    Recursive configuration merging

    Keys present in both configurations are merged recursively when both
    values are dictionaries; otherwise the override value replaces the base
    value. Neither input is modified, and the result shares no mutable
    objects with them, so it can be edited freely afterwards.

    Parameters:
    -----------
    base_config : Dict[str, Any]
        Base configuration
    override_config : Dict[str, Any]
        Override configuration

    Returns:
    --------
    Dict[str, Any]
        Merged configuration
    """
    from copy import deepcopy

    # Deep copy: a shallow copy would leave nested containers shared with
    # base_config, so editing the merged result would silently edit the base.
    merged = deepcopy(base_config)

    # Walk the nested dictionaries with an explicit stack of
    # (target, override) pairs instead of recursing.
    pending = [(merged, override_config)]
    while pending:
        target, override = pending.pop()
        for key, value in override.items():
            existing = target.get(key)
            if isinstance(existing, dict) and isinstance(value, dict):
                pending.append((existing, value))
            else:
                # Copy so the result does not alias the override either.
                target[key] = deepcopy(value)
    return merged
# ============================================================================
# ENVIRONMENT SETUP
# ============================================================================
def setup_environment(
    log_level: str = "INFO",
    random_seed: int = 42,
    enable_warnings: bool = False,
    memory_limit_gb: Optional[int] = None
) -> None:
    """
    Set up environment for reproducibility

    Parameters:
    -----------
    log_level : str
        Logging level (accepted for interface compatibility; this function
        does not currently apply it anywhere)
    random_seed : int
        Seed for random generators (Python ``random``, NumPy and, when they
        are installed, PyTorch and TensorFlow)
    enable_warnings : bool
        Enable warnings
    memory_limit_gb : int, optional
        Memory limit in GB; ``None`` or 0 leaves the limit untouched
    """
    import random

    import numpy as np

    # Set seeds
    np.random.seed(random_seed)
    random.seed(random_seed)

    # The deep-learning frameworks are optional: seed them when available
    # and skip them quietly when they are not installed.
    try:
        import torch
        torch.manual_seed(random_seed)
    except ImportError:
        pass
    try:
        import tensorflow as tf
        tf.random.set_seed(random_seed)
    except (ImportError, AttributeError):
        # AttributeError: TensorFlow builds without tf.random.set_seed
        pass

    # Configure warnings
    warnings.filterwarnings('default' if enable_warnings else 'ignore')

    # Memory limit (if specified)
    if memory_limit_gb:
        # The resource module is only available on Unix platforms.
        import resource
        _, hard_limit = resource.getrlimit(resource.RLIMIT_AS)
        memory_limit = memory_limit_gb * 1024**3  # GB to bytes
        # Only the soft limit is changed; the hard limit is kept as it was.
        resource.setrlimit(resource.RLIMIT_AS, (memory_limit, hard_limit))
        print(f"✓ Memory limit set: {memory_limit_gb} GB")

    print(f"✓ Environment configured. Random seed: {random_seed}")
# ============================================================================
# AUTOMATIC SETUP ON IMPORT
# ============================================================================
# Importing this module applies the default visualisation settings as a
# side effect.
setup_visualization()

# Names exported by a star-import of this module.
# NOTE(review): EXTERNAL_DATA_DIR, MODELS_DIR, REPORTS_DIR, LOGS_DIR,
# CONFIGS_DIR, NOTEBOOKS_DIR and TESTS_DIR are defined in this module but
# are not listed here - confirm that the omission is intentional.
__all__ = [
    # functions
    "setup_visualization", "get_color_palette",
    "load_config", "save_config", "merge_configs",
    "setup_environment",
    # paths
    "PROJECT_ROOT", "DATA_DIR", "RAW_DATA_DIR", "PROCESSED_DATA_DIR",
    "RESULTS_DIR", "PLOTS_DIR",
    # constants
    "DATETIME_FORMATS", "METRICS", "STATS_CONSTANTS",
    "TIME_SERIES_CONSTANTS",
]