"""Cube resolution logic for mapping indicator IDs to data cubes.

The API uses a specific naming convention:
- Data cubes: {thematique}_{maille} (e.g., conso_enaf_com, surface_bio_dpt)
- Measures: {cube_name}.id_{indicator_id} (e.g., conso_enaf_com.id_611)
- Geographic dimensions: geocode_{maille}, libelle_{maille}

This module provides logic to find the correct cube for a given indicator
and geographic level by parsing the /meta endpoint.
"""

from typing import Any

from .models import MAILLE_SUFFIX_MAP, GEO_DIMENSION_PATTERNS, CubeInfo


class CubeResolver:
    """Resolves indicator IDs to their corresponding data cubes.

    The resolver caches the /meta response and provides lookup of cubes
    by indicator ID and geographic level.
    """

    # Cubes that only carry indicator metadata and never hold measures.
    _METADATA_CUBES = frozenset(
        {"indicateur_metadata", "indicateur_x_source_metadata"}
    )

    # Geographic levels ordered from finest to coarsest.
    _MAILLE_ORDER = ("commune", "epci", "departement", "region")

    def __init__(self) -> None:
        """Initialize an empty, uninitialized resolver."""
        # Cache of cube metadata from /meta
        self._cubes_meta: list[dict[str, Any]] = []
        # Mapping: indicator_id -> {maille -> cube_name}
        self._indicator_cube_map: dict[int, dict[str, str]] = {}
        # Mapping: cube_name -> CubeInfo
        self._cube_info: dict[str, CubeInfo] = {}
        # Set of all indicator IDs found in cubes
        self._known_indicator_ids: set[int] = set()
        self._initialized = False

    @property
    def is_initialized(self) -> bool:
        """Check if the resolver has been initialized."""
        return self._initialized

    def load_from_meta(self, meta_response: dict[str, Any]) -> None:
        """Load and parse cube metadata from /meta response.

        Any previously loaded mappings are discarded and rebuilt.

        Args:
            meta_response: The response from /cubejs-api/v1/meta
        """
        # `or []` also covers an explicit `"cubes": null` in the payload.
        self._cubes_meta = meta_response.get("cubes") or []
        self._build_mappings()
        self._initialized = True

    def _build_mappings(self) -> None:
        """Build the internal mappings from cube metadata."""
        self._indicator_cube_map.clear()
        self._cube_info.clear()
        self._known_indicator_ids.clear()

        for cube in self._cubes_meta:
            cube_name = cube.get("name") or ""

            # Skip unnamed cubes (they cannot be queried) and metadata cubes.
            if not cube_name or cube_name in self._METADATA_CUBES:
                continue

            # Determine ALL available mailles from cube dimensions
            available_mailles = self._detect_all_mailles(cube)
            if not available_mailles:
                continue

            # Extract indicator IDs from measures
            indicator_ids = self._extract_indicator_ids(cube)
            if not indicator_ids:
                continue

            # Store cube info; the list is sorted finest-first, so the first
            # entry is the cube's primary (finest) maille.
            self._cube_info[cube_name] = CubeInfo(
                name=cube_name,
                maille=available_mailles[0],
                indicator_ids=indicator_ids,
            )

            # Build reverse mapping: indicator_id -> {maille -> cube_name},
            # registering the cube for ALL of its available mailles.
            for ind_id in indicator_ids:
                self._known_indicator_ids.add(ind_id)
                cube_by_maille = self._indicator_cube_map.setdefault(ind_id, {})
                for maille in available_mailles:
                    # First cube in /meta order wins for a given maille.
                    # NOTE(review): the original comment said "prefer finest
                    # cube"; that only holds if /meta lists finer cubes
                    # first - confirm against the API.
                    cube_by_maille.setdefault(maille, cube_name)

    def _detect_all_mailles(self, cube: dict[str, Any]) -> list[str]:
        """Detect ALL available geographic levels (mailles) in a cube.

        Cubes like conso_enaf_com contain dimensions for all levels
        (commune, epci, departement, region) allowing queries at any level.

        Args:
            cube: Cube metadata from /meta

        Returns:
            List of available mailles, sorted from finest to coarsest
            (commune, epci, departement, region)
        """
        dim_names = [
            dim.get("name") or "" for dim in cube.get("dimensions") or []
        ]

        available: list[str] = []
        for maille in self._MAILLE_ORDER:
            geocode_dim = GEO_DIMENSION_PATTERNS.get(maille, {}).get("geocode", "")
            # An empty pattern is a substring of every name, so a maille
            # without a configured geocode pattern must be skipped rather
            # than reported as available.
            if not geocode_dim:
                continue
            # Dimension names are prefixed with cube name, hence the
            # substring test instead of equality.
            if any(geocode_dim in dim_name for dim_name in dim_names):
                available.append(maille)

        return available

    def _detect_maille(self, cube: dict[str, Any]) -> str | None:
        """Detect the finest geographic level (maille) of a cube.

        Args:
            cube: Cube metadata from /meta

        Returns:
            The finest maille name or None
        """
        mailles = self._detect_all_mailles(cube)
        return mailles[0] if mailles else None

    def _extract_indicator_ids(self, cube: dict[str, Any]) -> list[int]:
        """Extract indicator IDs from cube measures.

        Measures follow the pattern: {cube_name}.id_{indicator_id}

        Args:
            cube: Cube metadata from /meta

        Returns:
            List of distinct indicator IDs found in the cube's measures,
            in order of first appearance
        """
        indicator_ids: list[int] = []
        seen: set[int] = set()

        for measure in cube.get("measures") or []:
            measure_name = measure.get("name") or ""

            # Keep what follows the last ".id_" marker, if there is one.
            _, marker, id_part = measure_name.rpartition(".id_")
            if not marker:
                continue

            # Handle potential additional suffixes (e.g. "611_foo", "611.bar")
            id_str = id_part.split("_")[0].split(".")[0]
            try:
                indicator_id = int(id_str)
            except ValueError:
                continue

            # Several measures may map to the same indicator; report it once.
            if indicator_id not in seen:
                seen.add(indicator_id)
                indicator_ids.append(indicator_id)

        return indicator_ids

    def find_cube_for_indicator(
        self,
        indicator_id: int,
        maille: str,
    ) -> str | None:
        """Find the data cube for a given indicator and geographic level.

        Args:
            indicator_id: The indicator ID to look up
            maille: The geographic level
                ('commune', 'epci', 'departement', 'region'); matched
                case-insensitively

        Returns:
            The cube name if found, None otherwise (including when the
            resolver has not been initialized)
        """
        if not self._initialized:
            return None

        cube_by_maille = self._indicator_cube_map.get(indicator_id, {})
        return cube_by_maille.get(maille.lower())

    def get_measure_name(self, cube_name: str, indicator_id: int) -> str:
        """Get the full measure name for an indicator in a cube.

        Args:
            cube_name: The cube name
            indicator_id: The indicator ID

        Returns:
            The full measure name (e.g., 'conso_enaf_com.id_611')
        """
        return f"{cube_name}.id_{indicator_id}"

    def get_dimension_name(self, cube_name: str, dimension: str) -> str:
        """Get the full dimension name for a cube.

        Args:
            cube_name: The cube name
            dimension: The dimension name (e.g., 'geocode_region')

        Returns:
            The full dimension name (e.g., 'conso_enaf_com.geocode_region')
        """
        return f"{cube_name}.{dimension}"

    def get_available_mailles(self, indicator_id: int) -> list[str]:
        """Get the available geographic levels for an indicator.

        Args:
            indicator_id: The indicator ID

        Returns:
            List of available mailles (empty if the indicator is unknown)
        """
        return list(self._indicator_cube_map.get(indicator_id, {}))

    def get_cube_info(self, cube_name: str) -> CubeInfo | None:
        """Get information about a cube.

        Args:
            cube_name: The cube name

        Returns:
            CubeInfo if found, None otherwise
        """
        return self._cube_info.get(cube_name)

    def is_indicator_known(self, indicator_id: int) -> bool:
        """Check if an indicator ID exists in any cube.

        Args:
            indicator_id: The indicator ID to check

        Returns:
            True if the indicator exists in at least one cube
        """
        return indicator_id in self._known_indicator_ids

    def list_all_cubes(self) -> list[CubeInfo]:
        """List all data cubes with their metadata.

        Returns:
            List of CubeInfo objects
        """
        return list(self._cube_info.values())

    def get_cubes_for_indicator(self, indicator_id: int) -> dict[str, str]:
        """Get all cubes containing a given indicator.

        Args:
            indicator_id: The indicator ID

        Returns:
            Dict mapping maille to cube_name (a copy; mutating it does not
            affect the resolver)
        """
        return self._indicator_cube_map.get(indicator_id, {}).copy()


# Singleton instance
_resolver_instance: CubeResolver | None = None


def get_resolver() -> CubeResolver:
    """Get or create the singleton CubeResolver instance.

    Returns:
        The shared CubeResolver instance
    """
    global _resolver_instance
    if _resolver_instance is None:
        _resolver_instance = CubeResolver()
    return _resolver_instance