File size: 9,958 Bytes
bad6218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
"""Cube resolution logic for mapping indicator IDs to data cubes.

The API uses a specific naming convention:
- Data cubes: {thematique}_{maille} (e.g., conso_enaf_com, surface_bio_dpt)
- Measures: {cube_name}.id_{indicator_id} (e.g., conso_enaf_com.id_611)
- Geographic dimensions: geocode_{maille}, libelle_{maille}

This module provides logic to find the correct cube for a given indicator
and geographic level by parsing the /meta endpoint.
"""

from typing import Any

from .models import MAILLE_SUFFIX_MAP, GEO_DIMENSION_PATTERNS, CubeInfo


class CubeResolver:
    """Resolves indicator IDs to their corresponding data cubes.
    
    The resolver caches the /meta response and provides efficient lookup
    of cubes by indicator ID and geographic level.
    """

    def __init__(self):
        """Initialize the resolver."""
        # Cache of cube metadata from /meta
        self._cubes_meta: list[dict[str, Any]] = []
        
        # Mapping: indicator_id -> {maille -> cube_name}
        self._indicator_cube_map: dict[int, dict[str, str]] = {}
        
        # Mapping: cube_name -> CubeInfo
        self._cube_info: dict[str, CubeInfo] = {}
        
        # Set of all indicator IDs found in cubes
        self._known_indicator_ids: set[int] = set()
        
        self._initialized = False

    @property
    def is_initialized(self) -> bool:
        """Check if the resolver has been initialized."""
        return self._initialized

    def load_from_meta(self, meta_response: dict[str, Any]) -> None:
        """Load and parse cube metadata from /meta response.
        
        Args:
            meta_response: The response from /cubejs-api/v1/meta
        """
        self._cubes_meta = meta_response.get("cubes", [])
        self._build_mappings()
        self._initialized = True

    def _build_mappings(self) -> None:
        """Build the internal mappings from cube metadata."""
        self._indicator_cube_map.clear()
        self._cube_info.clear()
        self._known_indicator_ids.clear()
        
        for cube in self._cubes_meta:
            cube_name = cube.get("name", "")
            
            # Skip metadata cubes
            if cube_name in ("indicateur_metadata", "indicateur_x_source_metadata"):
                continue
            
            # Determine ALL available mailles from cube dimensions
            available_mailles = self._detect_all_mailles(cube)
            if not available_mailles:
                continue
            
            # Extract indicator IDs from measures
            indicator_ids = self._extract_indicator_ids(cube)
            
            if indicator_ids:
                # Store cube info (use finest maille as primary)
                finest_maille = available_mailles[0]  # Already sorted finest-first
                self._cube_info[cube_name] = CubeInfo(
                    name=cube_name,
                    maille=finest_maille,
                    indicator_ids=indicator_ids,
                )
                
                # Build reverse mapping: indicator_id -> {maille -> cube_name}
                # Register cube for ALL available mailles
                for ind_id in indicator_ids:
                    self._known_indicator_ids.add(ind_id)
                    if ind_id not in self._indicator_cube_map:
                        self._indicator_cube_map[ind_id] = {}
                    for maille in available_mailles:
                        # Only register if not already mapped (prefer finest cube)
                        if maille not in self._indicator_cube_map[ind_id]:
                            self._indicator_cube_map[ind_id][maille] = cube_name

    def _detect_all_mailles(self, cube: dict[str, Any]) -> list[str]:
        """Detect ALL available geographic levels (mailles) in a cube.
        
        Cubes like conso_enaf_com contain dimensions for all levels
        (commune, epci, departement, region) allowing queries at any level.
        
        Args:
            cube: Cube metadata from /meta
            
        Returns:
            List of available mailles, sorted from finest to coarsest
            (commune, epci, departement, region)
        """
        dimensions = cube.get("dimensions", [])
        dim_names = [d.get("name", "") for d in dimensions]
        
        # Order of mailles from finest to coarsest
        maille_order = ["commune", "epci", "departement", "region"]
        available = []
        
        for maille in maille_order:
            patterns = GEO_DIMENSION_PATTERNS.get(maille, {})
            geocode_dim = patterns.get("geocode", "")
            # Dimension names are prefixed with cube name
            if any(geocode_dim in dim_name for dim_name in dim_names):
                available.append(maille)
        
        return available

    def _detect_maille(self, cube: dict[str, Any]) -> str | None:
        """Detect the finest geographic level (maille) of a cube.
        
        Args:
            cube: Cube metadata from /meta
            
        Returns:
            The finest maille name or None
        """
        mailles = self._detect_all_mailles(cube)
        return mailles[0] if mailles else None

    def _extract_indicator_ids(self, cube: dict[str, Any]) -> list[int]:
        """Extract indicator IDs from cube measures.
        
        Measures follow the pattern: {cube_name}.id_{indicator_id}
        
        Args:
            cube: Cube metadata from /meta
            
        Returns:
            List of indicator IDs found in the cube's measures
        """
        measures = cube.get("measures", [])
        indicator_ids = []
        
        for measure in measures:
            measure_name = measure.get("name", "")
            # Look for .id_{number} pattern
            if ".id_" in measure_name:
                try:
                    # Extract the ID after "id_"
                    id_part = measure_name.split(".id_")[-1]
                    # Handle potential additional suffixes
                    id_str = id_part.split("_")[0].split(".")[0]
                    indicator_id = int(id_str)
                    indicator_ids.append(indicator_id)
                except (ValueError, IndexError):
                    continue
        
        return indicator_ids

    def find_cube_for_indicator(
        self,
        indicator_id: int,
        maille: str,
    ) -> str | None:
        """Find the data cube for a given indicator and geographic level.
        
        Args:
            indicator_id: The indicator ID to look up
            maille: The geographic level ('commune', 'epci', 'departement', 'region')
            
        Returns:
            The cube name if found, None otherwise
        """
        if not self._initialized:
            return None
        
        maille_lower = maille.lower()
        
        # Check direct mapping
        if indicator_id in self._indicator_cube_map:
            cube_map = self._indicator_cube_map[indicator_id]
            if maille_lower in cube_map:
                return cube_map[maille_lower]
        
        return None

    def get_measure_name(self, cube_name: str, indicator_id: int) -> str:
        """Get the full measure name for an indicator in a cube.
        
        Args:
            cube_name: The cube name
            indicator_id: The indicator ID
            
        Returns:
            The full measure name (e.g., 'conso_enaf_com.id_611')
        """
        return f"{cube_name}.id_{indicator_id}"

    def get_dimension_name(self, cube_name: str, dimension: str) -> str:
        """Get the full dimension name for a cube.
        
        Args:
            cube_name: The cube name
            dimension: The dimension name (e.g., 'geocode_region')
            
        Returns:
            The full dimension name (e.g., 'conso_enaf_com.geocode_region')
        """
        return f"{cube_name}.{dimension}"

    def get_available_mailles(self, indicator_id: int) -> list[str]:
        """Get the available geographic levels for an indicator.
        
        Args:
            indicator_id: The indicator ID
            
        Returns:
            List of available mailles
        """
        if indicator_id not in self._indicator_cube_map:
            return []
        return list(self._indicator_cube_map[indicator_id].keys())

    def get_cube_info(self, cube_name: str) -> CubeInfo | None:
        """Get information about a cube.
        
        Args:
            cube_name: The cube name
            
        Returns:
            CubeInfo if found, None otherwise
        """
        return self._cube_info.get(cube_name)

    def is_indicator_known(self, indicator_id: int) -> bool:
        """Check if an indicator ID exists in any cube.
        
        Args:
            indicator_id: The indicator ID to check
            
        Returns:
            True if the indicator exists in at least one cube
        """
        return indicator_id in self._known_indicator_ids

    def list_all_cubes(self) -> list[CubeInfo]:
        """List all data cubes with their metadata.
        
        Returns:
            List of CubeInfo objects
        """
        return list(self._cube_info.values())

    def get_cubes_for_indicator(self, indicator_id: int) -> dict[str, str]:
        """Get all cubes containing a given indicator.
        
        Args:
            indicator_id: The indicator ID
            
        Returns:
            Dict mapping maille to cube_name
        """
        return self._indicator_cube_map.get(indicator_id, {}).copy()


# Module-level slot for the shared CubeResolver. It stays None until
# get_resolver() creates the instance on its first call.
_resolver_instance: CubeResolver | None = None


def get_resolver() -> CubeResolver:
    """Return the shared CubeResolver, building it on the first call.

    The instance is stored in the module-level ``_resolver_instance`` and
    handed back unchanged on every later call.

    Returns:
        The shared CubeResolver instance
    """
    global _resolver_instance
    # Fast path: the shared instance already exists.
    if _resolver_instance is not None:
        return _resolver_instance
    _resolver_instance = CubeResolver()
    return _resolver_instance