"""
Compute price/m2 breakdown for the top 10 French cities.

Responsibility: Produce a clean table of time-weighted median price per m2
by property type for the largest cities.
"""

import json
import logging
from pathlib import Path

import polars as pl

from src.aggregator import _aggregate_group
from src.config import AGGREGATED_DIR, TOP_10_CITIES, TYPE_LOCAL_SHORT

logger = logging.getLogger(__name__)


def compute_top_cities(df: pl.DataFrame) -> dict[str, dict]:
    """
    Build per-city price statistics for every city listed in TOP_10_CITIES.

    Rows are matched on code_commune_city (arrondissements mapped to their
    parent city), so Paris, Lyon, and Marseille are aggregated as whole cities.
    Cities without any matching row are logged and left out of the result.

    Args:
        df: Collected DataFrame with code_commune_city, prix_m2, temporal_weight.
            The type_local column is also read for the per-type breakdown.

    Returns:
        Nested dict keyed by city name:
        {"Paris": {"code": "75056", "tous": {...}, ...}, ...}
        "tous" aggregates all rows of the city; one extra key (the short name
        from TYPE_LOCAL_SHORT) is added per property type that has rows.
    """
    commune = pl.col("code_commune_city")

    # Narrow once to the tracked cities so the per-city filters scan less data.
    tracked = df.filter(commune.is_in(list(TOP_10_CITIES)))

    stats_by_city: dict[str, dict] = {}
    for code, name in TOP_10_CITIES.items():
        rows = tracked.filter(commune == code)
        if rows.height == 0:
            logger.warning("No data for %s (%s)", name, code)
            continue

        # "tous" covers every row of the city, whatever its property type.
        city_stats: dict = {"code": code, "tous": _aggregate_group(rows)}

        # Property types with no rows for this city get no key at all.
        for type_label, type_key in TYPE_LOCAL_SHORT.items():
            subset = rows.filter(pl.col("type_local") == type_label)
            if subset.height > 0:
                city_stats[type_key] = _aggregate_group(subset)

        stats_by_city[name] = city_stats

    return stats_by_city


def export_top_cities(data: dict[str, dict], output_dir: Path | None = None) -> None:
    """
    Export top cities data to JSON.
    """
    output_dir = output_dir or AGGREGATED_DIR
    output_dir.mkdir(parents=True, exist_ok=True)

    path = output_dir / "top_cities.json"
    with open(path, "w") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    logger.info("Exported: %s (%d cities)", path.name, len(data))