"""
MEDIUM TASK: Refactor a monolithic `stats.py` into a package
while maintaining full API backward compatibility.

The repo has:
  - stats.py              — monolithic ~70-line module
  - tests/test_stats.py   — existing tests that must keep passing
  - tests/test_compat.py  — NEW compatibility tests (agent must NOT break these)

The agent must:
1. Create a `stats/` package with sub-modules
2. Ensure `from stats import mean, median, std_dev, percentile` still works
3. Add type hints to all public functions
4. All tests must pass; lint clean
5. Write a review noting what was improved
"""

from __future__ import annotations

# Identifier string for this task definition.
TASK_ID = "medium_refactor_stats"
# Difficulty label; agrees with the "MEDIUM TASK" heading in the module docstring.
DIFFICULTY = "medium"
# NOTE(review): presumably the step budget granted to the agent — the consumer
# of this value is not visible in this file; confirm against the task runner.
MAX_STEPS = 30

# Markdown task statement: target package layout plus the rules to follow.
# The test counts quoted here (12 existing + 3 compatibility) must stay in sync
# with the test files embedded in INITIAL_FILES and with PASSING_TESTS.
DESCRIPTION = """
## Task: Refactor `stats.py` into a Package

The codebase has a growing `stats.py` module that mixes concerns.
You need to refactor it into a proper `stats/` package structure:

```
stats/
  __init__.py      ← re-exports everything (backward compat REQUIRED)
  central.py       ← mean, median, mode
  dispersion.py    ← std_dev, variance, range_val
  quantiles.py     ← percentile, quartiles
```

**Rules:**
- Do NOT modify any test files
- `from stats import mean, median, std_dev, percentile` must still work
- Add full type hints (List[float] -> float pattern)
- All 12 existing tests + 3 compatibility tests must pass
- Run lint and fix issues
- Commit the refactor
"""

# ── Initial repo snapshot ─────────────────────────────────────────────────────

# Seed repository for the task: maps each repo-relative path to the full text
# of that file.
#
# The task forbids editing the test files, so they must already be runnable
# and lint-clean under the bundled ruff config (rules E/F/W, line-length 88):
#   * tests/test_compat.py imports pytest once at module level, so every test
#     (including test_import_mean) can resolve `pytest.approx`.
#   * No line in the seeded Python files exceeds 88 characters (E501).
#   * The compat import carries `# noqa: F401` because `median` is imported
#     only to prove that the name is still re-exported (otherwise F401).
INITIAL_FILES: dict[str, str] = {
    # Monolithic module the agent has to split into a package.
    "stats.py": """\
\"\"\"Statistics utilities - monolithic module (to be refactored).\"\"\"
import math
from typing import List


def mean(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mean of empty list")
    return sum(data) / len(data)


def median(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute median of empty list")
    s = sorted(data)
    n = len(s)
    mid = n // 2
    if n % 2 == 0:
        return (s[mid - 1] + s[mid]) / 2.0
    return float(s[mid])


def mode(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mode of empty list")
    from collections import Counter
    c = Counter(data)
    return float(c.most_common(1)[0][0])


def variance(data: List[float]) -> float:
    if len(data) < 2:
        raise ValueError("Variance requires at least 2 data points")
    m = mean(data)
    return sum((x - m) ** 2 for x in data) / (len(data) - 1)


def std_dev(data: List[float]) -> float:
    return math.sqrt(variance(data))


def range_val(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute range of empty list")
    return max(data) - min(data)


def percentile(data: List[float], p: float) -> float:
    if not data:
        raise ValueError("Cannot compute percentile of empty list")
    if not 0 <= p <= 100:
        raise ValueError("Percentile must be between 0 and 100")
    s = sorted(data)
    idx = (p / 100) * (len(s) - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= len(s):
        return float(s[-1])
    frac = idx - lo
    return s[lo] + frac * (s[hi] - s[lo])


def quartiles(data: List[float]):
    return {
        "q1": percentile(data, 25),
        "q2": percentile(data, 50),
        "q3": percentile(data, 75),
    }
""",
    "tests/__init__.py": "",
    # 12 behavioural tests for the public functions.
    "tests/test_stats.py": """\
\"\"\"Tests for stats module.\"\"\"
import pytest
from stats import (
    mean,
    median,
    mode,
    variance,
    std_dev,
    range_val,
    percentile,
    quartiles,
)


DATA = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]


def test_mean():
    assert mean(DATA) == pytest.approx(5.0)


def test_median_even():
    assert median([1, 2, 3, 4]) == pytest.approx(2.5)


def test_median_odd():
    assert median([1, 2, 3]) == pytest.approx(2.0)


def test_mode():
    assert mode(DATA) == pytest.approx(4.0)


def test_variance():
    assert variance(DATA) == pytest.approx(4.571428, rel=1e-4)


def test_std_dev():
    assert std_dev(DATA) == pytest.approx(2.13809, rel=1e-4)


def test_range_val():
    assert range_val(DATA) == pytest.approx(7.0)


def test_percentile_50():
    assert percentile(DATA, 50) == pytest.approx(4.5, rel=1e-4)


def test_percentile_100():
    assert percentile(DATA, 100) == pytest.approx(9.0)


def test_quartiles():
    q = quartiles(DATA)
    assert q["q2"] == pytest.approx(4.5, rel=1e-4)


def test_mean_empty():
    with pytest.raises(ValueError):
        mean([])


def test_std_dev_single():
    with pytest.raises(ValueError):
        std_dev([1.0])
""",
    # 3 tests pinning the `from stats import ...` public import surface.
    "tests/test_compat.py": """\
\"\"\"Backward-compatibility tests — must pass after refactor.\"\"\"
import pytest
from stats import mean, median, std_dev, percentile  # noqa: F401


def test_import_mean():
    assert mean([1.0, 2.0, 3.0]) == pytest.approx(2.0)


def test_import_std_dev():
    data = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]
    assert std_dev(data) == pytest.approx(2.13809, rel=1e-4)


def test_import_percentile():
    assert percentile([1.0, 2.0, 3.0, 4.0, 5.0], 0) == pytest.approx(1.0)
""",
    "pyproject.toml": """\
[tool.ruff]
line-length = 88
select = ["E", "F", "W"]
ignore = []
""",
    "README.md": "# Stats Refactor Challenge\n\nRefactor monolithic stats.py into a package.\n",
}

# ── Expected outputs (for grader) ────────────────────────────────────────────

# Paths of the package layout requested in DESCRIPTION.
# NOTE(review): presumably the grader checks that each of these exists after
# the refactor — the grader itself is not visible here; confirm.
EXPECTED_PACKAGE_FILES = [
    "stats/__init__.py",
    "stats/central.py",
    "stats/dispersion.py",
    "stats/quantiles.py",
]

# NOTE(review): presumably substrings the agent's written review must contain;
# whether matching is case-sensitive is decided by the grader — verify there.
REQUIRED_KEYWORDS_IN_REVIEW = [
    "backward",
    "compat",
    "package",
    "type hint",
]

# 12 tests in tests/test_stats.py + 3 in tests/test_compat.py (see DESCRIPTION).
PASSING_TESTS = 15