""" MEDIUM TASK: Refactor a monolithic `stats.py` into a package while maintaining full API backward compatibility. The repo has: - stats.py — monolithic 120-line module - tests/test_stats.py — existing tests that must keep passing - tests/test_compat.py — NEW compatibility tests (agent must NOT break these) The agent must: 1. Create a `stats/` package with sub-modules 2. Ensure `from stats import mean, median, std_dev, percentile` still works 3. Add type hints to all public functions 4. All tests must pass; lint clean 5. Write a review noting what was improved """ from __future__ import annotations TASK_ID = "medium_refactor_stats" DIFFICULTY = "medium" MAX_STEPS = 30 DESCRIPTION = """ ## Task: Refactor `stats.py` into a Package The codebase has a growing `stats.py` module that mixes concerns. You need to refactor it into a proper `stats/` package structure: ``` stats/ __init__.py ← re-exports everything (backward compat REQUIRED) central.py ← mean, median, mode dispersion.py ← std_dev, variance, range_val quantiles.py ← percentile, quartiles ``` **Rules:** - Do NOT modify any test files - `from stats import mean, median, std_dev, percentile` must still work - Add full type hints (List[float] -> float pattern) - All 12 existing tests + 3 compatibility tests must pass - Run lint and fix issues - Commit the refactor """ # ── Initial repo snapshot ───────────────────────────────────────────────────── INITIAL_FILES: dict[str, str] = { "stats.py": """\ \"\"\"Statistics utilities - monolithic module (to be refactored).\"\"\" import math from typing import List def mean(data: List[float]) -> float: if not data: raise ValueError("Cannot compute mean of empty list") return sum(data) / len(data) def median(data: List[float]) -> float: if not data: raise ValueError("Cannot compute median of empty list") s = sorted(data) n = len(s) mid = n // 2 if n % 2 == 0: return (s[mid - 1] + s[mid]) / 2.0 return float(s[mid]) def mode(data: List[float]) -> float: if not data: raise ValueError("Cannot compute mode of empty list") from collections import Counter c = Counter(data) return float(c.most_common(1)[0][0]) def variance(data: List[float]) -> float: if len(data) < 2: raise ValueError("Variance requires at least 2 data points") m = mean(data) return sum((x - m) ** 2 for x in data) / (len(data) - 1) def std_dev(data: List[float]) -> float: return math.sqrt(variance(data)) def range_val(data: List[float]) -> float: if not data: raise ValueError("Cannot compute range of empty list") return max(data) - min(data) def percentile(data: List[float], p: float) -> float: if not data: raise ValueError("Cannot compute percentile of empty list") if not 0 <= p <= 100: raise ValueError("Percentile must be between 0 and 100") s = sorted(data) idx = (p / 100) * (len(s) - 1) lo = int(idx) hi = lo + 1 if hi >= len(s): return float(s[-1]) frac = idx - lo return s[lo] + frac * (s[hi] - s[lo]) def quartiles(data: List[float]): return { "q1": percentile(data, 25), "q2": percentile(data, 50), "q3": percentile(data, 75), } """, "tests/__init__.py": "", "tests/test_stats.py": """\ \"\"\"Tests for stats module.\"\"\" import pytest from stats import mean, median, mode, variance, std_dev, range_val, percentile, quartiles DATA = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0] def test_mean(): assert mean(DATA) == pytest.approx(5.0) def test_median_even(): assert median([1, 2, 3, 4]) == pytest.approx(2.5) def test_median_odd(): assert median([1, 2, 3]) == pytest.approx(2.0) def test_mode(): assert mode(DATA) == pytest.approx(4.0) def test_variance(): assert variance(DATA) == pytest.approx(4.571428, rel=1e-4) def test_std_dev(): assert std_dev(DATA) == pytest.approx(2.13809, rel=1e-4) def test_range_val(): assert range_val(DATA) == pytest.approx(7.0) def test_percentile_50(): assert percentile(DATA, 50) == pytest.approx(4.5, rel=1e-4) def test_percentile_100(): assert percentile(DATA, 100) == pytest.approx(9.0) def test_quartiles(): q = quartiles(DATA) assert q["q2"] == pytest.approx(4.5, rel=1e-4) def test_mean_empty(): with pytest.raises(ValueError): mean([]) def test_std_dev_single(): with pytest.raises(ValueError): std_dev([1.0]) """, "tests/test_compat.py": """\ \"\"\"Backward-compatibility tests — must pass after refactor.\"\"\" from stats import mean, median, std_dev, percentile def test_import_mean(): assert mean([1.0, 2.0, 3.0]) == pytest.approx(2.0) def test_import_std_dev(): import pytest assert std_dev([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]) == pytest.approx(2.13809, rel=1e-4) def test_import_percentile(): import pytest assert percentile([1.0, 2.0, 3.0, 4.0, 5.0], 0) == pytest.approx(1.0) """, "pyproject.toml": """\ [tool.ruff] line-length = 88 select = ["E", "F", "W"] ignore = [] """, "README.md": "# Stats Refactor Challenge\n\nRefactor monolithic stats.py into a package.\n", } # ── Expected outputs (for grader) ──────────────────────────────────────────── EXPECTED_PACKAGE_FILES = [ "stats/__init__.py", "stats/central.py", "stats/dispersion.py", "stats/quantiles.py", ] REQUIRED_KEYWORDS_IN_REVIEW = [ "backward", "compat", "package", "type hint", ] PASSING_TESTS = 15