Spaces:
Sleeping
Sleeping
| """ | |
| MEDIUM TASK: Refactor a monolithic `stats.py` into a package | |
| while maintaining full API backward compatibility. | |
| The repo has: | |
| - stats.py — monolithic 120-line module | |
| - tests/test_stats.py — existing tests that must keep passing | |
| - tests/test_compat.py — NEW compatibility tests (agent must NOT break these) | |
| The agent must: | |
| 1. Create a `stats/` package with sub-modules | |
| 2. Ensure `from stats import mean, median, std_dev, percentile` still works | |
| 3. Add type hints to all public functions | |
| 4. All tests must pass; lint clean | |
| 5. Write a review noting what was improved | |
| """ | |
| from __future__ import annotations | |
# Identity and budget metadata for this task definition.
TASK_ID = "medium_refactor_stats"
DIFFICULTY = "medium"
# NOTE(review): presumably the maximum number of agent steps allowed for the
# task — confirm against the environment runner that consumes this module.
MAX_STEPS = 30
# Markdown task brief: target package layout plus the rules the refactor must
# respect. NOTE(review): presumably rendered to the agent verbatim — confirm
# with the runner. The test counts quoted here (12 + 3) must stay in sync with
# the test files shipped in the initial repo snapshot.
DESCRIPTION = """
## Task: Refactor `stats.py` into a Package

The codebase has a growing `stats.py` module that mixes concerns.
You need to refactor it into a proper `stats/` package structure:

```
stats/
  __init__.py     ← re-exports everything (backward compat REQUIRED)
  central.py      ← mean, median, mode
  dispersion.py   ← std_dev, variance, range_val
  quantiles.py    ← percentile, quartiles
```

**Rules:**
- Do NOT modify any test files
- `from stats import mean, median, std_dev, percentile` must still work
- Add full type hints (List[float] -> float pattern)
- All 12 existing tests + 3 compatibility tests must pass
- Run lint and fix issues
- Commit the refactor
"""
# ── Initial repo snapshot ─────────────────────────────────────────────────────
# Maps repo-relative path -> file contents for the starting state of the task
# repository: the monolithic module, its two test files, the ruff config and a
# README.
#
# `quartiles` in stats.py deliberately has no return annotation: adding type
# hints is part of the task.
#
# tests/test_compat.py imports pytest at module level; without it
# `test_import_mean` raised NameError on `pytest.approx`, so the required
# "3 compatibility tests pass" could never be met.
#
# NOTE(review): the `from stats import ...` line in tests/test_stats.py is 89
# characters and the std_dev assert in tests/test_compat.py is 96, so ruff E501
# (line-length = 88) fires if lint covers tests/. The agent may not edit test
# files — confirm the grader's lint scope.
INITIAL_FILES: dict[str, str] = {
    "stats.py": """\
\"\"\"Statistics utilities - monolithic module (to be refactored).\"\"\"
import math
from typing import List


def mean(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mean of empty list")
    return sum(data) / len(data)


def median(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute median of empty list")
    s = sorted(data)
    n = len(s)
    mid = n // 2
    if n % 2 == 0:
        return (s[mid - 1] + s[mid]) / 2.0
    return float(s[mid])


def mode(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mode of empty list")
    from collections import Counter
    c = Counter(data)
    return float(c.most_common(1)[0][0])


def variance(data: List[float]) -> float:
    if len(data) < 2:
        raise ValueError("Variance requires at least 2 data points")
    m = mean(data)
    return sum((x - m) ** 2 for x in data) / (len(data) - 1)


def std_dev(data: List[float]) -> float:
    return math.sqrt(variance(data))


def range_val(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute range of empty list")
    return max(data) - min(data)


def percentile(data: List[float], p: float) -> float:
    if not data:
        raise ValueError("Cannot compute percentile of empty list")
    if not 0 <= p <= 100:
        raise ValueError("Percentile must be between 0 and 100")
    s = sorted(data)
    idx = (p / 100) * (len(s) - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= len(s):
        return float(s[-1])
    frac = idx - lo
    return s[lo] + frac * (s[hi] - s[lo])


def quartiles(data: List[float]):
    return {
        "q1": percentile(data, 25),
        "q2": percentile(data, 50),
        "q3": percentile(data, 75),
    }
""",
    "tests/__init__.py": "",
    "tests/test_stats.py": """\
\"\"\"Tests for stats module.\"\"\"
import pytest

from stats import mean, median, mode, variance, std_dev, range_val, percentile, quartiles

DATA = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]


def test_mean():
    assert mean(DATA) == pytest.approx(5.0)


def test_median_even():
    assert median([1, 2, 3, 4]) == pytest.approx(2.5)


def test_median_odd():
    assert median([1, 2, 3]) == pytest.approx(2.0)


def test_mode():
    assert mode(DATA) == pytest.approx(4.0)


def test_variance():
    assert variance(DATA) == pytest.approx(4.571428, rel=1e-4)


def test_std_dev():
    assert std_dev(DATA) == pytest.approx(2.13809, rel=1e-4)


def test_range_val():
    assert range_val(DATA) == pytest.approx(7.0)


def test_percentile_50():
    assert percentile(DATA, 50) == pytest.approx(4.5, rel=1e-4)


def test_percentile_100():
    assert percentile(DATA, 100) == pytest.approx(9.0)


def test_quartiles():
    q = quartiles(DATA)
    assert q["q2"] == pytest.approx(4.5, rel=1e-4)


def test_mean_empty():
    with pytest.raises(ValueError):
        mean([])


def test_std_dev_single():
    with pytest.raises(ValueError):
        std_dev([1.0])
""",
    "tests/test_compat.py": """\
\"\"\"Backward-compatibility tests — must pass after refactor.\"\"\"
import pytest

from stats import mean, median, std_dev, percentile


def test_import_mean():
    assert mean([1.0, 2.0, 3.0]) == pytest.approx(2.0)


def test_import_std_dev():
    assert std_dev([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]) == pytest.approx(2.13809, rel=1e-4)


def test_import_percentile():
    assert percentile([1.0, 2.0, 3.0, 4.0, 5.0], 0) == pytest.approx(1.0)
""",
    "pyproject.toml": """\
[tool.ruff]
line-length = 88
select = ["E", "F", "W"]
ignore = []
""",
    "README.md": "# Stats Refactor Challenge\n\nRefactor monolithic stats.py into a package.\n",
}
# ── Expected outputs (for grader) ────────────────────────────────────────────
# Repo-relative paths of the package files named in the task's target layout:
# the re-exporting `__init__` plus one sub-module per concern.
EXPECTED_PACKAGE_FILES = [
    "stats/__init__.py",
    "stats/central.py",
    "stats/dispersion.py",
    "stats/quantiles.py",
]
# Terms the agent's written review (task item 5) is expected to mention.
# NOTE(review): the matching rule (substring vs. whole word, case sensitivity)
# lives in the grader and is not visible here — confirm before changing the
# casing or spacing of any entry.
REQUIRED_KEYWORDS_IN_REVIEW = [
    "backward",
    "compat",
    "package",
    "type hint",
]
# Total number of tests expected to pass after the refactor:
# 12 in tests/test_stats.py + 3 in tests/test_compat.py.
PASSING_TESTS = 15