teamforge / tasks /medium_task.py
Your Name
fix: add FastAPI REST endpoints for OpenEnv validator
637f42c
"""
MEDIUM TASK: Refactor a monolithic `stats.py` into a package
while maintaining full API backward compatibility.
The repo has:
- stats.py — monolithic 120-line module
- tests/test_stats.py — existing tests that must keep passing
- tests/test_compat.py — NEW compatibility tests (agent must NOT break these)
The agent must:
1. Create a `stats/` package with sub-modules
2. Ensure `from stats import mean, median, std_dev, percentile` still works
3. Add type hints to all public functions
4. All tests must pass; lint clean
5. Write a review noting what was improved
"""
from __future__ import annotations
# Task metadata.
# NOTE(review): presumably consumed by the task registry/runner — confirm.
TASK_ID: str = "medium_refactor_stats"  # identifier string for this task
DIFFICULTY: str = "medium"  # difficulty label
MAX_STEPS: int = 30  # NOTE(review): looks like the agent's step budget — confirm
# Task brief written in Markdown: the target `stats/` package layout followed
# by the rules the refactor must respect (tests untouched, backward-compatible
# imports, type hints, lint, commit).
# NOTE(review): presumably shown verbatim to the agent — confirm against caller.
DESCRIPTION = """
## Task: Refactor `stats.py` into a Package
The codebase has a growing `stats.py` module that mixes concerns.
You need to refactor it into a proper `stats/` package structure:
```
stats/
__init__.py ← re-exports everything (backward compat REQUIRED)
central.py ← mean, median, mode
dispersion.py ← std_dev, variance, range_val
quantiles.py ← percentile, quartiles
```
**Rules:**
- Do NOT modify any test files
- `from stats import mean, median, std_dev, percentile` must still work
- Add full type hints (List[float] -> float pattern)
- All 12 existing tests + 3 compatibility tests must pass
- Run lint and fix issues
- Commit the refactor
"""
# ── Initial repo snapshot ─────────────────────────────────────────────────────
# Snapshot of the repository the task starts from: maps a repo-relative file
# path to that file's full text content.
#
# The embedded Python sources are kept conventionally formatted so that every
# ".py" entry compiles as-is.
INITIAL_FILES: dict[str, str] = {
    # The monolithic module the agent has to split into a `stats/` package.
    "stats.py": """\
\"\"\"Statistics utilities - monolithic module (to be refactored).\"\"\"
import math
from typing import List


def mean(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mean of empty list")
    return sum(data) / len(data)


def median(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute median of empty list")
    s = sorted(data)
    n = len(s)
    mid = n // 2
    if n % 2 == 0:
        return (s[mid - 1] + s[mid]) / 2.0
    return float(s[mid])


def mode(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute mode of empty list")
    from collections import Counter
    c = Counter(data)
    return float(c.most_common(1)[0][0])


def variance(data: List[float]) -> float:
    if len(data) < 2:
        raise ValueError("Variance requires at least 2 data points")
    m = mean(data)
    return sum((x - m) ** 2 for x in data) / (len(data) - 1)


def std_dev(data: List[float]) -> float:
    return math.sqrt(variance(data))


def range_val(data: List[float]) -> float:
    if not data:
        raise ValueError("Cannot compute range of empty list")
    return max(data) - min(data)


def percentile(data: List[float], p: float) -> float:
    if not data:
        raise ValueError("Cannot compute percentile of empty list")
    if not 0 <= p <= 100:
        raise ValueError("Percentile must be between 0 and 100")
    s = sorted(data)
    idx = (p / 100) * (len(s) - 1)
    lo = int(idx)
    hi = lo + 1
    if hi >= len(s):
        return float(s[-1])
    frac = idx - lo
    return s[lo] + frac * (s[hi] - s[lo])


def quartiles(data: List[float]):
    return {
        "q1": percentile(data, 25),
        "q2": percentile(data, 50),
        "q3": percentile(data, 75),
    }
""",
    "tests/__init__.py": "",
    # The 12 pre-existing tests.
    "tests/test_stats.py": """\
\"\"\"Tests for stats module.\"\"\"
import pytest
from stats import mean, median, mode, variance, std_dev, range_val, percentile, quartiles

DATA = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]


def test_mean():
    assert mean(DATA) == pytest.approx(5.0)


def test_median_even():
    assert median([1, 2, 3, 4]) == pytest.approx(2.5)


def test_median_odd():
    assert median([1, 2, 3]) == pytest.approx(2.0)


def test_mode():
    assert mode(DATA) == pytest.approx(4.0)


def test_variance():
    assert variance(DATA) == pytest.approx(4.571428, rel=1e-4)


def test_std_dev():
    assert std_dev(DATA) == pytest.approx(2.13809, rel=1e-4)


def test_range_val():
    assert range_val(DATA) == pytest.approx(7.0)


def test_percentile_50():
    assert percentile(DATA, 50) == pytest.approx(4.5, rel=1e-4)


def test_percentile_100():
    assert percentile(DATA, 100) == pytest.approx(9.0)


def test_quartiles():
    q = quartiles(DATA)
    assert q["q2"] == pytest.approx(4.5, rel=1e-4)


def test_mean_empty():
    with pytest.raises(ValueError):
        mean([])


def test_std_dev_single():
    with pytest.raises(ValueError):
        std_dev([1.0])
""",
    # The 3 backward-compatibility tests. `pytest` is imported once at module
    # level: previously test_import_mean used `pytest.approx` with no import in
    # scope and raised NameError, which made the task unsolvable because the
    # agent is not allowed to edit test files.
    "tests/test_compat.py": """\
\"\"\"Backward-compatibility tests — must pass after refactor.\"\"\"
import pytest
from stats import mean, median, std_dev, percentile


def test_import_mean():
    assert mean([1.0, 2.0, 3.0]) == pytest.approx(2.0)


def test_import_std_dev():
    assert std_dev([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]) == pytest.approx(2.13809, rel=1e-4)


def test_import_percentile():
    assert percentile([1.0, 2.0, 3.0, 4.0, 5.0], 0) == pytest.approx(1.0)
""",
    # Ruff lint configuration used for the "lint clean" requirement.
    "pyproject.toml": """\
[tool.ruff]
line-length = 88
select = ["E", "F", "W"]
ignore = []
""",
    "README.md": "# Stats Refactor Challenge\n\nRefactor monolithic stats.py into a package.\n",
}
# ── Expected outputs (for grader) ────────────────────────────────────────────
# Repo-relative paths of the package modules expected after the refactor:
# the package initializer plus one file per sub-module.
EXPECTED_PACKAGE_FILES = [
    f"stats/{module}.py"
    for module in ("__init__", "central", "dispersion", "quantiles")
]
# Keywords expected in the agent's written review.
# NOTE(review): how they are matched (case, substring vs. word) is decided by
# the grader, which is not visible here — confirm.
REQUIRED_KEYWORDS_IN_REVIEW: list[str] = ["backward", "compat", "package", "type hint"]
# Total test count: 12 in tests/test_stats.py + 3 in tests/test_compat.py.
PASSING_TESTS: int = 12 + 3