Spaces:

PrakashCider
/

teamforge

Sleeping

Your Name

fix: add FastAPI REST endpoints for OpenEnv validator

637f42c about 1 month ago

5.81 kB

	"""
	MEDIUM TASK: Refactor a monolithic `stats.py` into a package
	while maintaining full API backward compatibility.

	The repo has:
	- stats.py — monolithic module (about 70 lines)
	- tests/test_stats.py — existing tests that must keep passing
	- tests/test_compat.py — NEW compatibility tests (agent must NOT break these)

	The agent must:
	1. Create a `stats/` package with sub-modules
	2. Ensure `from stats import mean, median, std_dev, percentile` still works
	3. Add type hints to all public functions
	4. All tests must pass; lint clean
	5. Write a review noting what was improved
	"""

	from __future__ import annotations

	# Identifier string for this task definition.
	TASK_ID: str = "medium_refactor_stats"
	# Difficulty label; matches the "MEDIUM TASK" tier named in the module docstring.
	DIFFICULTY: str = "medium"
	# NOTE(review): presumably the cap on agent steps for one attempt — the code
	# that enforces it is not in this file; confirm against the harness.
	MAX_STEPS: int = 30

	# Markdown task statement: target package layout plus the rules for the
	# refactor (no test edits, backward-compatible imports, type hints, lint,
	# commit).  This is a runtime string, so its text is left untouched.
	DESCRIPTION = """
	## Task: Refactor `stats.py` into a Package

	The codebase has a growing `stats.py` module that mixes concerns.
	You need to refactor it into a proper `stats/` package structure:

	```
	stats/
	__init__.py ← re-exports everything (backward compat REQUIRED)
	central.py ← mean, median, mode
	dispersion.py ← std_dev, variance, range_val
	quantiles.py ← percentile, quartiles
	```

	Rules:
	- Do NOT modify any test files
	- `from stats import mean, median, std_dev, percentile` must still work
	- Add full type hints (List[float] -> float pattern)
	- All 12 existing tests + 3 compatibility tests must pass
	- Run lint and fix issues
	- Commit the refactor
	"""

	# ── Initial repo snapshot ─────────────────────────────────────────────────────

	INITIAL_FILES: dict[str, str] = {
	"stats.py": """\
	\"\"\"Statistics utilities - monolithic module (to be refactored).\"\"\"
	import math
	from typing import List


	def mean(data: List[float]) -> float:
	if not data:
	raise ValueError("Cannot compute mean of empty list")
	return sum(data) / len(data)


	def median(data: List[float]) -> float:
	if not data:
	raise ValueError("Cannot compute median of empty list")
	s = sorted(data)
	n = len(s)
	mid = n // 2
	if n % 2 == 0:
	return (s[mid - 1] + s[mid]) / 2.0
	return float(s[mid])


	def mode(data: List[float]) -> float:
	if not data:
	raise ValueError("Cannot compute mode of empty list")
	from collections import Counter
	c = Counter(data)
	return float(c.most_common(1)[0][0])


	def variance(data: List[float]) -> float:
	if len(data) < 2:
	raise ValueError("Variance requires at least 2 data points")
	m = mean(data)
	return sum((x - m) ** 2 for x in data) / (len(data) - 1)


	def std_dev(data: List[float]) -> float:
	return math.sqrt(variance(data))


	def range_val(data: List[float]) -> float:
	if not data:
	raise ValueError("Cannot compute range of empty list")
	return max(data) - min(data)


	def percentile(data: List[float], p: float) -> float:
	if not data:
	raise ValueError("Cannot compute percentile of empty list")
	if not 0 <= p <= 100:
	raise ValueError("Percentile must be between 0 and 100")
	s = sorted(data)
	idx = (p / 100) * (len(s) - 1)
	lo = int(idx)
	hi = lo + 1
	if hi >= len(s):
	return float(s[-1])
	frac = idx - lo
	return s[lo] + frac * (s[hi] - s[lo])


	def quartiles(data: List[float]):
	return {
	"q1": percentile(data, 25),
	"q2": percentile(data, 50),
	"q3": percentile(data, 75),
	}
	""",
	"tests/__init__.py": "",
	"tests/test_stats.py": """\
	\"\"\"Tests for stats module.\"\"\"
	import pytest
	from stats import mean, median, mode, variance, std_dev, range_val, percentile, quartiles


	DATA = [2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]


	def test_mean():
	assert mean(DATA) == pytest.approx(5.0)


	def test_median_even():
	assert median([1, 2, 3, 4]) == pytest.approx(2.5)


	def test_median_odd():
	assert median([1, 2, 3]) == pytest.approx(2.0)


	def test_mode():
	assert mode(DATA) == pytest.approx(4.0)


	def test_variance():
	assert variance(DATA) == pytest.approx(4.571428, rel=1e-4)


	def test_std_dev():
	assert std_dev(DATA) == pytest.approx(2.13809, rel=1e-4)


	def test_range_val():
	assert range_val(DATA) == pytest.approx(7.0)


	def test_percentile_50():
	assert percentile(DATA, 50) == pytest.approx(4.5, rel=1e-4)


	def test_percentile_100():
	assert percentile(DATA, 100) == pytest.approx(9.0)


	def test_quartiles():
	q = quartiles(DATA)
	assert q["q2"] == pytest.approx(4.5, rel=1e-4)


	def test_mean_empty():
	with pytest.raises(ValueError):
	mean([])


	def test_std_dev_single():
	with pytest.raises(ValueError):
	std_dev([1.0])
	""",
	"tests/test_compat.py": """\
	\"\"\"Backward-compatibility tests — must pass after refactor.\"\"\"
	from stats import mean, median, std_dev, percentile


	def test_import_mean():
	assert mean([1.0, 2.0, 3.0]) == pytest.approx(2.0)


	def test_import_std_dev():
	import pytest
	assert std_dev([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0]) == pytest.approx(2.13809, rel=1e-4)


	def test_import_percentile():
	import pytest
	assert percentile([1.0, 2.0, 3.0, 4.0, 5.0], 0) == pytest.approx(1.0)
	""",
	"pyproject.toml": """\
	[tool.ruff]
	line-length = 88
	select = ["E", "F", "W"]
	ignore = []
	""",
	"README.md": "# Stats Refactor Challenge\n\nRefactor monolithic stats.py into a package.\n",
	}

	# ── Expected outputs (for grader) ────────────────────────────────────────────

	# Paths of the package files named in DESCRIPTION's target `stats/` layout.
	# NOTE(review): presumably the grader checks that each of these exists after
	# the refactor — the checking code is not in this file.
	EXPECTED_PACKAGE_FILES: list[str] = [
	"stats/__init__.py",
	"stats/central.py",
	"stats/dispersion.py",
	"stats/quantiles.py",
	]

	# NOTE(review): presumably substrings the grader looks for in the agent's
	# written review (task step 5 in the module docstring); the matching logic
	# (case sensitivity, all-vs-any) is not in this file — confirm in the grader.
	REQUIRED_KEYWORDS_IN_REVIEW: list[str] = [
	"backward",
	"compat",
	"package",
	"type hint",
	]

	# 12 tests in tests/test_stats.py + 3 in tests/test_compat.py.
	PASSING_TESTS: int = 15