from __future__ import annotations import subprocess from lean_migrate.env import grader from lean_migrate.env.grader import ( build_lean_sample_checks, clamp_open_unit, run_candidate_tests, ) from lean_migrate.env.tasks import get_task, list_tasks def test_list_tasks() -> None: tasks = list_tasks() assert {task["task_id"] for task in tasks} == { "rbac_auth", "pricing_engine", "payment_saga", "path_canonicalizer", "expression_eval", "lru_cache", "shortest_path", "interval_scheduler", } assert all(task["num_functions"] > 0 for task in tasks) def test_sample_checks_for_rbac_find_role() -> None: task = get_task("rbac_auth") function_spec = task.get_function("findRole") assert function_spec is not None checks = build_lean_sample_checks(task, function_spec) assert len(checks) == 10 assert all("example :" in check for check in checks) def test_proof_only_function_skips_sample_checks() -> None: task = get_task("payment_saga") function_spec = task.get_function("no_double_charge_proof") assert function_spec is not None checks = build_lean_sample_checks(task, function_spec) assert checks == [] def test_run_tests_for_python_candidate() -> None: task = get_task("pricing_engine") function_spec = task.get_function("subtotal") assert function_spec is not None result = run_candidate_tests( task, function_spec, """ def subtotal(order): return sum(item["unitPrice"] * item["quantity"] for item in order["items"]) """.strip(), ) assert result.passed assert result.tests_passed == 10 assert result.tests_total == 10 assert result.case_results is not None assert len(result.case_results) == 10 assert "10/10" in result.feedback def test_run_tests_reports_failures() -> None: task = get_task("pricing_engine") function_spec = task.get_function("subtotal") assert function_spec is not None result = run_candidate_tests( task, function_spec, """ def subtotal(order): raise RuntimeError("boom") """.strip(), ) assert not result.passed assert result.tests_passed == 0 assert result.tests_total == 10 assert "run_tests failed" in result.feedback or "0/10" in result.feedback def test_run_tests_reports_rust_build_failures(monkeypatch) -> None: task = get_task("lru_cache") function_spec = task.get_function("lruEvict") assert function_spec is not None class FakeRustRunner: def run(self, function_spec, candidate_code, cases): return ( None, subprocess.CompletedProcess( args=[], returncode=1, stdout="", stderr="error[E0308]: mismatched types\n", ), False, ) monkeypatch.setitem(grader._RUNNERS, "rust", FakeRustRunner()) result = run_candidate_tests( task, function_spec, "pub fn lru_evict(cache: Vec<(u64, u64)>, cap: usize) -> Vec<(u64, u64)> { cache }", ) assert not result.passed assert result.tests_passed == 0 assert result.tests_total == len(task.sample_inputs[function_spec.name]) assert "Rust build failed (compiler error)" in result.feedback assert "Interpreter exit code" not in result.feedback def test_run_tests_for_expression_eval_rust_candidate() -> None: task = get_task("expression_eval") function_spec = task.get_function("evalExpr") assert function_spec is not None result = run_candidate_tests( task, function_spec, """ #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Op { Add, Sub, Mul, Div, } #[derive(Debug, Clone)] pub enum Expr { Lit(i64), BinOp(Op, Box, Box), } pub fn eval_bin_op(op: Op, a: i64, b: i64) -> Option { match op { Op::Add => Some(a + b), Op::Sub => Some(a - b), Op::Mul => Some(a * b), Op::Div => { if b == 0 { None } else { Some(a / b) } } } } pub fn eval_expr(expr: &Expr) -> Option { match expr { Expr::Lit(n) => Some(*n), Expr::BinOp(op, l, r) => { let left = eval_expr(l); let right = eval_expr(r); match (left, right) { (Some(a), Some(b)) => eval_bin_op(*op, a, b), _ => None, } } } } """.strip(), ) assert result.passed assert result.tests_passed == 10 assert result.tests_total == 10 assert "10/10" in result.feedback def test_clamp_open_unit_keeps_scores_in_open_interval() -> None: assert clamp_open_unit(0.0) == 0.01 assert clamp_open_unit(1.0) == 0.99 assert clamp_open_unit(0.42) == 0.42