"""Smoke tests for the SQL Repair env. Run with: python -m pytest tests/ -q """ from __future__ import annotations import math from sql_env.env_core import EnvState, MAX_STEPS from sql_env.grader import SCORE_MAX, SCORE_MIN, grade_task, strict_clamp from sql_env.tasks import TASK_IDS, TASKS # --------------------------------------------------------------------------- # Strict (0, 1) clamp invariants # --------------------------------------------------------------------------- def test_strict_clamp_handles_extremes(): assert strict_clamp(0.0) == SCORE_MIN assert strict_clamp(-1.0) == SCORE_MIN assert strict_clamp(1.0) == SCORE_MAX assert strict_clamp(2.0) == SCORE_MAX assert strict_clamp(float("nan")) == 0.5 assert strict_clamp(float("inf")) == 0.5 assert strict_clamp(float("-inf")) == 0.5 assert strict_clamp("not a number") == 0.5 assert strict_clamp(None) == 0.5 def test_strict_clamp_passes_through_in_range(): for v in [0.001, 0.1, 0.5, 0.7234, 0.999]: out = strict_clamp(v) assert SCORE_MIN <= out <= SCORE_MAX assert 0.0 < out < 1.0 def test_strict_clamp_handles_tiny_positive_that_would_round_to_zero(): """Canary #11 lesson: 0.00004 rounds to 0.0000 and trips the validator.""" for v in [0.00001, 0.00004, 0.00009, 1e-8]: out = strict_clamp(v) # formatted with .4f must still be strictly in (0, 1) rendered = f"{out:.4f}" assert float(rendered) > 0.0, f"{v} rendered as {rendered}" assert float(rendered) < 1.0 def test_strict_clamp_handles_near_one_that_would_round_to_one(): for v in [0.99995, 0.99999, 1 - 1e-8]: out = strict_clamp(v) rendered = f"{out:.4f}" assert float(rendered) > 0.0 assert float(rendered) < 1.0, f"{v} rendered as {rendered}" # --------------------------------------------------------------------------- # Each canonical query reproduces the expected rows # --------------------------------------------------------------------------- def test_canonical_queries_solve_their_tasks(): for tid in TASK_IDS: s = EnvState() s.reset(tid) result = s.step( {"action_type": "submit_query", "query": TASKS[tid]["canonical_query"]} ) assert result["info"]["solved"] is True, f"{tid} canonical did not solve" assert result["reward"] == 1.0 score = grade_task(s, tid) assert SCORE_MIN <= score <= SCORE_MAX assert score >= 0.85, f"{tid} canonical scored too low: {score}" # --------------------------------------------------------------------------- # Broken queries do not solve and grade in (0, 1) # --------------------------------------------------------------------------- def test_broken_queries_score_in_range_but_not_solved(): for tid in TASK_IDS: s = EnvState() s.reset(tid) result = s.step( {"action_type": "submit_query", "query": TASKS[tid]["broken_query"]} ) assert result["info"]["solved"] is False score = grade_task(s, tid) assert SCORE_MIN <= score <= SCORE_MAX assert 0.0 < score < 1.0 # --------------------------------------------------------------------------- # A do-nothing run still produces an in-range score # --------------------------------------------------------------------------- def test_no_submission_scores_in_range(): for tid in TASK_IDS: s = EnvState() s.reset(tid) score = grade_task(s, tid) assert SCORE_MIN <= score <= SCORE_MAX assert 0.0 < score < 1.0 # --------------------------------------------------------------------------- # Step limit terminates # --------------------------------------------------------------------------- def test_step_limit_done(): s = EnvState() s.reset("task_1") for _ in range(MAX_STEPS): result = s.step({"action_type": "submit_query", "query": "SELECT 1"}) assert result["done"] is True # --------------------------------------------------------------------------- # Reset accepts unknown task_id by falling back to task_1 # --------------------------------------------------------------------------- def test_reset_unknown_task_falls_back(): s = EnvState() obs = s.reset("nonexistent_task") assert obs["task_id"] == "task_1" # --------------------------------------------------------------------------- # Empty action does not crash # --------------------------------------------------------------------------- def test_empty_action_handled(): s = EnvState() s.reset("task_1") result = s.step({}) assert "observation" in result assert result["reward"] <= 0 # negative or zero reward assert result["observation"]["error"]