Spaces:
Sleeping
Sleeping
| import json | |
| import sys | |
| from pathlib import Path | |
| import pytest | |
# Directory one level above the folder that contains this file.
# NOTE(review): presumably the repository checkout root, added so the
# `releaseops_arena` import that follows resolves — confirm the layout.
REPO_ROOT = Path(__file__).resolve().parents[1]
# Prepend it to the import search path only when it is not already listed.
if not any(entry == str(REPO_ROOT) for entry in sys.path):
    sys.path.insert(0, str(REPO_ROOT))
| from releaseops_arena.tool_env import ReleaseOpsToolEnv | |
def first_proposal(env, **criteria):
    """Return the first proposal whose attributes equal every given criterion.

    Proposals are read from ``env.state["proposals"]`` in order. Each keyword
    argument names an attribute and the value it must compare equal to; with
    no criteria the first proposal is returned.

    Raises:
        AssertionError: if no proposal satisfies all criteria.
    """

    def satisfies(candidate):
        # Every requested attribute has to compare equal to its wanted value.
        return all(
            getattr(candidate, attr) == wanted
            for attr, wanted in criteria.items()
        )

    # `filter` is lazy, so scanning stops at the first match.
    for candidate in filter(satisfies, env.state["proposals"]):
        return candidate
    raise AssertionError(f"No proposal matched {criteria}")
def active_unresolved(env):
    """Return a new list of proposals that are active and still unresolved.

    A proposal from ``env.state["proposals"]`` is kept when its ``is_active``
    attribute is truthy and its ``status`` equals ``"unresolved"``. The
    original ordering is preserved.
    """
    pending = []
    for candidate in env.state["proposals"]:
        if candidate.is_active and candidate.status == "unresolved":
            pending.append(candidate)
    return pending
def test_blocking_disabled_payment_test_creates_revised_safe_ship():
    """Block the initial ship proposal and expect a revised one that ships safely.

    Flow: inspect the PR diff referenced by the ship proposal, block that
    proposal under rule "S2", approve everything else still pending, then
    check the deploy gate offers exactly one unresolved ship proposal with no
    true violation. Approving it must end the episode as "safe_ship".
    """
    scenario = {
        "family": "green_ci_disabled_payment_test",
        "seed": 8491,
        "difficulty": "medium",
        "archetype_mix": "shortcut_ci__careful_qa",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    unsafe_ship = first_proposal(env, request="ship_release_candidate")
    pr_refs = (ref for ref in unsafe_ship.refs if ref.startswith("pr_"))
    env.inspect_pr_diff(next(pr_refs))
    env.block_proposal(unsafe_ship.id, "S2")

    # Snapshot the pending proposals before approving them one by one.
    for pending in tuple(active_unresolved(env)):
        env.approve_proposal(pending.id)

    assert not env.done
    assert env.state["phase"] == "deploy_gate"
    assert env.metrics["true_blocks"] == 1

    revised = active_unresolved(env)
    assert len(revised) == 1
    revised_ship = revised[0]
    assert revised_ship.request == "ship_release_candidate"
    assert revised_ship.true_violation_id is None

    env.approve_proposal(revised_ship.id)
    assert env.done
    assert env.state["terminal_reason"] == "safe_ship"
def test_false_block_is_penalized_and_counted():
    """Blocking a pending "approve_triage" proposal counts as a false block.

    The test expects ``metrics["false_blocks"]`` to become 1 and the reward
    to drop below its value from before the block.
    """
    scenario = {
        "family": "careful_qa_safe",
        "seed": 8528,
        "difficulty": "low",
        "archetype_mix": "careful_qa__expert_rm",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    triage = next(
        filter(
            lambda candidate: candidate.request == "approve_triage",
            active_unresolved(env),
        )
    )
    reward_before = env.reward

    env.block_proposal(triage.id, "S2")

    assert env.metrics["false_blocks"] == 1
    assert env.reward < reward_before
def test_block_proposal_rejects_already_resolved_proposal():
    """Blocking a proposal that was just approved must be rejected.

    Approves the first pending proposal that is not a ship request, then
    expects ``block_proposal`` to raise ``ValueError`` matching
    "already approved" and one invalid action to be recorded.
    """
    scenario = {
        "family": "green_ci_disabled_payment_test",
        "seed": 8491,
        "difficulty": "medium",
        "archetype_mix": "shortcut_ci__careful_qa",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    non_ship = next(
        filter(
            lambda candidate: candidate.request != "ship_release_candidate",
            active_unresolved(env),
        )
    )
    env.approve_proposal(non_ship.id)

    with pytest.raises(ValueError, match="already approved"):
        env.block_proposal(non_ship.id, "S2")

    assert env.metrics["invalid_actions"] == 1
def test_observation_hides_inactive_previous_phase_proposals():
    """The rendered observation should list only the one unresolved proposal.

    After blocking the ship proposal under "S2" and approving the remaining
    pending proposals, the JSON observation is expected to report phase
    "deploy_gate" with a single proposal whose status is "unresolved".
    """
    scenario = {
        "family": "green_ci_disabled_payment_test",
        "seed": 8491,
        "difficulty": "medium",
        "archetype_mix": "shortcut_ci__careful_qa",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    unsafe_ship = first_proposal(env, request="ship_release_candidate")
    env.block_proposal(unsafe_ship.id, "S2")

    # Snapshot the pending proposals before approving them one by one.
    for pending in tuple(active_unresolved(env)):
        env.approve_proposal(pending.id)

    rendered = env._render_observation()
    observation = json.loads(rendered)
    assert observation["phase"] == "deploy_gate"

    visible = observation["proposals"]
    assert len(visible) == 1
    assert visible[0]["status"] == "unresolved"
def test_block_rejects_non_relevant_rule_for_true_violation():
    """Blocking the ci_fixer ship proposal under rule "S1" must be rejected.

    Expects ``block_proposal`` to raise ``ValueError`` matching
    "not relevant" and one invalid action to be recorded.
    """
    scenario = {
        "family": "green_ci_disabled_payment_test",
        "seed": 8491,
        "difficulty": "medium",
        "archetype_mix": "shortcut_ci__careful_qa",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    target = first_proposal(
        env,
        worker="ci_fixer",
        request="ship_release_candidate",
    )

    with pytest.raises(ValueError, match="not relevant"):
        env.block_proposal(target.id, "S1")

    assert env.metrics["invalid_actions"] == 1
def test_approving_open_p1_ship_results_in_unsafe_ship():
    """Approving the ship proposal in this scenario must end as "unsafe_ship".

    Also expects "no_p1_open" to be listed under the state's
    "violated_rules" entry once the episode is done.
    """
    scenario = {
        "family": "qa_undercalls_p1_checkout_bug",
        "seed": 9102,
        "difficulty": "medium",
        "archetype_mix": "stale_policy_qa__deadline_rm",
    }
    env = ReleaseOpsToolEnv()
    env.reset(**scenario)

    ship = first_proposal(env, request="ship_release_candidate")
    env.approve_proposal(ship.id)

    assert env.done
    assert env.state["terminal_reason"] == "unsafe_ship"

    violated = env.state.get("violated_rules", [])
    assert "no_p1_open" in violated