Spaces:
Sleeping
Sleeping
| """Additional functionality tests for the Navis web environment.""" | |
| from __future__ import annotations | |
| import os | |
| import sys | |
| from fastapi.testclient import TestClient | |
# Prepend the parent of this file's directory to sys.path (once), presumably
# so the ``navis_web_env`` package imported below resolves when the tests run
# from a source checkout.
ROOT = os.path.join(os.path.dirname(__file__), os.pardir)
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)
| from navis_web_env.server.app import app | |
| from navis_web_env.server.navis_web_environment import NavisWebEnvironment | |
| from navis_web_env.site_loader import list_curriculum_task_ids, list_task_ids, shortest_path_length, load_task | |
def _post_step(client: TestClient, click_link_id: str):
    """POST a click action to ``/step``, retrying in wrapped form on a 422.

    The flat body ``{"click_link_id": ...}`` is sent first. If the server
    answers with status 422, the same action is re-sent nested under an
    ``"action"`` key. The response of the last request made is returned.
    """
    click = {"click_link_id": click_link_id}
    flat_response = client.post("/step", json=click)
    if flat_response.status_code != 422:
        return flat_response
    # The flat shape was rejected; fall back to the wrapped request shape.
    return client.post("/step", json={"action": click})
def _post_step_with_optional_session(client: TestClient, click_link_id: str, session_id: str | None):
    """POST a click action to ``/step``, attaching ``session_id`` when truthy.

    The flat body is tried first; on a 422 the action is re-sent nested under
    ``"action"``. In both shapes a truthy ``session_id`` is added as a
    top-level ``"session_id"`` field. Returns the last response received.
    """
    # A falsy session id (None or "") contributes no field at all.
    session_fields = {"session_id": session_id} if session_id else {}

    flat_body = {"click_link_id": click_link_id, **session_fields}
    response = client.post("/step", json=flat_body)
    if response.status_code != 422:
        return response

    wrapped_body = {"action": {"click_link_id": click_link_id}, **session_fields}
    return client.post("/step", json=wrapped_body)
def _unwrap_observation_payload(payload: dict):
    """Return the observation carried by a response body.

    Lookup order: a top-level ``"observation"`` entry, then an
    ``"observation"`` entry inside a dict-valued ``"result"``. When neither
    is present the payload itself is returned unchanged.
    """
    if "observation" in payload:
        return payload["observation"]

    nested = payload.get("result")
    if isinstance(nested, dict) and "observation" in nested:
        return nested["observation"]

    return payload
def _unwrap_info_payload(payload: dict):
    """Return the ``info`` dict from a response body, or ``{}`` if absent.

    A dict-valued top-level ``"info"`` wins; otherwise a dict-valued
    ``"info"`` inside a dict-valued ``"result"`` is used. Non-dict ``info``
    values are ignored.
    """
    containers = [payload]
    nested = payload.get("result")
    if isinstance(nested, dict):
        containers.append(nested)

    for container in containers:
        info = container.get("info")
        if isinstance(info, dict):
            return info
    return {}
def _looks_like_state(payload: dict) -> bool:
    """Return True when ``payload`` has at least one recognised state key."""
    state_keys = {"task_id", "current_page_id", "page_id", "visited_pages", "step_count"}
    return not state_keys.isdisjoint(payload)
def test_state_tracks_task_metadata_after_reset():
    """After resetting onto the "hard" task, state reports the start values."""
    environment = NavisWebEnvironment(default_task_id="hard")
    environment.reset(task_id="hard")

    snapshot = environment.state
    expected_fields = {
        "task_id": "hard",
        "current_page_id": "dashboard",
        "target_page_id": "emergency_access_reset_playbook",
        "visited_pages": ["dashboard"],
        "visited_counts": {"dashboard": 1},
        "shortest_distance_to_target": 5,
    }
    for field_name, expected_value in expected_fields.items():
        assert getattr(snapshot, field_name) == expected_value, field_name
def test_state_updates_after_valid_transition():
    """One valid click on the "easy" task advances the tracked state."""
    environment = NavisWebEnvironment(default_task_id="easy")
    environment.reset(task_id="easy")

    # Ad-hoc stand-in object exposing only the ``click_link_id`` attribute.
    click = type("Action", (), {"click_link_id": "home_support"})()
    environment.step(action=click)

    snapshot = environment.state
    assert snapshot.step_count == 1
    assert snapshot.current_page_id == "support_center"
    assert snapshot.visited_pages == ["home", "support_center"]
    assert snapshot.visited_counts["support_center"] == 1
    assert snapshot.last_action_valid is True
    assert snapshot.shortest_distance_to_target == 1
def test_loop_cap_termination_sets_reason_and_penalty():
    """Bouncing between two pages ends the episode with ``loop_cap_exceeded``.

    Clicks alternate between ``dash_remote_work`` and ``remote_dashboard``
    for up to three round trips, stopping as soon as the episode is done.
    Note: only the termination reason is asserted here; no penalty value is
    checked despite the test name.
    """
    environment = NavisWebEnvironment(default_task_id="hard")
    environment.reset(task_id="hard")

    back_and_forth = ["dash_remote_work", "remote_dashboard"] * 3
    last_observation = None
    for link_id in back_and_forth:
        click = type("Action", (), {"click_link_id": link_id})()
        last_observation = environment.step(action=click)
        if last_observation.done:
            break

    assert last_observation is not None
    assert last_observation.done is True
    assert environment.state.termination_reason == "loop_cap_exceeded"
    assert environment.get_last_info()["termination_reason"] == "loop_cap_exceeded"
def test_task_catalog_and_shortest_paths_are_deterministic():
    """Pin the task catalogue order and each task's start-to-target distance."""
    assert list_task_ids() == [
        "easy",
        "medium",
        "hard",
        "expert",
        "adversarial",
        "bank_dispute",
        "it_access_recovery",
        "permit_renewal",
    ]
    assert list_curriculum_task_ids() == ["curriculum_easy", "curriculum_medium", "curriculum_hard"]

    # (task id, expected shortest path length measured from its start page)
    expected_lengths = [
        ("easy", 2),
        ("medium", 4),
        ("hard", 5),
        ("expert", 6),
        ("adversarial", 6),
        ("bank_dispute", 4),
        ("it_access_recovery", 4),
        ("permit_renewal", 4),
        ("curriculum_medium", 5),
    ]

    # Load every task up front, then check the distances in the same order.
    loaded = {task_id: load_task(task_id) for task_id, _ in expected_lengths}
    for task_id, expected in expected_lengths:
        task = loaded[task_id]
        assert shortest_path_length(task, task.start_page_id) == expected
def test_curriculum_tasks_are_deterministic_and_include_required_checkpoints():
    """Loading ``curriculum_hard`` twice agrees, and its checkpoints are pinned."""
    task_a = load_task("curriculum_hard")
    task_b = load_task("curriculum_hard")

    # Two independent loads must agree on the goal text and the target page.
    assert task_a.goal_instruction == task_b.goal_instruction
    assert task_a.target_page_id == task_b.target_page_id

    assert task_a.required_page_ids == ["intake_hub", "workflow_center"]
    assert task_a.difficulty == "hard"
def test_http_endpoints_expose_health_schema_and_state():
    """Smoke-test ``/health``, ``/schema``, ``/reset`` and ``/state`` over HTTP."""
    http = TestClient(app)

    health = http.get("/health")
    assert health.status_code == 200
    assert health.json()["status"] in {"ok", "healthy"}

    schema = http.get("/schema")
    assert schema.status_code == 200
    schema_body = schema.json()
    # Either key naming is accepted: "*_schema" suffixed or bare.
    has_suffixed_keys = "action_schema" in schema_body and "observation_schema" in schema_body
    has_bare_keys = "action" in schema_body and "observation" in schema_body
    assert has_suffixed_keys or has_bare_keys

    reset = http.post("/reset", json={"task_id": "easy"})
    assert reset.status_code == 200
    first_observation = _unwrap_observation_payload(reset.json())
    assert first_observation["page_id"] == "home"

    state = http.get("/state")
    assert state.status_code == 200
    state_body = state.json()
    assert isinstance(state_body, dict)
    assert _looks_like_state(state_body)
def test_http_step_returns_info_summary_on_success():
    """A successful ``/step`` echoes the session id and moves off the home page.

    The step is posted without a session id. The ``session_id`` in the step
    response (``None`` when absent) must equal the one returned by ``/reset``.
    When the response carries an info dict, it must contain at least one of
    ``grade``, ``reached_target`` or ``task_id``.
    """
    client = TestClient(app)

    reset_response = client.post("/reset", json={"task_id": "easy"})
    # Fail here with a clear status mismatch rather than later on a body
    # that does not have the expected shape.
    assert reset_response.status_code == 200
    session_id = reset_response.json().get("session_id")

    step_response = _post_step(client, "home_support")
    assert step_response.status_code == 200

    payload = step_response.json()
    observation_payload = _unwrap_observation_payload(payload)
    info_payload = _unwrap_info_payload(payload)

    assert payload.get("session_id") == session_id
    assert observation_payload["page_id"] in {"support_center", "contact_support"}
    # Info is optional; it is only validated when the server returns it.
    if info_payload:
        assert "grade" in info_payload or "reached_target" in info_payload or "task_id" in info_payload
def test_http_session_id_persists_state_across_steps():
    """Two steps sharing one session id accumulate in the same episode state.

    The session id is taken from the reset body's ``session_id``, falling
    back to ``episode_id``. If the server returns neither, only a single
    session-less step is checked and the test ends early.
    """
    client = TestClient(app)

    reset_response = client.post("/reset", json={"task_id": "easy"})
    assert reset_response.status_code == 200
    reset_payload = reset_response.json()
    session_id = reset_payload.get("session_id") or reset_payload.get("episode_id")

    if not session_id:
        # No session support advertised: verify one plain step and stop.
        first_step = _post_step_with_optional_session(client, "home_support", None)
        assert first_step.status_code == 200
        assert _unwrap_observation_payload(first_step.json())["page_id"] == "support_center"
        return

    first_step = _post_step_with_optional_session(client, "home_support", session_id)
    assert first_step.status_code == 200
    first_payload = first_step.json()
    assert first_payload.get("session_id") == session_id
    assert _unwrap_observation_payload(first_payload)["page_id"] == "support_center"

    second_step = _post_step_with_optional_session(client, "support_contact", session_id)
    assert second_step.status_code == 200
    second_observation = _unwrap_observation_payload(second_step.json())
    assert second_observation["page_id"] == "contact_support"
    assert second_observation["done"] is True

    # session_id is always truthy here because the falsy case returned above,
    # so the query parameter is passed unconditionally.
    state_response = client.get("/state", params={"session_id": session_id})
    assert state_response.status_code == 200
    assert state_response.json()["step_count"] == 2