""" Local test script — verifies the environment works without needing an LLM API. Run: uv run python test_local.py """ import json import sys # Test imports print("=" * 60) print("Testing Policy-to-Logic RL Environment") print("=" * 60) # ── Test 1: DSL Engine ──────────────────────────────────────────── print("\n[1/5] Testing DSL Engine...") from policy_to_logic_env.server.dsl_engine import parse_rules, execute_rules, validate_rules rules_json = json.dumps({ "rules": [ { "if": [ {"field": "time", "op": ">=", "value": 18}, {"field": "data_type", "op": "==", "value": "sensitive"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": "<", "value": 9}, {"field": "data_type", "op": "==", "value": "sensitive"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": ">=", "value": 18}, {"field": "data_type", "op": "==", "value": "internal"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": "<", "value": 9}, {"field": "data_type", "op": "==", "value": "internal"} ], "then": "DENY" } ], "default": "ALLOW" }) rules_data, errors = parse_rules(rules_json) assert rules_data is not None, f"Parse failed: {errors}" assert len(errors) == 0 # Test execution result = execute_rules(rules_data, {"time": 20, "data_type": "sensitive"}) assert result == "DENY", f"Expected DENY, got {result}" result = execute_rules(rules_data, {"time": 12, "data_type": "sensitive"}) assert result == "ALLOW", f"Expected ALLOW, got {result}" result = execute_rules(rules_data, {"time": 22, "data_type": "public"}) assert result == "ALLOW", f"Expected ALLOW, got {result}" print(" ✅ DSL Engine working correctly") # ── Test 2: Scenario Generator ──────────────────────────────────── print("\n[2/5] Testing Scenario Generator...") from policy_to_logic_env.server.scenario_generator import generate_scenarios for task_name in ["data_access", "resource_access", "transaction_approval"]: scenarios = generate_scenarios(task_name) assert len(scenarios) > 0, f"No scenarios for {task_name}" assert all("expected_decision" in s for s in scenarios), f"Missing expected_decision in {task_name}" print(f" ✅ {task_name}: {len(scenarios)} scenarios generated") # ── Test 3: Ground Truth ───────────────────────────────────────── print("\n[3/5] Testing Ground Truth Engine...") from policy_to_logic_env.server.ground_truth import evaluate_ground_truth, answer_clarification # Test data_access assert evaluate_ground_truth("data_access", {"time": 20, "data_type": "sensitive"}) == "DENY" assert evaluate_ground_truth("data_access", {"time": 12, "data_type": "sensitive"}) == "ALLOW" assert evaluate_ground_truth("data_access", {"time": 3, "data_type": "public"}) == "ALLOW" # Test resource_access assert evaluate_ground_truth("resource_access", {"role": "senior", "time": 3, "document_type": "confidential"}) == "ALLOW" assert evaluate_ground_truth("resource_access", {"role": "contractor", "time": 12, "document_type": "internal"}) == "DENY" assert evaluate_ground_truth("resource_access", {"role": "junior", "time": 12, "document_type": "internal"}) == "ALLOW" # Test transaction_approval assert evaluate_ground_truth("transaction_approval", {"amount": 100, "transfer_type": "international", "time": 12, "initiator_role": "employee"}) == "COMPLIANCE_REVIEW" assert evaluate_ground_truth("transaction_approval", {"amount": 10000, "transfer_type": "domestic", "time": 20, "initiator_role": "employee"}) == "HOLD" assert evaluate_ground_truth("transaction_approval", {"amount": 6000, "transfer_type": "domestic", "time": 12, "initiator_role": "manager"}) == "APPROVE" # Test clarification oracle answer = answer_clarification("transaction_approval", "What is the standard limit?") assert "5,000" in answer print(" ✅ Ground Truth and Oracle working correctly") # ── Test 4: Graders ─────────────────────────────────────────────── print("\n[4/5] Testing Graders...") from policy_to_logic_env.server.graders import grade_task # Grade a perfect ruleset for data_access perfect_rules = { "rules": [ { "if": [ {"field": "time", "op": ">=", "value": 18}, {"field": "data_type", "op": "==", "value": "sensitive"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": "<", "value": 9}, {"field": "data_type", "op": "==", "value": "sensitive"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": ">=", "value": 18}, {"field": "data_type", "op": "==", "value": "internal"} ], "then": "DENY" }, { "if": [ {"field": "time", "op": "<", "value": 9}, {"field": "data_type", "op": "==", "value": "internal"} ], "then": "DENY" } ], "default": "ALLOW" } score, details = grade_task("data_access", perfect_rules) print(f" Perfect rules score: {score:.2%} ({details['passed']}/{details['total']})") assert score >= 0.9, f"Perfect rules should score >=0.9, got {score}" # Grade an empty ruleset empty_rules = {"rules": [], "default": "ALLOW"} score_empty, details_empty = grade_task("data_access", empty_rules) print(f" Empty rules score: {score_empty:.2%} ({details_empty['passed']}/{details_empty['total']})") print(" ✅ Graders working correctly") # ── Test 5: Full Environment Loop ───────────────────────────────── print("\n[5/5] Testing Full Environment Loop...") from policy_to_logic_env.server.environment import PolicyToLogicEnvironment from policy_to_logic_env.models import PolicyToLogicAction env = PolicyToLogicEnvironment() # Reset result = env.reset(task_name="data_access") assert not result.done assert result.observation.task_name == "data_access" assert result.observation.step_number == 0 print(f" Reset OK. Policy: {result.observation.policy_text[:60]}...") # Step 1: Ask clarification result = env.step(PolicyToLogicAction( action_type="ask_clarification", content=json.dumps({"question": "What are working hours?"}) )) assert not result.done assert result.observation.clarification_response is not None print(f" Step 1 (clarify): answer='{result.observation.clarification_response[:60]}...', reward={result.reward:.2f}") # Step 2: Propose rules result = env.step(PolicyToLogicAction( action_type="propose_rules", content=json.dumps(perfect_rules) )) print(f" Step 2 (propose): accuracy={result.observation.current_accuracy:.2%}, reward={result.reward:.2f}, done={result.done}") # Check state state = env.state() print(f" State: episode={state.episode_id}, steps={state.step_count}, questions={state.questions_asked}") print(" ✅ Full environment loop working correctly") # ── Summary ─────────────────────────────────────────────────────── print("\n" + "=" * 60) print("🎉 ALL TESTS PASSED! Environment is working correctly.") print("=" * 60) print("\nNext steps:") print(" 1. Start server: uv run python main.py") print(" 2. Test API: curl -X POST http://localhost:7860/reset -H 'Content-Type: application/json' -d '{}'") print(" 3. Run inference: HF_TOKEN=xxx uv run python inference.py")