Policy2Logic / test_local.py
Godreign-Y
initial commit
743203e
"""
Local test script — verifies the environment works without needing an LLM API.
Run: uv run python test_local.py
"""
import json
import sys
# Opening banner: a 60-character rule above and below the run title.
print("=" * 60, "Testing Policy-to-Logic RL Environment", "=" * 60, sep="\n")
# ── Test 1: DSL Engine ────────────────────────────────────────────
# Serialises a ruleset to JSON, parses it with parse_rules, then runs
# execute_rules against three sample scenarios and checks each decision.
print("\n[1/5] Testing DSL Engine...")

# validate_rules is not called below; importing it still confirms the
# symbol is importable from the module.
from policy_to_logic_env.server.dsl_engine import parse_rules, execute_rules, validate_rules

# Four DENY rules: data_type "sensitive" or "internal" combined with
# time >= 18 or time < 9. Anything unmatched falls through to ALLOW.
rules_json = json.dumps({
    "rules": [
        {
            "if": [
                {"field": "time", "op": ">=", "value": 18},
                {"field": "data_type", "op": "==", "value": "sensitive"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": "<", "value": 9},
                {"field": "data_type", "op": "==", "value": "sensitive"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": ">=", "value": 18},
                {"field": "data_type", "op": "==", "value": "internal"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": "<", "value": 9},
                {"field": "data_type", "op": "==", "value": "internal"},
            ],
            "then": "DENY",
        },
    ],
    "default": "ALLOW",
})

rules_data, errors = parse_rules(rules_json)
assert rules_data is not None, f"Parse failed: {errors}"
assert len(errors) == 0, f"Unexpected parse errors: {errors}"

# Test execution: (scenario, expected decision) pairs.
for scenario, expected in (
    ({"time": 20, "data_type": "sensitive"}, "DENY"),
    ({"time": 12, "data_type": "sensitive"}, "ALLOW"),
    ({"time": 22, "data_type": "public"}, "ALLOW"),
):
    result = execute_rules(rules_data, scenario)
    assert result == expected, f"Expected {expected} for {scenario}, got {result}"

print(" ✅ DSL Engine working correctly")
# ── Test 2: Scenario Generator ────────────────────────────────────
# For each task, generate_scenarios must return a non-empty collection
# whose every entry carries an "expected_decision" key.
print("\n[2/5] Testing Scenario Generator...")

from policy_to_logic_env.server.scenario_generator import generate_scenarios

for task_name in ["data_access", "resource_access", "transaction_approval"]:
    scenarios = generate_scenarios(task_name)
    assert len(scenarios) > 0, f"No scenarios for {task_name}"
    assert all("expected_decision" in s for s in scenarios), f"Missing expected_decision in {task_name}"
    # One status line per task, reporting how many scenarios were produced.
    print(f" ✅ {task_name}: {len(scenarios)} scenarios generated")
# ── Test 3: Ground Truth ─────────────────────────────────────────
# Checks evaluate_ground_truth against fixed (task, scenario, decision)
# triples, then checks the clarification oracle's answer text.
print("\n[3/5] Testing Ground Truth Engine...")

from policy_to_logic_env.server.ground_truth import evaluate_ground_truth, answer_clarification

ground_truth_cases = [
    # Test data_access
    ("data_access", {"time": 20, "data_type": "sensitive"}, "DENY"),
    ("data_access", {"time": 12, "data_type": "sensitive"}, "ALLOW"),
    ("data_access", {"time": 3, "data_type": "public"}, "ALLOW"),
    # Test resource_access
    ("resource_access", {"role": "senior", "time": 3, "document_type": "confidential"}, "ALLOW"),
    ("resource_access", {"role": "contractor", "time": 12, "document_type": "internal"}, "DENY"),
    ("resource_access", {"role": "junior", "time": 12, "document_type": "internal"}, "ALLOW"),
    # Test transaction_approval
    (
        "transaction_approval",
        {"amount": 100, "transfer_type": "international", "time": 12, "initiator_role": "employee"},
        "COMPLIANCE_REVIEW",
    ),
    (
        "transaction_approval",
        {"amount": 10000, "transfer_type": "domestic", "time": 20, "initiator_role": "employee"},
        "HOLD",
    ),
    (
        "transaction_approval",
        {"amount": 6000, "transfer_type": "domestic", "time": 12, "initiator_role": "manager"},
        "APPROVE",
    ),
]

for gt_task, gt_scenario, gt_expected in ground_truth_cases:
    gt_actual = evaluate_ground_truth(gt_task, gt_scenario)
    # Name the failing case so a failure is diagnosable without a debugger.
    assert gt_actual == gt_expected, (
        f"{gt_task} {gt_scenario}: expected {gt_expected}, got {gt_actual}"
    )

# Test clarification oracle: the answer must mention the "5,000" figure.
answer = answer_clarification("transaction_approval", "What is the standard limit?")
assert "5,000" in answer, f"Oracle answer does not mention 5,000: {answer!r}"

print(" ✅ Ground Truth and Oracle working correctly")
# ── Test 4: Graders ───────────────────────────────────────────────
# Grades two rulesets for the data_access task with grade_task: one
# expected to score at least 0.9 and an empty one for comparison.
print("\n[4/5] Testing Graders...")

from policy_to_logic_env.server.graders import grade_task

# Grade a perfect ruleset for data_access.
# NOTE: perfect_rules is read again by the environment-loop test further
# down, so the name must stay at module level.
perfect_rules = {
    "rules": [
        {
            "if": [
                {"field": "time", "op": ">=", "value": 18},
                {"field": "data_type", "op": "==", "value": "sensitive"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": "<", "value": 9},
                {"field": "data_type", "op": "==", "value": "sensitive"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": ">=", "value": 18},
                {"field": "data_type", "op": "==", "value": "internal"},
            ],
            "then": "DENY",
        },
        {
            "if": [
                {"field": "time", "op": "<", "value": 9},
                {"field": "data_type", "op": "==", "value": "internal"},
            ],
            "then": "DENY",
        },
    ],
    "default": "ALLOW",
}
score, details = grade_task("data_access", perfect_rules)
print(f" Perfect rules score: {score:.2%} ({details['passed']}/{details['total']})")
assert score >= 0.9, f"Perfect rules should score >=0.9, got {score}"

# Grade an empty ruleset. Its score is printed for information only;
# nothing is asserted about it.
empty_rules = {"rules": [], "default": "ALLOW"}
score_empty, details_empty = grade_task("data_access", empty_rules)
print(f" Empty rules score: {score_empty:.2%} ({details_empty['passed']}/{details_empty['total']})")

print(" ✅ Graders working correctly")
# ── Test 5: Full Environment Loop ─────────────────────────────────
# Drives one episode end to end: reset, one clarification question, one
# rule proposal, then a state readout.
print("\n[5/5] Testing Full Environment Loop...")

from policy_to_logic_env.server.environment import PolicyToLogicEnvironment
from policy_to_logic_env.models import PolicyToLogicAction

env = PolicyToLogicEnvironment()

# Reset
result = env.reset(task_name="data_access")
assert not result.done, "Episode should not be done right after reset"
assert result.observation.task_name == "data_access", (
    f"Unexpected task after reset: {result.observation.task_name}"
)
assert result.observation.step_number == 0, (
    f"Expected step_number 0 after reset, got {result.observation.step_number}"
)
print(f" Reset OK. Policy: {result.observation.policy_text[:60]}...")

# Step 1: Ask clarification
result = env.step(
    PolicyToLogicAction(
        action_type="ask_clarification",
        content=json.dumps({"question": "What are working hours?"}),
    )
)
assert not result.done, "Episode should not end on a clarification step"
assert result.observation.clarification_response is not None, (
    "Clarification step returned no response"
)
print(f" Step 1 (clarify): answer='{result.observation.clarification_response[:60]}...', reward={result.reward:.2f}")

# Step 2: Propose rules, using the perfect_rules dict defined in the
# grader test above. The outcome is printed, not asserted.
result = env.step(
    PolicyToLogicAction(
        action_type="propose_rules",
        content=json.dumps(perfect_rules),
    )
)
print(f" Step 2 (propose): accuracy={result.observation.current_accuracy:.2%}, reward={result.reward:.2f}, done={result.done}")

# Check state
state = env.state()
print(f" State: episode={state.episode_id}, steps={state.step_count}, questions={state.questions_asked}")

print(" ✅ Full environment loop working correctly")
# ── Summary ───────────────────────────────────────────────────────
# Reached only if no assertion above raised. Prints the success banner,
# then suggested follow-up commands.
print("\n" + "=" * 60)
print("🎉 ALL TESTS PASSED! Environment is working correctly.")
print("=" * 60)
print("\nNext steps:")
print(" 1. Start server: uv run python main.py")
print(" 2. Test API: curl -X POST http://localhost:7860/reset -H 'Content-Type: application/json' -d '{}'")
print(" 3. Run inference: HF_TOKEN=xxx uv run python inference.py")