File size: 8,111 Bytes
743203e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
"""
Local test script — verifies the environment works without needing an LLM API.

Run: uv run python test_local.py
"""

import json
import sys

# Opening banner: a 60-character rule above and below the suite title.
print("=" * 60, "Testing Policy-to-Logic RL Environment", "=" * 60, sep="\n")

# ── Test 1: DSL Engine ────────────────────────────────────────────
# Serialises a ruleset to JSON, parses it back with parse_rules, and checks
# that execute_rules returns the expected decision for three inputs.
print("\n[1/5] Testing DSL Engine...")
from policy_to_logic_env.server.dsl_engine import parse_rules, execute_rules, validate_rules

# DENY "sensitive" and "internal" data when time >= 18 or time < 9; everything
# else falls through to the ALLOW default. The four rules differ only in the
# time condition and the data type, so they are generated in the order
# (>=18, sensitive), (<9, sensitive), (>=18, internal), (<9, internal).
rules_json = json.dumps({
    "rules": [
        {
            "if": [
                {"field": "time", "op": op, "value": bound},
                {"field": "data_type", "op": "==", "value": data_type},
            ],
            "then": "DENY",
        }
        for data_type in ("sensitive", "internal")
        for op, bound in ((">=", 18), ("<", 9))
    ],
    "default": "ALLOW",
})

rules_data, errors = parse_rules(rules_json)
assert rules_data is not None, f"Parse failed: {errors}"
assert len(errors) == 0, f"Unexpected parse errors: {errors}"

# Test execution: one input that matches a DENY rule, and two that match no
# rule (time inside the 9..18 range, and a data type no rule mentions).
for facts, expected in (
    ({"time": 20, "data_type": "sensitive"}, "DENY"),
    ({"time": 12, "data_type": "sensitive"}, "ALLOW"),
    ({"time": 22, "data_type": "public"}, "ALLOW"),
):
    result = execute_rules(rules_data, facts)
    assert result == expected, f"Expected {expected}, got {result}"

print("   ✅ DSL Engine working correctly")


# ── Test 2: Scenario Generator ────────────────────────────────────
# Every task must yield at least one scenario, and each scenario must carry
# an "expected_decision" entry.
print("\n[2/5] Testing Scenario Generator...")
from policy_to_logic_env.server.scenario_generator import generate_scenarios

for task_name in ("data_access", "resource_access", "transaction_approval"):
    scenarios = generate_scenarios(task_name)
    assert len(scenarios) > 0, f"No scenarios for {task_name}"
    assert all("expected_decision" in s for s in scenarios), f"Missing expected_decision in {task_name}"
    print(f"   ✅ {task_name}: {len(scenarios)} scenarios generated")


# ── Test 3: Ground Truth ─────────────────────────────────────────
# Checks evaluate_ground_truth against known (task, facts, decision) triples
# and the clarification oracle against one known question.
print("\n[3/5] Testing Ground Truth Engine...")
from policy_to_logic_env.server.ground_truth import evaluate_ground_truth, answer_clarification

# Table-driven so that a failure reports the task, the input and the decision
# actually returned rather than a bare AssertionError.
ground_truth_cases = [
    # Test data_access
    ("data_access", {"time": 20, "data_type": "sensitive"}, "DENY"),
    ("data_access", {"time": 12, "data_type": "sensitive"}, "ALLOW"),
    ("data_access", {"time": 3, "data_type": "public"}, "ALLOW"),
    # Test resource_access
    ("resource_access", {"role": "senior", "time": 3, "document_type": "confidential"}, "ALLOW"),
    ("resource_access", {"role": "contractor", "time": 12, "document_type": "internal"}, "DENY"),
    ("resource_access", {"role": "junior", "time": 12, "document_type": "internal"}, "ALLOW"),
    # Test transaction_approval
    (
        "transaction_approval",
        {"amount": 100, "transfer_type": "international", "time": 12, "initiator_role": "employee"},
        "COMPLIANCE_REVIEW",
    ),
    (
        "transaction_approval",
        {"amount": 10000, "transfer_type": "domestic", "time": 20, "initiator_role": "employee"},
        "HOLD",
    ),
    (
        "transaction_approval",
        {"amount": 6000, "transfer_type": "domestic", "time": 12, "initiator_role": "manager"},
        "APPROVE",
    ),
]
for task, facts, expected in ground_truth_cases:
    actual = evaluate_ground_truth(task, facts)
    assert actual == expected, f"{task} {facts}: expected {expected}, got {actual}"

# Test clarification oracle: the answer to the standard-limit question must
# mention "5,000".
answer = answer_clarification("transaction_approval", "What is the standard limit?")
assert "5,000" in answer, f"Expected '5,000' in oracle answer, got {answer!r}"

print("   ✅ Ground Truth and Oracle working correctly")


# ── Test 4: Graders ───────────────────────────────────────────────
# Grades a hand-written ruleset for data_access (must score at least 0.9) and
# an empty ruleset (score is only reported, not asserted).
print("\n[4/5] Testing Graders...")
from policy_to_logic_env.server.graders import grade_task

# Grade a perfect ruleset for data_access: DENY "sensitive" and "internal"
# data when time >= 18 or time < 9, ALLOW otherwise. The rules are generated
# in the order (>=18, sensitive), (<9, sensitive), (>=18, internal),
# (<9, internal). The name perfect_rules is reused by the environment-loop
# test further down.
perfect_rules = {
    "rules": [
        {
            "if": [
                {"field": "time", "op": op, "value": bound},
                {"field": "data_type", "op": "==", "value": data_type},
            ],
            "then": "DENY",
        }
        for data_type in ("sensitive", "internal")
        for op, bound in ((">=", 18), ("<", 9))
    ],
    "default": "ALLOW",
}

score, details = grade_task("data_access", perfect_rules)
print(f"   Perfect rules score: {score:.2%} ({details['passed']}/{details['total']})")
assert score >= 0.9, f"Perfect rules should score >=0.9, got {score}"

# Grade an empty ruleset (every input falls through to the ALLOW default).
empty_rules = {"rules": [], "default": "ALLOW"}
score_empty, details_empty = grade_task("data_access", empty_rules)
print(f"   Empty rules score: {score_empty:.2%} ({details_empty['passed']}/{details_empty['total']})")

print("   ✅ Graders working correctly")


# ── Test 5: Full Environment Loop ─────────────────────────────────
# Drives one episode in-process: reset, one clarification question, one rule
# proposal, then a state read-out.
print("\n[5/5] Testing Full Environment Loop...")
from policy_to_logic_env.server.environment import PolicyToLogicEnvironment
from policy_to_logic_env.models import PolicyToLogicAction

env = PolicyToLogicEnvironment()

# Reset: a fresh episode must not be finished and must start at step 0.
result = env.reset(task_name="data_access")
observation = result.observation
assert not result.done, "Episode should not be done right after reset"
assert observation.task_name == "data_access", f"Unexpected task: {observation.task_name}"
assert observation.step_number == 0, f"Expected step 0 after reset, got {observation.step_number}"
print(f"   Reset OK. Policy: {observation.policy_text[:60]}...")

# Step 1: Ask clarification — the observation must carry a response.
clarify_action = PolicyToLogicAction(
    action_type="ask_clarification",
    content=json.dumps({"question": "What are working hours?"}),
)
result = env.step(clarify_action)
assert not result.done, "Episode should not end on a clarification"
assert result.observation.clarification_response is not None, "No clarification response returned"
print(f"   Step 1 (clarify): answer='{result.observation.clarification_response[:60]}...', reward={result.reward:.2f}")

# Step 2: Propose rules, reusing the perfect_rules ruleset defined by the
# graders test earlier in this script. The outcome is reported, not asserted.
propose_action = PolicyToLogicAction(
    action_type="propose_rules",
    content=json.dumps(perfect_rules),
)
result = env.step(propose_action)
print(f"   Step 2 (propose): accuracy={result.observation.current_accuracy:.2%}, reward={result.reward:.2f}, done={result.done}")

# Check state
state = env.state()
print(f"   State: episode={state.episode_id}, steps={state.step_count}, questions={state.questions_asked}")

print("   ✅ Full environment loop working correctly")


# ── Summary ───────────────────────────────────────────────────────
# Only reached when every assertion above passed; a failing assert aborts the
# script before this point.
print("\n" + "=" * 60)
print("🎉 ALL TESTS PASSED! Environment is working correctly.")
print("=" * 60)
print("\nNext steps:")
print("  1. Start server:  uv run python main.py")
print("  2. Test API:      curl -X POST http://localhost:7860/reset -H 'Content-Type: application/json' -d '{}'")
print("  3. Run inference:  HF_TOKEN=xxx uv run python inference.py")