narcolepticchicken committed on
Commit
28c4c59
·
verified ·
1 Parent(s): 098ae52

Upload jobs/run_unit_tests_standalone.py

Browse files
Files changed (1) hide show
  1. jobs/run_unit_tests_standalone.py +153 -0
jobs/run_unit_tests_standalone.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Self-contained unit tests for OCC core components.
3
+ All classes inlined — no repo import dependencies.
4
+ """
5
+ import time
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Dict, List
8
+
9
+ # --- INLINE ORACLE ---
10
+
11
@dataclass
class OracleResult:
    """Value object returned by ``ImpactOracle.score``."""

    # Score after the correctness, compute-cost and gaming terms are combined.
    raw_score: float
    # ``raw_score`` with the compute-cost penalty subtracted once more.
    cost_adjusted_score: float
    # Confidence reported by the result dict (falls back to correctness).
    confidence: float
    # Supporting values the score was derived from.
    evidence: Dict[str, Any]
    # Short human-readable summary of the scoring inputs.
    reason: str
    # Tags describing detected failure modes; a fresh list per instance.
    failure_tags: List[str] = field(default_factory=list)
    # Reward handed back to the caller; defaults to no reward.
    reward_value: float = 0.0
17
+
18
+
19
class ImpactOracle:
    """Inline oracle that turns a result dict into an ``OracleResult``.

    The score is driven by the ``correctness`` value, reduced by a
    compute-cost penalty and by a fixed penalty when the result passes the
    public checks but not the hidden ones.
    """

    def __init__(self, compute_penalty_rate=0.0001, gaming_penalty=2.0):
        """Store the per-unit compute penalty and the gaming penalty."""
        self.compute_penalty_rate = compute_penalty_rate
        self.gaming_penalty = gaming_penalty

    def score(self, mode, action, context, result, agent_id=""):
        """Score ``result`` and return an ``OracleResult``.

        Only ``result`` is read; ``mode``, ``action``, ``context`` and
        ``agent_id`` are accepted but unused by this inline version.

        Keys read from ``result`` (all optional):
            correctness: defaults to 0.0.
            compute_cost: defaults to 0.0.
            public_pass / hidden_tests_pass: default to ``correctness``.
            confidence: defaults to ``correctness``.
        """
        correctness = result.get("correctness", 0.0)
        compute_cost = result.get("compute_cost", 0.0)
        public_pass = result.get("public_pass", correctness)
        hidden_pass = result.get("hidden_tests_pass", correctness)

        # Passing the public checks while failing the hidden ones is tagged
        # as gaming.
        gamed = bool(public_pass and not hidden_pass)
        tags = ["gaming_hidden_tests"] if gamed else []

        cost_penalty = compute_cost * self.compute_penalty_rate
        raw = correctness * 1.0 - cost_penalty
        if gamed:
            raw -= self.gaming_penalty

        # NOTE(review): the compute penalty is already part of ``raw`` and is
        # subtracted a second time here -- confirm the double charge is
        # intended.
        cost_adjusted = raw - cost_penalty

        return OracleResult(
            raw_score=raw,
            cost_adjusted_score=cost_adjusted,
            confidence=result.get("confidence", correctness),
            evidence={"correctness": correctness},
            reason=f"corr={correctness:.2f}, cost={compute_cost}",
            failure_tags=tags,
            reward_value=cost_adjusted,
        )
36
+
37
+
38
+ # --- INLINE LEDGER ---
39
+
40
@dataclass
class LedgerEntry:
    """One row in the ``CreditLedger`` entry log."""

    # Who and what the row refers to.
    agent_id: str
    task_id: str
    action_id: str
    # Credit movement recorded by this row.
    earned_credit: float
    spent_credit: float
    decayed_credit: float
    # Balance left in the scope after the movement was applied.
    remaining_credit: float
    reason: str
    oracle_score: float
    compute_cost: float
    # Value of ``time.time()`` captured by the ledger when the row was written.
    timestamp: float
    # Scope the balance belongs to.
    capability_scope: str = "global"
45
+
46
+
47
class CreditLedger:
    """Per-agent, per-scope credit balances with an append-only entry log.

    Attributes:
        entries: ``LedgerEntry`` rows, appended in call order.
        balances: ``{agent_id: {capability_scope: balance}}``.
        decay_lambda: fraction of a positive balance removed each time decay
            is applied.

    NOTE(review): decay is applied once per ``earn``/``spend``/``balance``
    call, not as a function of elapsed time; the ``now`` value is only stored
    as the entry timestamp. Reading a balance therefore reduces it whenever
    ``decay_lambda`` > 0 -- confirm this is intended.
    """

    def __init__(self, decay_lambda=0.05):
        self.entries = []
        self.balances = {}
        self.decay_lambda = decay_lambda

    def earn(self, agent_id, task_id, action_id, amount, oracle_score,
             compute_cost, reason, capability_scope="global"):
        """Apply decay, then add ``amount`` to the agent's scoped balance."""
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        new_bal = self._get(agent_id, capability_scope) + amount
        self.entries.append(LedgerEntry(
            agent_id, task_id, action_id, amount, 0.0, 0.0, new_bal,
            reason, oracle_score, compute_cost, now, capability_scope,
        ))
        self._set(agent_id, capability_scope, new_bal)

    def spend(self, agent_id, task_id, action_id, amount,
              capability_scope="global", reason="spend"):
        """Deduct ``amount`` from the agent's scoped balance.

        Returns:
            True when the spend was recorded; False when ``amount`` is
            negative (or NaN) or exceeds the balance after decay.
        """
        # A negative amount would pass the funds check below and *increase*
        # the balance, minting credit outside of ``earn``. ``not >=`` also
        # rejects NaN. Refuse before any state is touched.
        if not amount >= 0:
            return False

        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        current = self._get(agent_id, capability_scope)
        if current < amount:
            return False

        new_bal = current - amount
        self.entries.append(LedgerEntry(
            agent_id, task_id, action_id, 0.0, amount, 0.0, new_bal,
            reason, 0.0, 0.0, now, capability_scope,
        ))
        self._set(agent_id, capability_scope, new_bal)
        return True

    def transfer(self, from_agent, to_agent, amount, capability_scope="global"):
        """Always refuse: returns False without touching any balance."""
        return False

    def balance(self, agent_id, capability_scope="global"):
        """Apply decay, then return the agent's balance in the given scope."""
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        return self._get(agent_id, capability_scope)

    def _get(self, agent_id, cap):
        """Return the stored balance, or 0.0 for an unknown agent or scope."""
        return self.balances.get(agent_id, {}).get(cap, 0.0)

    def _set(self, agent_id, cap, val):
        """Store ``val`` as the balance, creating the agent's dict if needed."""
        self.balances.setdefault(agent_id, {})[cap] = val

    def _apply_decay(self, agent_id, now, cap):
        """Shrink a positive balance by ``decay_lambda`` and log the loss."""
        current = self._get(agent_id, cap)
        if current <= 0:
            return
        decayed = current * (1 - self.decay_lambda)
        # Nothing is written when the factor leaves the balance unchanged
        # (e.g. decay_lambda == 0) or would increase it.
        if decayed < current:
            self.entries.append(LedgerEntry(
                agent_id, "decay", "decay", 0.0, 0.0, current - decayed,
                decayed, "credit_decay", 0.0, 0.0, now, cap,
            ))
            self._set(agent_id, cap, decayed)
84
+
85
+
86
+ # --- TESTS ---
87
+
88
def test_code_correctness():
    """A fully correct, non-gamed result keeps a raw score above 0.9."""
    outcome = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": True,
    }
    scored = ImpactOracle().score("code", {}, {}, outcome)
    assert scored.raw_score > 0.9, f"Expected >0.9, got {scored.raw_score}"
    print("PASS: test_code_correctness")
93
+
94
def test_code_gaming():
    """Passing public but failing hidden tests is tagged and scored below 0."""
    outcome = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": False,
    }
    scorer = ImpactOracle(gaming_penalty=2.0)
    scored = scorer.score("code", {}, {}, outcome)
    assert "gaming_hidden_tests" in scored.failure_tags
    assert scored.raw_score < 0, f"Expected negative, got {scored.raw_score}"
    print("PASS: test_code_gaming")
100
+
101
def test_ledger_earn():
    """Earning 10 credits with decay disabled leaves a balance of 10."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    held = book.balance("a1")
    assert abs(held - 10.0) < 0.01
    print("PASS: test_ledger_earn")
106
+
107
def test_ledger_spend():
    """Spending 3 of 10 earned credits succeeds and leaves 7."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    accepted = book.spend("a1", "t2", "a2", 3.0)
    assert accepted
    held = book.balance("a1")
    assert abs(held - 7.0) < 0.01
    print("PASS: test_ledger_spend")
113
+
114
def test_ledger_insufficient():
    """A spend larger than the available balance is refused."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 2.0, 1.0, 100.0, "test")
    accepted = book.spend("a1", "t2", "a2", 5.0)
    assert not accepted
    print("PASS: test_ledger_insufficient")
119
+
120
def test_transfer_blocked():
    """Transfers are refused and leave both parties' balances untouched."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("alice", "t1", "a1", 10.0, 1.0, 100.0, "test")
    ok = book.transfer("alice", "bob", 5.0)
    assert not ok
    # The sender keeps the credits and the recipient receives nothing.
    sender_balance = book.balance("alice")
    assert sender_balance > 9.0
    recipient_balance = book.balance("bob")
    assert recipient_balance < 0.1
    print("PASS: test_transfer_blocked")
128
+
129
def test_decay():
    """With a non-zero decay rate the observed balance drops below 10."""
    book = CreditLedger(decay_lambda=0.1)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    # NOTE(review): the inline CreditLedger decays on every balance() call
    # regardless of elapsed time, so this pause does not appear to affect
    # the result -- confirm against the real ledger.
    time.sleep(0.05)
    bal = book.balance("a1")
    assert bal < 10.0, f"Expected <10.0, got {bal}"
    print("PASS: test_decay")
136
+
137
def run_all():
    """Run every standalone test in order, framed by banner lines.

    A failing assertion propagates out of this function, so the closing
    "ALL TESTS PASSED" banner is printed only when every test succeeded.
    """
    rule = "=" * 50
    print(rule)
    print("OCC UNIT TESTS (STANDALONE)")
    print(rule)

    suite = (
        test_code_correctness,
        test_code_gaming,
        test_ledger_earn,
        test_ledger_spend,
        test_ledger_insufficient,
        test_transfer_blocked,
        test_decay,
    )
    for case in suite:
        case()

    print("\n" + rule)
    print("ALL TESTS PASSED")
    print(rule)


if __name__ == "__main__":
    run_all()