Upload jobs/run_unit_tests_standalone.py
Browse files
jobs/run_unit_tests_standalone.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Self-contained unit tests for OCC core components.
|
| 3 |
+
All classes inlined — no repo import dependencies.
|
| 4 |
+
"""
|
| 5 |
+
import time
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from typing import Any, Dict, List
|
| 8 |
+
|
| 9 |
+
# --- INLINE ORACLE ---
|
| 10 |
+
|
| 11 |
+
@dataclass
class OracleResult:
    """Value object returned by ``ImpactOracle.score``.

    The field comments describe how ``ImpactOracle.score`` in this file
    populates each attribute; other producers may fill them differently.
    """

    # Correctness minus the compute-cost penalty and, if tagged, the gaming penalty.
    raw_score: float
    # ``raw_score`` with the compute-cost penalty subtracted once more.
    cost_adjusted_score: float
    # Taken from the result's "confidence" key, falling back to its correctness.
    confidence: float
    # Supporting data for the score (currently just the correctness value).
    evidence: Dict[str, Any]
    # Human-readable summary of the inputs behind the score.
    reason: str
    # Tags describing detected failures, e.g. "gaming_hidden_tests".
    failure_tags: List[str] = field(default_factory=list)
    # Reward handed to the caller; ``score`` sets it to the cost-adjusted score.
    reward_value: float = 0.0
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
class ImpactOracle:
    """Turns a task result dict into an ``OracleResult`` score.

    Args:
        compute_penalty_rate: Multiplier applied to ``compute_cost`` to get
            the cost penalty.
        gaming_penalty: Amount subtracted from the raw score when public
            tests pass but hidden tests do not.
    """

    def __init__(self, compute_penalty_rate=0.0001, gaming_penalty=2.0):
        self.compute_penalty_rate = compute_penalty_rate
        self.gaming_penalty = gaming_penalty

    def score(self, mode, action, context, result, agent_id=""):
        """Score ``result`` and return an ``OracleResult``.

        Only ``result`` is read; ``mode``, ``action``, ``context`` and
        ``agent_id`` are accepted but unused here.

        Keys read from ``result`` (all optional):
            correctness: defaults to 0.0.
            compute_cost: defaults to 0.0.
            public_pass / hidden_tests_pass: default to ``correctness``, so
                they are judged by truthiness when absent.
            confidence: defaults to ``correctness``.

        Returns:
            An ``OracleResult`` whose ``reward_value`` equals its
            ``cost_adjusted_score``.
        """
        correctness = result.get("correctness", 0.0)
        compute_cost = result.get("compute_cost", 0.0)
        public_pass = result.get("public_pass", correctness)
        hidden_pass = result.get("hidden_tests_pass", correctness)

        # Passing the public tests while failing the hidden ones is treated
        # as gaming the evaluation.
        gamed = bool(public_pass and not hidden_pass)
        failure_tags = ["gaming_hidden_tests"] if gamed else []

        cost_penalty = compute_cost * self.compute_penalty_rate
        # "* 1.0" keeps the score a float even for int/bool correctness.
        raw = correctness * 1.0 - cost_penalty
        if gamed:
            raw -= self.gaming_penalty

        # NOTE(review): the cost penalty is already part of ``raw`` and is
        # subtracted again here, so it counts twice in the cost-adjusted
        # score. Kept as-is to preserve existing numbers - confirm intent.
        cost_adj = raw - cost_penalty

        return OracleResult(
            raw_score=raw,
            cost_adjusted_score=cost_adj,
            confidence=result.get("confidence", correctness),
            evidence={"correctness": correctness},
            reason=f"corr={correctness:.2f}, cost={compute_cost}",
            failure_tags=failure_tags,
            reward_value=cost_adj,
        )
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# --- INLINE LEDGER ---
|
| 39 |
+
|
| 40 |
+
@dataclass
class LedgerEntry:
    """One row of the ``CreditLedger`` log (an earn, spend or decay event).

    The field comments describe how ``CreditLedger`` in this file fills
    each attribute.
    """

    agent_id: str
    # Task and action the entry belongs to; decay rows use "decay" for both.
    task_id: str
    action_id: str
    # Exactly one of the next three is non-zero, depending on the event kind.
    earned_credit: float
    spent_credit: float
    decayed_credit: float
    # Balance for (agent_id, capability_scope) after this event.
    remaining_credit: float
    reason: str
    # Oracle score and compute cost passed to ``earn``; 0.0 for other events.
    oracle_score: float
    compute_cost: float
    # ``time.time()`` value captured when the event was recorded.
    timestamp: float
    # Balance bucket the event applies to.
    capability_scope: str = "global"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
class CreditLedger:
    """Per-agent, per-capability credit balances with an append-only log.

    Decay is applied once on every ``earn``, ``spend`` and ``balance`` call
    for the touched (agent, capability) pair. It does not depend on elapsed
    time; the timestamp is only stored on the log entries.

    Attributes:
        entries: List of ``LedgerEntry`` rows in the order they were recorded.
        balances: ``{agent_id: {capability_scope: balance}}``.
        decay_lambda: Fraction of a positive balance removed per decay step.
    """

    def __init__(self, decay_lambda=0.05):
        self.entries = []
        self.balances = {}
        self.decay_lambda = decay_lambda

    def earn(self, agent_id, task_id, action_id, amount, oracle_score, compute_cost, reason, capability_scope="global"):
        """Apply one decay step, then add ``amount`` to the balance and log it."""
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        new_bal = self._get(agent_id, capability_scope) + amount
        self.entries.append(LedgerEntry(
            agent_id, task_id, action_id, amount, 0.0, 0.0, new_bal,
            reason, oracle_score, compute_cost, now, capability_scope,
        ))
        self._set(agent_id, capability_scope, new_bal)

    def spend(self, agent_id, task_id, action_id, amount, capability_scope="global", reason="spend"):
        """Apply one decay step, then deduct ``amount`` if it can be covered.

        Returns:
            True when the spend was recorded. False when ``amount`` exceeds
            the decayed balance, or when ``amount`` is negative or NaN; in
            those cases nothing is deducted or logged.
        """
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        current = self._get(agent_id, capability_scope)
        # Chained comparison on purpose: it rejects insufficient funds and
        # also negative or NaN amounts, which would otherwise slip past a
        # plain "current < amount" check and inflate or corrupt the balance.
        if not (0 <= amount <= current):
            return False
        new_bal = current - amount
        self.entries.append(LedgerEntry(
            agent_id, task_id, action_id, 0.0, amount, 0.0, new_bal,
            reason, 0.0, 0.0, now, capability_scope,
        ))
        self._set(agent_id, capability_scope, new_bal)
        return True

    def transfer(self, from_agent, to_agent, amount, capability_scope="global"):
        """Always refuse: returns False and leaves all balances untouched.

        NOTE(review): presumably credits are meant to be non-transferable;
        confirm against the ledger this class was inlined from.
        """
        return False

    def balance(self, agent_id, capability_scope="global"):
        """Return the balance after applying one decay step.

        This read mutates state: with a non-zero ``decay_lambda`` every call
        shrinks a positive balance and appends a decay entry.
        """
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        return self._get(agent_id, capability_scope)

    def _get(self, agent_id, cap):
        """Return the stored balance, or 0.0 for an unknown agent or scope."""
        return self.balances.get(agent_id, {}).get(cap, 0.0)

    def _set(self, agent_id, cap, val):
        """Store ``val`` as the balance, creating the agent's bucket if needed."""
        self.balances.setdefault(agent_id, {})[cap] = val

    def _apply_decay(self, agent_id, now, cap):
        """Shrink a positive balance by ``decay_lambda`` and log the loss.

        Balances at or below zero are left alone, and nothing is recorded
        when the decayed value is not smaller than the current one (for
        example when ``decay_lambda`` is 0).
        """
        current = self._get(agent_id, cap)
        if current <= 0:
            return
        decayed = current * (1 - self.decay_lambda)
        if decayed < current:
            self.entries.append(LedgerEntry(
                agent_id, "decay", "decay", 0.0, 0.0, current - decayed,
                decayed, "credit_decay", 0.0, 0.0, now, cap,
            ))
            self._set(agent_id, cap, decayed)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# --- TESTS ---
|
| 87 |
+
|
| 88 |
+
def test_code_correctness():
    """A fully correct, non-gamed result should score just under 1.0."""
    outcome = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": True,
    }
    scorer = ImpactOracle()
    res = scorer.score("code", {}, {}, outcome)
    assert res.raw_score > 0.9, f"Expected >0.9, got {res.raw_score}"
    print("PASS: test_code_correctness")
|
| 93 |
+
|
| 94 |
+
def test_code_gaming():
    """Passing public tests while failing hidden ones is tagged and penalised."""
    outcome = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": False,
    }
    scorer = ImpactOracle(gaming_penalty=2.0)
    res = scorer.score("code", {}, {}, outcome)
    assert "gaming_hidden_tests" in res.failure_tags
    assert res.raw_score < 0, f"Expected negative, got {res.raw_score}"
    print("PASS: test_code_gaming")
|
| 100 |
+
|
| 101 |
+
def test_ledger_earn():
    """Earning credit with decay disabled yields exactly that balance."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn(
        agent_id="a1",
        task_id="t1",
        action_id="a1",
        amount=10.0,
        oracle_score=1.0,
        compute_cost=100.0,
        reason="test",
    )
    drift = abs(book.balance("a1") - 10.0)
    assert drift < 0.01
    print("PASS: test_ledger_earn")
|
| 106 |
+
|
| 107 |
+
def test_ledger_spend():
    """A covered spend succeeds and reduces the balance by the amount."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn(
        agent_id="a1",
        task_id="t1",
        action_id="a1",
        amount=10.0,
        oracle_score=1.0,
        compute_cost=100.0,
        reason="test",
    )
    accepted = book.spend("a1", "t2", "a2", 3.0)
    assert accepted
    drift = abs(book.balance("a1") - 7.0)
    assert drift < 0.01
    print("PASS: test_ledger_spend")
|
| 113 |
+
|
| 114 |
+
def test_ledger_insufficient():
    """Spending more than the available balance is refused."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn(
        agent_id="a1",
        task_id="t1",
        action_id="a1",
        amount=2.0,
        oracle_score=1.0,
        compute_cost=100.0,
        reason="test",
    )
    accepted = book.spend("a1", "t2", "a2", 5.0)
    assert not accepted
    print("PASS: test_ledger_insufficient")
|
| 119 |
+
|
| 120 |
+
def test_transfer_blocked():
    """Transfers are rejected and leave both parties' balances unchanged."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn(
        agent_id="alice",
        task_id="t1",
        action_id="a1",
        amount=10.0,
        oracle_score=1.0,
        compute_cost=100.0,
        reason="test",
    )
    assert not book.transfer("alice", "bob", 5.0)
    # Sender keeps the credit; receiver gets nothing.
    assert book.balance("alice") > 9.0
    assert book.balance("bob") < 0.1
    print("PASS: test_transfer_blocked")
|
| 128 |
+
|
| 129 |
+
def test_decay():
    """Reading a balance with a non-zero decay rate shrinks it by one step.

    The inline ledger decays on each access, not by elapsed wall-clock
    time, so no sleep is needed before reading the balance.
    """
    ledger = CreditLedger(decay_lambda=0.1)
    ledger.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    # earn() decays the (still empty) balance before crediting, so this
    # balance() call is the first and only decay step applied to the 10.0.
    bal = ledger.balance("a1")
    assert bal < 10.0, f"Expected <10.0, got {bal}"
    assert abs(bal - 9.0) < 0.01, f"Expected ~9.0 after one decay step, got {bal}"
    print("PASS: test_decay")
|
| 136 |
+
|
| 137 |
+
def run_all():
    """Run every standalone test in order between banner lines.

    A failing assertion propagates and aborts the run, so the closing
    "ALL TESTS PASSED" banner is printed only when every test succeeds.
    """
    rule = "=" * 50
    print(rule)
    print("OCC UNIT TESTS (STANDALONE)")
    print(rule)

    suite = (
        test_code_correctness,
        test_code_gaming,
        test_ledger_earn,
        test_ledger_spend,
        test_ledger_insufficient,
        test_transfer_blocked,
        test_decay,
    )
    for case in suite:
        case()

    print("\n" + rule)
    print("ALL TESTS PASSED")
    print(rule)
|
| 151 |
+
|
| 152 |
+
# Script entry point: run the suite only when executed directly, not on import.
if __name__ == "__main__":
    run_all()
|