narcolepticchicken
/

occ-stack

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed 26 days ago

Commit

28c4c59

verified ·

1 Parent(s): 098ae52

Upload jobs/run_unit_tests_standalone.py

Browse files

Files changed (1) hide show

jobs/run_unit_tests_standalone.py +153 -0

jobs/run_unit_tests_standalone.py ADDED Viewed

	@@ -0,0 +1,153 @@

+"""
+Self-contained unit tests for OCC core components.
+All classes inlined — no repo import dependencies.
+"""
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
+# --- INLINE ORACLE ---
@dataclass
class OracleResult:
    """Value object returned by ``ImpactOracle.score``.

    The oracle in this file fills the fields in declaration order, so the
    order of the fields is part of the contract and must not be changed.
    """

    # Score after the oracle's first compute-cost deduction and any gaming
    # penalty.
    raw_score: float
    # ``raw_score`` with the compute-cost deduction applied once more.
    cost_adjusted_score: float
    # Confidence reported alongside the result being scored.
    confidence: float
    # Supporting values recorded by the oracle (free-form mapping).
    evidence: Dict[str, Any]
    # Short human-readable summary of how the score was derived.
    reason: str
    # Labels for detected problems; a fresh empty list per instance.
    failure_tags: List[str] = field(default_factory=list)
    # Reward associated with this result; defaults to no reward.
    reward_value: float = 0.0
class ImpactOracle:
    """Scores a reported task result, penalising compute cost and test gaming.

    Args:
        compute_penalty_rate: Score deducted per unit of ``compute_cost``.
        gaming_penalty: Score deducted when the public tests pass but the
            hidden tests do not.
    """

    def __init__(self, compute_penalty_rate=0.0001, gaming_penalty=2.0):
        self.compute_penalty_rate = compute_penalty_rate
        self.gaming_penalty = gaming_penalty

    def score(self, mode, action, context, result: Dict[str, Any], agent_id="") -> "OracleResult":
        """Turn a result mapping into an ``OracleResult``.

        Args:
            mode: Not read by this inlined implementation.
            action: Not read by this inlined implementation.
            context: Not read by this inlined implementation.
            result: Mapping with optional keys ``"correctness"`` (default
                0.0), ``"compute_cost"`` (default 0.0), ``"public_pass"`` and
                ``"hidden_tests_pass"`` (both default to the correctness
                value) and ``"confidence"`` (defaults to the correctness
                value).
            agent_id: Not read by this inlined implementation.

        Returns:
            An ``OracleResult`` whose ``reward_value`` equals its
            ``cost_adjusted_score`` and whose ``evidence`` holds the
            correctness value.
        """
        correctness = result.get("correctness", 0.0)
        compute_cost = result.get("compute_cost", 0.0)
        public_pass = result.get("public_pass", correctness)
        hidden_pass = result.get("hidden_tests_pass", correctness)

        # Passing the public tests while failing the hidden ones is tagged
        # as gaming.
        gamed = bool(public_pass) and not hidden_pass
        failure_tags = ["gaming_hidden_tests"] if gamed else []

        compute_penalty = compute_cost * self.compute_penalty_rate
        raw = correctness * 1.0 - compute_penalty
        if gamed:
            raw -= self.gaming_penalty

        # NOTE(review): the compute penalty is already part of ``raw`` and is
        # subtracted a second time here, so the adjusted score carries it
        # twice. Kept as-is; confirm whether this is intended.
        cost_adj = raw - compute_penalty

        return OracleResult(
            raw_score=raw,
            cost_adjusted_score=cost_adj,
            confidence=result.get("confidence", correctness),
            evidence={"correctness": correctness},
            reason=f"corr={correctness:.2f}, cost={compute_cost}",
            failure_tags=failure_tags,
            reward_value=cost_adj,
        )
+# --- INLINE LEDGER ---
@dataclass
class LedgerEntry:
    """One row in the credit ledger's history.

    ``CreditLedger`` in this file appends an entry for every earn, spend and
    decay event. Field order matters because entries may be built
    positionally.
    """

    agent_id: str
    # The ledger uses the literal "decay" for task_id/action_id on decay rows.
    task_id: str
    action_id: str
    # Amount credited by this event (0.0 for spend and decay rows).
    earned_credit: float
    # Amount debited by this event (0.0 for earn and decay rows).
    spent_credit: float
    # Amount removed by decay (0.0 for earn and spend rows).
    decayed_credit: float
    # Balance for the agent/scope after this event was applied.
    remaining_credit: float
    reason: str
    oracle_score: float
    compute_cost: float
    # Wall-clock time of the event, as produced by ``time.time()``.
    timestamp: float
    # Balance partition this event applies to.
    capability_scope: str = "global"
class CreditLedger:
    """In-memory credit ledger keyed by agent and capability scope.

    Attributes:
        entries: History of ``LedgerEntry`` rows; rows are only appended.
        balances: ``{agent_id: {capability_scope: balance}}``.
        decay_lambda: Fraction of a positive balance removed by each decay
            step.

    NOTE(review): decay runs once per ``earn``/``spend``/``balance`` call and
    does not depend on elapsed time, so merely reading a balance shrinks it
    when ``decay_lambda`` is positive. Confirm this is the intended model.
    """

    def __init__(self, decay_lambda=0.05):
        self.entries = []
        self.balances = {}
        self.decay_lambda = decay_lambda

    def earn(self, agent_id, task_id, action_id, amount, oracle_score, compute_cost, reason, capability_scope="global"):
        """Decay the current balance, then credit ``amount`` and record it.

        ``amount`` is not validated; a negative value lowers the balance.
        """
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        new_bal = self._get(agent_id, capability_scope) + amount
        self.entries.append(
            LedgerEntry(
                agent_id=agent_id,
                task_id=task_id,
                action_id=action_id,
                earned_credit=amount,
                spent_credit=0.0,
                decayed_credit=0.0,
                remaining_credit=new_bal,
                reason=reason,
                oracle_score=oracle_score,
                compute_cost=compute_cost,
                timestamp=now,
                capability_scope=capability_scope,
            )
        )
        self._set(agent_id, capability_scope, new_bal)

    def spend(self, agent_id, task_id, action_id, amount, capability_scope="global", reason="spend") -> bool:
        """Decay the current balance, then try to debit ``amount``.

        Returns:
            True when the debit was applied and recorded. False, with no
            spend entry written, when ``amount`` is negative or exceeds the
            decayed balance. The decay step still runs on a refused spend.
        """
        now = time.time()
        self._apply_decay(agent_id, now, capability_scope)
        current = self._get(agent_id, capability_scope)
        # A negative amount would pass the balance check and *add* credit,
        # so it is refused instead of silently minting balance.
        if amount < 0 or current < amount:
            return False
        new_bal = current - amount
        self.entries.append(
            LedgerEntry(
                agent_id=agent_id,
                task_id=task_id,
                action_id=action_id,
                earned_credit=0.0,
                spent_credit=amount,
                decayed_credit=0.0,
                remaining_credit=new_bal,
                reason=reason,
                oracle_score=0.0,
                compute_cost=0.0,
                timestamp=now,
                capability_scope=capability_scope,
            )
        )
        self._set(agent_id, capability_scope, new_bal)
        return True

    def transfer(self, from_agent, to_agent, amount, capability_scope="global") -> bool:
        """Refuse every transfer: always returns False and changes nothing."""
        return False

    def balance(self, agent_id, capability_scope="global"):
        """Apply one decay step and return the resulting balance."""
        self._apply_decay(agent_id, time.time(), capability_scope)
        return self._get(agent_id, capability_scope)

    def _get(self, agent_id, cap):
        """Return the stored balance, or 0.0 for an unknown agent or scope."""
        return self.balances.get(agent_id, {}).get(cap, 0.0)

    def _set(self, agent_id, cap, val):
        """Store ``val`` as the balance, creating the agent's map if needed."""
        self.balances.setdefault(agent_id, {})[cap] = val

    def _apply_decay(self, agent_id, now, cap):
        """Shrink a positive balance by ``decay_lambda`` and record the loss.

        ``now`` is used only as the timestamp of the decay entry. Nothing is
        recorded when the balance is not positive or would not decrease.
        """
        current = self._get(agent_id, cap)
        if current <= 0:
            return
        decayed = current * (1 - self.decay_lambda)
        if decayed < current:
            self.entries.append(
                LedgerEntry(
                    agent_id=agent_id,
                    task_id="decay",
                    action_id="decay",
                    earned_credit=0.0,
                    spent_credit=0.0,
                    decayed_credit=current - decayed,
                    remaining_credit=decayed,
                    reason="credit_decay",
                    oracle_score=0.0,
                    compute_cost=0.0,
                    timestamp=now,
                    capability_scope=cap,
                )
            )
            self._set(agent_id, cap, decayed)
+# --- TESTS ---
def test_code_correctness():
    """A fully correct, non-gamed result must keep a raw score above 0.9."""
    payload = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": True,
    }
    outcome = ImpactOracle().score("code", {}, {}, payload)
    assert outcome.raw_score > 0.9, f"Expected >0.9, got {outcome.raw_score}"
    print("PASS: test_code_correctness")
def test_code_gaming():
    """Passing public tests but failing hidden ones is tagged and scored negative."""
    payload = {
        "correctness": 1.0,
        "compute_cost": 100,
        "public_pass": True,
        "hidden_tests_pass": False,
    }
    scorer = ImpactOracle(gaming_penalty=2.0)
    outcome = scorer.score("code", {}, {}, payload)
    assert "gaming_hidden_tests" in outcome.failure_tags
    assert outcome.raw_score < 0, f"Expected negative, got {outcome.raw_score}"
    print("PASS: test_code_gaming")
def test_ledger_earn():
    """With decay disabled, earning 10 credits yields a balance of 10."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    drift = abs(book.balance("a1") - 10.0)
    assert drift < 0.01
    print("PASS: test_ledger_earn")
def test_ledger_spend():
    """Spending 3 of 10 earned credits succeeds and leaves 7."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    accepted = book.spend("a1", "t2", "a2", 3.0)
    assert accepted
    drift = abs(book.balance("a1") - 7.0)
    assert drift < 0.01
    print("PASS: test_ledger_spend")
def test_ledger_insufficient():
    """A spend larger than the available balance is refused."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("a1", "t1", "a1", 2.0, 1.0, 100.0, "test")
    accepted = book.spend("a1", "t2", "a2", 5.0)
    assert not accepted
    print("PASS: test_ledger_insufficient")
def test_transfer_blocked():
    """Transfers are rejected and neither party's balance moves."""
    book = CreditLedger(decay_lambda=0.0)
    book.earn("alice", "t1", "a1", 10.0, 1.0, 100.0, "test")
    moved = book.transfer("alice", "bob", 5.0)
    assert not moved
    # The sender keeps the credits and the receiver gains nothing.
    assert book.balance("alice") > 9.0
    assert book.balance("bob") < 0.1
    print("PASS: test_transfer_blocked")
def test_decay():
    """With a positive decay rate, reading the balance returns less than was earned."""
    book = CreditLedger(decay_lambda=0.1)
    book.earn("a1", "t1", "a1", 10.0, 1.0, 100.0, "test")
    # Brief pause between earning and reading the balance.
    time.sleep(0.05)
    bal = book.balance("a1")
    assert bal < 10.0, f"Expected <10.0, got {bal}"
    print("PASS: test_decay")
def run_all():
    """Run every standalone test in order, framed by a printed banner.

    The first failing assertion propagates and stops the run, so the final
    banner is printed only when every test passed.
    """
    rule = "=" * 50
    print(rule)
    print("OCC UNIT TESTS (STANDALONE)")
    print(rule)
    suite = (
        test_code_correctness,
        test_code_gaming,
        test_ledger_earn,
        test_ledger_spend,
        test_ledger_insufficient,
        test_transfer_blocked,
        test_decay,
    )
    for case in suite:
        case()
    print("\n" + rule)
    print("ALL TESTS PASSED")
    print(rule)
# Script entry point: run the suite only when this file is executed directly.
if __name__ == "__main__":
    run_all()