{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# OCC Stack Walkthrough\n",
    "\n",
    "This notebook demonstrates the Oracle-Credit-Compute (OCC) stack for agentic compute allocation.\n",
    "\n",
    "**Repository:** https://huggingface.co/narcolepticchicken/occ-stack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "# Put the kernel's working directory first on sys.path so the project packages below can be imported\n",
    "sys.path.insert(0, str(Path.cwd()))\n",
    "\n",
    "from oracle.oracle import ImpactOracle\n",
    "from ledger.ledger import CreditLedger\n",
    "from broker.broker import ResourceBroker, Decision\n",
    "from rl.reward import RewardHook, OfflinePolicyComparator"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Impact Oracle\n",
    "\n",
    "The oracle scores whether an action produced measurable marginal value.\n",
    "Modes: `code`, `retrieval_qa`, `debate`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "oracle = ImpactOracle(compute_budget=1e5)\n",
    "\n",
    "# Outcome payload for a code attempt whose public and hidden tests both pass\n",
    "passing_outcome = {\n",
    "    \"correctness\": 1.0,\n",
    "    \"pass_at_k\": 1.0,\n",
    "    \"regression\": False,\n",
    "    \"compute_cost\": 50.0,\n",
    "    \"public_pass\": True,\n",
    "    \"hidden_tests_pass\": True,\n",
    "}\n",
    "result = oracle.score(\n",
    "    mode=\"code\",\n",
    "    action={\"attempt\": 1},\n",
    "    context={\"difficulty\": 0.5},\n",
    "    result=passing_outcome,\n",
    "    agent_id=\"agent_1\",\n",
    ")\n",
    "\n",
    "# The three numeric fields share one format; reason and failure tags print verbatim\n",
    "numeric_fields = (\n",
    "    (\"Raw score\", result.raw_score),\n",
    "    (\"Cost-adjusted\", result.cost_adjusted_score),\n",
    "    (\"Reward\", result.reward_value),\n",
    ")\n",
    "for field_label, field_value in numeric_fields:\n",
    "    print(f\"{field_label}: {field_value:.3f}\")\n",
    "print(f\"Reason: {result.reason}\")\n",
    "print(f\"Failure tags: {result.failure_tags}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Credit Ledger\n",
    "\n",
    "Credits are **non-transferable**, **decaying**, and **capability-scoped**."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ledger = CreditLedger(decay_lambda=0.05)\n",
    "\n",
    "# A successful action earns agent_1 ten credits in the model_call scope\n",
    "ledger.earn(\n",
    "    agent_id=\"agent_1\",\n",
    "    task_id=\"task_1\",\n",
    "    action_id=\"attempt_1\",\n",
    "    amount=10.0,\n",
    "    oracle_score=1.0,\n",
    "    compute_cost=50.0,\n",
    "    reason=\"pass_hidden_test\",\n",
    "    capability_scope=\"model_call\",\n",
    ")\n",
    "balance_after_earn = ledger.balance('agent_1', 'model_call', 'global')\n",
    "print(f\"Balance after earn: {balance_after_earn:.2f}\")\n",
    "\n",
    "# Attempt a transfer between agents (blocked by design)\n",
    "success = ledger.transfer(\"agent_1\", \"agent_2\", 5.0)\n",
    "print(f\"Transfer succeeded: {success}\")\n",
    "\n",
    "# Spend part of the balance, then re-read it\n",
    "ok = ledger.spend(\n",
    "    \"agent_1\",\n",
    "    \"task_1\",\n",
    "    \"retrieval_call\",\n",
    "    3.0,\n",
    "    capability_scope=\"model_call\",\n",
    "    reason=\"retrieval\",\n",
    ")\n",
    "balance_after_spend = ledger.balance('agent_1', 'model_call', 'global')\n",
    "print(f\"Spend succeeded: {ok}, remaining: {balance_after_spend:.2f}\")\n",
    "\n",
    "# Provenance: one printed line per ledger entry returned for agent_1\n",
    "entries = ledger.provenance(\"agent_1\")\n",
    "for entry in entries:\n",
    "    print(f\" {entry.reason}: earn={entry.earned_credit:.1f}, spend={entry.spent_credit:.1f}, balance={entry.remaining_credit:.1f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Resource Broker\n",
    "\n",
    "The broker grants capability-based rights based on credit balance and risk."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "broker = ResourceBroker()\n",
    "\n",
    "# One row per request: (printed label, capability, credit, extra keyword arguments).\n",
    "# NOTE(review): the third positional argument is treated as the credit balance,\n",
    "# based on the scenario descriptions -- confirm against ResourceBroker.request.\n",
    "request_scenarios = [\n",
    "    # Low credit -> deny\n",
    "    (\"Low credit\", \"model_call\", 1.0, {}),\n",
    "    # High credit -> allow\n",
    "    (\"High credit\", \"model_call\", 50.0, {}),\n",
    "    # High-risk with gaming flags -> deny\n",
    "    (\"Gaming flagged\", \"file_write\", 100.0, {\"gaming_flags\": [\"confidence_manipulation\"]}),\n",
    "]\n",
    "for scenario_label, capability, credit, extra_kwargs in request_scenarios:\n",
    "    dec = broker.request(capability, \"agent_1\", credit, **extra_kwargs)\n",
    "    print(f\"{scenario_label}: {dec.decision.value} - {dec.reason}\")\n",
    "\n",
    "# List allowed capabilities\n",
    "allowed = broker.get_allowed_capabilities(\"agent_1\", 50.0)\n",
    "print(f\"Allowed capabilities: {allowed}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. GRPO Reward Hook\n",
    "\n",
    "Connects the oracle to RL reward computation.\n",
    "\n",
    "Usage with TRL:\n",
    "```python\n",
    "from grpo_hook import make_occ_reward_func\n",
    "from trl import GRPOTrainer\n",
    "\n",
    "reward_fn = make_occ_reward_func(mode='code', compute_budget=1e5)\n",
    "trainer = GRPOTrainer(\n",
    " model='Qwen/Qwen2.5-0.5B-Instruct',\n",
    " reward_funcs=reward_fn,\n",
    " train_dataset=ds,\n",
    ")\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Reuses the `oracle` instance created in the Impact Oracle section\n",
    "hook = RewardHook(oracle=oracle, mode=\"code\")\n",
    "\n",
    "# Toy batch: the same prompt three times, each with a different completion\n",
    "prompts = [\"def add(a, b):\\n return\"] * 3\n",
    "completions = [\"a + b\", \"a * b\", \"a + b + 0\"]\n",
    "answers = [\"a + b\", \"a * b\", \"a + b + 0\"]\n",
    "gold_answers = [\"a + b\"] * 3\n",
    "confidences = [0.9, 0.9, 0.6]\n",
    "compute_costs = [5.0, 5.0, 8.0]\n",
    "\n",
    "rewards = hook.compute_rewards(\n",
    "    prompts=prompts,\n",
    "    completions=completions,\n",
    "    answers=answers,\n",
    "    gold_answers=gold_answers,\n",
    "    confidences=confidences,\n",
    "    compute_costs=compute_costs,\n",
    ")\n",
    "print(\"Rewards:\", rewards)\n",
    "\n",
    "# Offline comparison of two policies\n",
    "comparator = OfflinePolicyComparator(reward_hook=hook)\n",
    "policy_a = [{\"reward\": 0.5 + i*0.02, \"failure_tags\": []} for i in range(10)]\n",
    "policy_b = [{\"reward\": 0.4 + i*0.01, \"failure_tags\": []} for i in range(10)]\n",
    "# Stored under its own name so the oracle's `result` from section 1 is not overwritten\n",
    "comparison = comparator.compare(policy_a, policy_b)\n",
    "print(comparison)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Code Benchmark (Simulated)\n",
    "\n",
    "Run the compute allocation benchmark with tiered agents."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from benchmarks.benchmark_code import CodeBenchmark\n",
    "\n",
    "# Benchmark-specific names keep these objects distinct from the other benchmark sections\n",
    "code_bench = CodeBenchmark(n_problems=50, seed=42)\n",
    "code_results = code_bench.run_all()\n",
    "\n",
    "print(f\"{'Strategy':<25} {'pass@1':>8} {'Hidden':>8} {'Compute':>10} {'Savings':>8}\")\n",
    "for label, res in code_results.items():\n",
    "    # Missing numeric metrics fall back to 0.0; missing savings is shown as \"-\"\n",
    "    p1 = res.get('pass_at_1', 0.0)\n",
    "    hid = res.get('hidden_pass', 0.0)\n",
    "    comp = res.get('total_compute', 0.0)\n",
    "    sav = res.get('compute_savings', None)\n",
    "    sav_str = f\"{sav:.1%}\" if sav is not None else \"-\"\n",
    "    print(f\"{label:<25} {p1:>8.3f} {hid:>8.3f} {comp:>10.0f} {sav_str:>8}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Debate Benchmark v2 (Adversarial Agents)\n",
    "\n",
    "Run multi-agent debate with variable-cost agents and adversarial participants."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from benchmarks.benchmark_debate_v2 import DebateBenchmarkV2\n",
    "\n",
    "# Benchmark-specific names keep these objects distinct from the other benchmark sections\n",
    "debate_bench = DebateBenchmarkV2(n_topics=50, n_agents=5, adversarial_fraction=0.4, seed=42)\n",
    "debate_bench.generate_topics()\n",
    "debate_results = debate_bench.run_all()\n",
    "\n",
    "print(f\"{'Strategy':<25} {'Acc':>6} {'Comp/T':>8} {'Turns':>6} {'AdvT':>6} {'Contain':>8}\")\n",
    "# Strategies shown in the table, in display order; a missing key raises KeyError\n",
    "reported_strategies = ['A_equal_turns', 'B_majority_vote', 'C_confidence_weighted', 'E_occ', 'F_occ_no_decay']\n",
    "for key in reported_strategies:\n",
    "    r = debate_results[key]\n",
    "    print(f\"{r['label']:<25} {r['accuracy']:>6.3f} {r['mean_compute_per_topic']:>8.0f} {r['mean_turns']:>6.1f} {r['mean_adv_turns']:>6.1f} {r['bad_agent_containment']:>8.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Anti-Gaming Tests\n",
    "\n",
    "Test the credit system against adversarial attacks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from eval_runner import AblationRunner\n",
    "\n",
    "runner = AblationRunner(seed=42)\n",
    "anti = runner.anti_gaming_tests()\n",
    "\n",
    "for test_name, outcome in anti.items():\n",
    "    if 'accuracy' in outcome:\n",
    "        print(f\"{test_name:25s}: acc={outcome['accuracy']:.3f}, compute={outcome.get('total_compute', 'N/A')}\")\n",
    "    elif 'pass_at_1' in outcome or 'pass@1' in outcome:\n",
    "        # Either spelling of the metric key is accepted; 'pass_at_1' takes precedence\n",
    "        pass_rate = outcome['pass_at_1'] if 'pass_at_1' in outcome else outcome['pass@1']\n",
    "        print(f\"{test_name:25s}: pass@1={pass_rate:.3f}, compute={outcome.get('total_compute', 'N/A')}\")\n",
    "    else:\n",
    "        # No recognised metric key: show the raw entry rather than dropping it silently\n",
    "        print(f\"{test_name!s:25}: {outcome}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}