Upload notebook_walkthrough.ipynb
Browse files- notebook_walkthrough.ipynb +220 -0
notebook_walkthrough.ipynb
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# OCC Stack Walkthrough\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"This notebook demonstrates the Oracle-Credit-Compute (OCC) stack for agentic compute allocation."
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": null,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"import sys\n",
|
| 19 |
+
"from pathlib import Path\n",
|
| 20 |
+
"sys.path.insert(0, str(Path.cwd()))\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"from oracle.oracle import ImpactOracle\n",
|
| 23 |
+
"from ledger.ledger import CreditLedger\n",
|
| 24 |
+
"from broker.broker import ResourceBroker, Decision\n",
|
| 25 |
+
"from rl.reward import RewardHook, OfflineComparator"
|
| 26 |
+
]
|
| 27 |
+
},
|
| 28 |
+
{
|
| 29 |
+
"cell_type": "markdown",
|
| 30 |
+
"metadata": {},
|
| 31 |
+
"source": [
|
| 32 |
+
"## 1. Impact Oracle\n",
|
| 33 |
+
"\n",
|
| 34 |
+
"The oracle scores whether an action produced measurable marginal value."
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"cell_type": "code",
|
| 39 |
+
"execution_count": null,
|
| 40 |
+
"metadata": {},
|
| 41 |
+
"outputs": [],
|
| 42 |
+
"source": [
|
| 43 |
+
"oracle = ImpactOracle(compute_budget=1e5)\n",
|
| 44 |
+
"\n",
|
| 45 |
+
"# Score a code attempt\n",
|
| 46 |
+
"result = oracle.score(\n",
|
| 47 |
+
" mode=\"code\",\n",
|
| 48 |
+
" action={\"tokens_used\": 50},\n",
|
| 49 |
+
" context={\"previous_passed\": False},\n",
|
| 50 |
+
" result={\"passed\": True, \"hidden_passed\": True, \"compute_cost\": 50},\n",
|
| 51 |
+
" agent_id=\"agent_1\"\n",
|
| 52 |
+
")\n",
|
| 53 |
+
"print(f\"Raw score: {result.raw_score}\")\n",
|
| 54 |
+
"print(f\"Cost-adjusted: {result.cost_adjusted_score}\")\n",
|
| 55 |
+
"print(f\"Reward: {result.reward_value}\")\n",
|
| 56 |
+
"print(f\"Reason: {result.reason}\")"
|
| 57 |
+
]
|
| 58 |
+
},
|
| 59 |
+
{
|
| 60 |
+
"cell_type": "markdown",
|
| 61 |
+
"metadata": {},
|
| 62 |
+
"source": [
|
| 63 |
+
"## 2. Credit Ledger\n",
|
| 64 |
+
"\n",
|
| 65 |
+
"Credits are non-transferable, decaying, and capability-scoped."
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
{
|
| 69 |
+
"cell_type": "code",
|
| 70 |
+
"execution_count": null,
|
| 71 |
+
"metadata": {},
|
| 72 |
+
"outputs": [],
|
| 73 |
+
"source": [
|
| 74 |
+
"ledger = CreditLedger(decay_lambda=0.05)\n",
|
| 75 |
+
"\n",
|
| 76 |
+
"# Agent earns credits\n",
|
| 77 |
+
"ledger.earn(\n",
|
| 78 |
+
" agent_id=\"agent_1\",\n",
|
| 79 |
+
" task_id=\"task_1\",\n",
|
| 80 |
+
" action_id=\"attempt_1\",\n",
|
| 81 |
+
" amount=10.0,\n",
|
| 82 |
+
" oracle_score=1.0,\n",
|
| 83 |
+
" compute_cost=50.0,\n",
|
| 84 |
+
" reason=\"pass_hidden_test\"\n",
|
| 85 |
+
")\n",
|
| 86 |
+
"\n",
|
| 87 |
+
"print(f\"Balance: {ledger.balance('agent_1')}\")\n",
|
| 88 |
+
"\n",
|
| 89 |
+
"# Try to transfer (blocked)\n",
|
| 90 |
+
"success = ledger.transfer(\"agent_1\", \"agent_2\", 5.0)\n",
|
| 91 |
+
"print(f\"Transfer succeeded: {success}\")\n",
|
| 92 |
+
"\n",
|
| 93 |
+
"# Spend credits\n",
|
| 94 |
+
"ok, entry = ledger.spend(\"agent_1\", \"task_1\", \"action_2\", 3.0, reason=\"retrieval_call\")\n",
|
| 95 |
+
"print(f\"Spend succeeded: {ok}, remaining: {ledger.balance('agent_1')}\")"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"cell_type": "markdown",
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"source": [
|
| 102 |
+
"## 3. Resource Broker\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"The broker grants capability-based rights according to credit balance and risk."
|
| 105 |
+
]
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"cell_type": "code",
|
| 109 |
+
"execution_count": null,
|
| 110 |
+
"metadata": {},
|
| 111 |
+
"outputs": [],
|
| 112 |
+
"source": [
|
| 113 |
+
"broker = ResourceBroker()\n",
|
| 114 |
+
"\n",
|
| 115 |
+
"# Low credit -> deny\n",
|
| 116 |
+
"dec = broker.request(\"model_call_large\", \"agent_1\", 1.0)\n",
|
| 117 |
+
"print(f\"Low credit: {dec.decision.value} - {dec.reason}\")\n",
|
| 118 |
+
"\n",
|
| 119 |
+
"# High credit -> allow (with approval for high-risk)\n",
|
| 120 |
+
"dec = broker.request(\"model_call_large\", \"agent_1\", 50.0)\n",
|
| 121 |
+
"print(f\"High credit: {dec.decision.value} - {dec.reason}\")\n",
|
| 122 |
+
"\n",
|
| 123 |
+
"# Gaming detected -> escalate\n",
|
| 124 |
+
"dec = broker.request(\"file_write\", \"agent_1\", 100.0, agent_flags={\"gaming_score\": 0.6})\n",
|
| 125 |
+
"print(f\"Gaming: {dec.decision.value} - {dec.reason}\")"
|
| 126 |
+
]
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"cell_type": "markdown",
|
| 130 |
+
"metadata": {},
|
| 131 |
+
"source": [
|
| 132 |
+
"## 4. GRPO Reward Hook\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"The reward hook connects the oracle to RL reward computation."
|
| 135 |
+
]
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"cell_type": "code",
|
| 139 |
+
"execution_count": null,
|
| 140 |
+
"metadata": {},
|
| 141 |
+
"outputs": [],
|
| 142 |
+
"source": [
|
| 143 |
+
"hook = RewardHook(oracle, ledger, broker, mode=\"code\", agent_id=\"rl_agent\")\n",
|
| 144 |
+
"\n",
|
| 145 |
+
"prompts = [\"def add(a, b):\\n return\"] * 3\n",
|
| 146 |
+
"completions = [\"a + b\", \"a * b\", \"a + b + 0\"]\n",
|
| 147 |
+
"oracle_inputs = [\n",
|
| 148 |
+
" {\"action\": {}, \"context\": {}, \"result\": {\"passed\": True, \"hidden_passed\": True, \"compute_cost\": 5}, \"task_id\": \"t1\", \"action_id\": \"a1\"},\n",
|
| 149 |
+
" {\"action\": {}, \"context\": {}, \"result\": {\"passed\": False, \"hidden_passed\": False, \"compute_cost\": 5}, \"task_id\": \"t1\", \"action_id\": \"a2\"},\n",
|
| 150 |
+
" {\"action\": {}, \"context\": {}, \"result\": {\"passed\": True, \"hidden_passed\": True, \"compute_cost\": 5}, \"task_id\": \"t1\", \"action_id\": \"a3\"},\n",
|
| 151 |
+
"]\n",
|
| 152 |
+
"\n",
|
| 153 |
+
"rewards = hook.compute_rewards(prompts, completions, oracle_inputs)\n",
|
| 154 |
+
"print(\"Rewards:\", rewards)"
|
| 155 |
+
]
|
| 156 |
+
},
|
| 157 |
+
{
|
| 158 |
+
"cell_type": "markdown",
|
| 159 |
+
"metadata": {},
|
| 160 |
+
"source": [
|
| 161 |
+
"## 5. Code Benchmark\n",
|
| 162 |
+
"\n",
|
| 163 |
+
"Run the compute allocation benchmark."
|
| 164 |
+
]
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"cell_type": "code",
|
| 168 |
+
"execution_count": null,
|
| 169 |
+
"metadata": {},
|
| 170 |
+
"outputs": [],
|
| 171 |
+
"source": [
|
| 172 |
+
"from benchmarks.benchmark_code import CodeBenchmark\n",
|
| 173 |
+
"\n",
|
| 174 |
+
"bench = CodeBenchmark(max_problems=50, seed=42)\n",
|
| 175 |
+
"bench.load_data()\n",
|
| 176 |
+
"results = bench.run_all()\n",
|
| 177 |
+
"\n",
|
| 178 |
+
"for label, res in results.items():\n",
|
| 179 |
+
" print(f\"{label:20s}: pass@1={res['pass@1']:.3f}, compute/problem={res['compute_per_problem']:.0f}\")"
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"cell_type": "markdown",
|
| 184 |
+
"metadata": {},
|
| 185 |
+
"source": [
|
| 186 |
+
"## 6. Ablation Study\n",
|
| 187 |
+
"\n",
|
| 188 |
+
"Compare OCC with ablated configurations."
|
| 189 |
+
]
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"cell_type": "code",
|
| 193 |
+
"execution_count": null,
|
| 194 |
+
"metadata": {},
|
| 195 |
+
"outputs": [],
|
| 196 |
+
"source": [
|
| 197 |
+
"from eval_runner import AblationRunner\n",
|
| 198 |
+
"\n",
|
| 199 |
+
"runner = AblationRunner(seed=42)\n",
|
| 200 |
+
"code_ablations = runner.ablation_code()\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"for k, v in code_ablations.items():\n",
|
| 203 |
+
" print(f\"{k:20s}: pass@1={v.get('pass@1', float('nan')):.3f}, compute={v.get('total_compute', float('nan')):.0f}\")  # missing metrics render as nan instead of raising ValueError"
|
| 204 |
+
]
|
| 205 |
+
}
|
| 206 |
+
],
|
| 207 |
+
"metadata": {
|
| 208 |
+
"kernelspec": {
|
| 209 |
+
"display_name": "Python 3",
|
| 210 |
+
"language": "python",
|
| 211 |
+
"name": "python3"
|
| 212 |
+
},
|
| 213 |
+
"language_info": {
|
| 214 |
+
"name": "python",
|
| 215 |
+
"version": "3.10.0"
|
| 216 |
+
}
|
| 217 |
+
},
|
| 218 |
+
"nbformat": 4,
|
| 219 |
+
"nbformat_minor": 4
|
| 220 |
+
}
|