narcolepticchicken
/

occ-stack

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed 27 days ago

Commit

944b77c

verified ·

1 Parent(s): b2c7131

Upload notebook_walkthrough.ipynb

Browse files

Files changed (1) hide show

notebook_walkthrough.ipynb +220 -0

notebook_walkthrough.ipynb ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# OCC Stack Walkthrough\n",
+    "\n",
+    "This notebook demonstrates the Oracle-Credit-Compute (OCC) stack for agentic compute allocation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import sys\n",
+    "from pathlib import Path\n",
+    "sys.path.insert(0, str(Path.cwd()))\n",
+    "\n",
+    "from oracle.oracle import ImpactOracle\n",
+    "from ledger.ledger import CreditLedger\n",
+    "from broker.broker import ResourceBroker, Decision\n",
+    "from rl.reward import RewardHook, OfflineComparator"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Impact Oracle\n",
+    "\n",
+    "The oracle scores whether an action produced measurable marginal value."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "oracle = ImpactOracle(compute_budget=1e5)\n",
+    "\n",
+    "# Keyword arguments for a single code-mode scoring call: the result marks both\n",
+    "# `passed` and `hidden_passed` as True, the context marks `previous_passed` as False.\n",
+    "score_kwargs = {\n",
+    "    \"mode\": \"code\",\n",
+    "    \"action\": {\"tokens_used\": 50},\n",
+    "    \"context\": {\"previous_passed\": False},\n",
+    "    \"result\": {\"passed\": True, \"hidden_passed\": True, \"compute_cost\": 50},\n",
+    "    \"agent_id\": \"agent_1\",\n",
+    "}\n",
+    "result = oracle.score(**score_kwargs)\n",
+    "\n",
+    "# Report each field of the returned score object on its own line.\n",
+    "print(f\"Raw score: {result.raw_score}\")\n",
+    "print(f\"Cost-adjusted: {result.cost_adjusted_score}\")\n",
+    "print(f\"Reward: {result.reward_value}\")\n",
+    "print(f\"Reason: {result.reason}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Credit Ledger\n",
+    "\n",
+    "Credits are non-transferable, decaying, and capability-scoped."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ledger = CreditLedger(decay_lambda=0.05)\n",
+    "\n",
+    "# Record one earning for agent_1; every field is passed by keyword.\n",
+    "earn_kwargs = {\n",
+    "    \"agent_id\": \"agent_1\",\n",
+    "    \"task_id\": \"task_1\",\n",
+    "    \"action_id\": \"attempt_1\",\n",
+    "    \"amount\": 10.0,\n",
+    "    \"oracle_score\": 1.0,\n",
+    "    \"compute_cost\": 50.0,\n",
+    "    \"reason\": \"pass_hidden_test\",\n",
+    "}\n",
+    "ledger.earn(**earn_kwargs)\n",
+    "\n",
+    "print(f\"Balance: {ledger.balance('agent_1')}\")\n",
+    "\n",
+    "# Attempt a transfer to a second agent (expected to be blocked) and show the outcome.\n",
+    "success = ledger.transfer(\"agent_1\", \"agent_2\", 5.0)\n",
+    "print(f\"Transfer succeeded: {success}\")\n",
+    "\n",
+    "# Spend part of the balance; spend() returns a pair, unpacked here as (ok, entry).\n",
+    "ok, entry = ledger.spend(\n",
+    "    \"agent_1\",\n",
+    "    \"task_1\",\n",
+    "    \"action_2\",\n",
+    "    3.0,\n",
+    "    reason=\"retrieval_call\",\n",
+    ")\n",
+    "print(f\"Spend succeeded: {ok}, remaining: {ledger.balance('agent_1')}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. Resource Broker\n",
+    "\n",
+    "The broker grants capability-based rights according to an agent's credit balance and risk."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "broker = ResourceBroker()\n",
+    "\n",
+    "# Each scenario: (label, positional request arguments, keyword request arguments).\n",
+    "scenarios = [\n",
+    "    # Low credit -> deny\n",
+    "    (\"Low credit\", (\"model_call_large\", \"agent_1\", 1.0), {}),\n",
+    "    # High credit -> allow (with approval for high-risk)\n",
+    "    (\"High credit\", (\"model_call_large\", \"agent_1\", 50.0), {}),\n",
+    "    # Gaming detected -> escalate\n",
+    "    (\"Gaming\", (\"file_write\", \"agent_1\", 100.0), {\"agent_flags\": {\"gaming_score\": 0.6}}),\n",
+    "]\n",
+    "\n",
+    "# Issue the requests in order and print each decision right after it is made.\n",
+    "for label, args, kwargs in scenarios:\n",
+    "    dec = broker.request(*args, **kwargs)\n",
+    "    print(f\"{label}: {dec.decision.value} - {dec.reason}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. GRPO Reward Hook\n",
+    "\n",
+    "Connects the oracle to RL reward computation."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "hook = RewardHook(oracle, ledger, broker, mode=\"code\", agent_id=\"rl_agent\")\n",
+    "\n",
+    "# Three candidate completions, all paired with the same prompt.\n",
+    "completions = [\"a + b\", \"a * b\", \"a + b + 0\"]\n",
+    "prompts = [\"def add(a, b):\\n    return\"] * len(completions)\n",
+    "\n",
+    "# One (action_id, passed) pair per completion; `passed` fills both result flags.\n",
+    "outcomes = [(\"a1\", True), (\"a2\", False), (\"a3\", True)]\n",
+    "oracle_inputs = [\n",
+    "    {\n",
+    "        \"action\": {},\n",
+    "        \"context\": {},\n",
+    "        \"result\": {\"passed\": passed, \"hidden_passed\": passed, \"compute_cost\": 5},\n",
+    "        \"task_id\": \"t1\",\n",
+    "        \"action_id\": action_id,\n",
+    "    }\n",
+    "    for action_id, passed in outcomes\n",
+    "]\n",
+    "\n",
+    "rewards = hook.compute_rewards(prompts, completions, oracle_inputs)\n",
+    "print(\"Rewards:\", rewards)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Code Benchmark\n",
+    "\n",
+    "Run the compute allocation benchmark."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from benchmarks.benchmark_code import CodeBenchmark\n",
+    "\n",
+    "# Build the benchmark, load its data, then collect the results of run_all().\n",
+    "bench = CodeBenchmark(max_problems=50, seed=42)\n",
+    "bench.load_data()\n",
+    "results = bench.run_all()\n",
+    "\n",
+    "# `results` maps a label to a metrics dict; print one summary line per label.\n",
+    "for label, metrics in results.items():\n",
+    "    pass_at_1 = metrics['pass@1']\n",
+    "    compute_per_problem = metrics['compute_per_problem']\n",
+    "    print(f\"{label:20s}: pass@1={pass_at_1:.3f}, compute/problem={compute_per_problem:.0f}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Ablation Study\n",
+    "\n",
+    "Compare OCC with ablated configurations."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from eval_runner import AblationRunner\n",
+    "\n",
+    "\n",
+    "def format_metric(value, spec):\n",
+    "    \"\"\"Format `value` with the format spec `spec`, or return 'N/A' when it is None.\"\"\"\n",
+    "    return \"N/A\" if value is None else format(value, spec)\n",
+    "\n",
+    "\n",
+    "runner = AblationRunner(seed=42)\n",
+    "code_ablations = runner.ablation_code()\n",
+    "\n",
+    "# A string default such as 'N/A' cannot go through a float format spec like ':.3f'\n",
+    "# (it raises ValueError), so each metric is formatted before it is interpolated.\n",
+    "for config_name, metrics in code_ablations.items():\n",
+    "    pass_at_1 = format_metric(metrics.get(\"pass@1\"), \".3f\")\n",
+    "    total_compute = format_metric(metrics.get(\"total_compute\"), \".0f\")\n",
+    "    print(f\"{config_name:20s}: pass@1={pass_at_1}, compute={total_compute}\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.10.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}