narcolepticchicken
/

occ-stack

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed 27 days ago

Commit

3b67aea

verified ·

1 Parent(s): a98a7f5

Upload broker/broker.py

Browse files

Files changed (1) hide show

broker/broker.py +197 -0

broker/broker.py ADDED Viewed

	@@ -0,0 +1,197 @@

+"""
+Resource Broker: capability-based rights based on credits, task state, and risk.
+"""
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Dict, Optional
class Decision(Enum):
    """Verdicts the resource broker can return for a resource request.

    Each member's value is the lowercase string form of its name.
    """

    # Credit meets the adjusted threshold and no approval is needed.
    ALLOW = "allow"
    # Credit is insufficient and no softer outcome applies.
    DENY = "deny"
    # A human must approve (high-safety resource or suspicious agent).
    REQUIRE_APPROVAL = "require_approval"
    # A substitute resource is offered in place of the requested one.
    DOWNGRADE = "downgrade"
    # Gaming was detected; the request is handed to a human.
    ESCALATE = "escalate"
    # Credit is insufficient but urgency is high; the agent must justify.
    ASK_JUSTIFICATION = "ask_justification"
@dataclass
class ResourceDecision:
    """Record describing how a single resource request was resolved.

    The first four fields are required; the remaining ones default to
    "not applicable" and are only filled in by some verdicts.
    """

    # --- always present -------------------------------------------------
    decision: Decision  # the verdict itself
    resource: str       # name of the resource that was requested
    agent_id: str       # identifier of the requesting agent
    reason: str         # human-readable explanation of the verdict

    # --- verdict-specific extras ----------------------------------------
    allowed_quota: Optional[int] = None  # set on ALLOW decisions
    downgrade_to: Optional[str] = None   # substitute resource on DOWNGRADE
    requires_human: bool = False         # True when a human must step in
class ResourceBroker:
    """
    Grants capability-based rights based on:
    - credit balance (per capability_scope)
    - task state (urgency, progress)
    - risk profile (safety class of resource)
    - agent behavior history (gaming flags)

    The broker never deducts credits: a threshold is the minimum balance an
    agent must hold for a resource, not a price that is charged.
    """

    # Resource safety classes. A resource listed in neither LOW nor MEDIUM
    # is treated as high-safety, so unknown resources fail closed.
    SAFETY_LOW = {"model_call_small", "retrieval_call", "debate_turn", "verifier_call"}
    SAFETY_MEDIUM = {"model_call_large", "memory_write", "shell_execute"}
    SAFETY_HIGH = {"file_write", "human_escalation"}

    # Default credit thresholds (before risk/urgency/progress adjustment).
    DEFAULT_THRESHOLDS = {
        "model_call_small": 1.0,
        "model_call_large": 10.0,
        "retrieval_call": 2.0,
        "verifier_call": 5.0,
        "debate_turn": 1.0,
        "file_write": 15.0,
        "shell_execute": 20.0,
        "memory_write": 8.0,
        "human_escalation": 50.0,
    }

    def __init__(
        self,
        thresholds: Optional[Dict[str, float]] = None,
        risk_multiplier: float = 1.5,
    ):
        """
        Create a broker.

        Args:
            thresholds: Base credit threshold per resource name. When omitted
                (or empty) ``DEFAULT_THRESHOLDS`` is used. Resources missing
                from the mapping fall back to 10.0 at request time.
            risk_multiplier: Factor applied to the threshold of medium-safety
                resources; high-safety resources get twice this factor.
        """
        # Copy so that later mutation of the caller's dict (or of the class
        # defaults) cannot silently change this broker's policy.
        self.thresholds = dict(thresholds or self.DEFAULT_THRESHOLDS)
        self.risk_multiplier = risk_multiplier
        self._agent_flags: Dict[str, Dict[str, Any]] = {}

    def _safety_class(self, resource: str) -> str:
        """Return "low", "medium" or "high" for *resource* (unknown -> "high")."""
        if resource in self.SAFETY_LOW:
            return "low"
        if resource in self.SAFETY_MEDIUM:
            return "medium"
        return "high"

    def _adjusted_threshold(
        self,
        resource: str,
        safety: str,
        urgency: float,
        progress: float,
    ) -> float:
        """Return the base threshold scaled by risk, urgency and progress."""
        threshold = self.thresholds.get(resource, 10.0)
        # Riskier resources demand a larger balance.
        if safety == "medium":
            threshold *= self.risk_multiplier
        elif safety == "high":
            threshold *= self.risk_multiplier * 2.0
        # Urgency can lower the threshold, but never below half.
        threshold *= max(0.5, 1.0 - urgency * 0.3)
        # Progress bonus (momentum): lowers the threshold, floored at 70%.
        threshold *= max(0.7, 1.0 - progress * 0.3)
        return threshold

    def request(
        self,
        resource: str,
        agent_id: str,
        credit_balance: float,
        task_state: Optional[Dict] = None,
        agent_flags: Optional[Dict] = None,
    ) -> ResourceDecision:
        """
        Decide whether to grant a resource request.

        Args:
            resource: Name of the requested resource.
            agent_id: Identifier of the requesting agent.
            credit_balance: Credits the agent currently holds.
            task_state: Optional dict; the keys ``"urgency"`` (default 0.5)
                and ``"progress"`` (default 0.0) are read.
            agent_flags: Optional dict; the key ``"gaming_score"``
                (default 0.0) is read. All flags are merged into the
                per-agent audit record.

        Returns:
            A ``ResourceDecision``. In order of precedence: ESCALATE or
            REQUIRE_APPROVAL for flagged agents; REQUIRE_APPROVAL or ALLOW
            when credit meets the adjusted threshold; otherwise DOWNGRADE,
            ASK_JUSTIFICATION or DENY.
        """
        task_state = task_state or {}
        agent_flags = agent_flags or {}

        def decide(decision: Decision, reason: str, **extra: Any) -> ResourceDecision:
            # Every verdict shares resource/agent_id; only the rest varies.
            return ResourceDecision(
                decision=decision,
                resource=resource,
                agent_id=agent_id,
                reason=reason,
                **extra,
            )

        # Store flags for audit
        self._agent_flags.setdefault(agent_id, {}).update(agent_flags)

        # Gaming flags override everything else, so check them before doing
        # any threshold arithmetic. Only the flags passed with this call are
        # consulted, not the stored history.
        gaming_score = agent_flags.get("gaming_score", 0.0)
        if gaming_score > 0.5:
            return decide(
                Decision.ESCALATE,
                f"Gaming detected (score={gaming_score:.2f}). Escalating to human.",
                requires_human=True,
            )
        if gaming_score > 0.2:
            return decide(
                Decision.REQUIRE_APPROVAL,
                f"Suspicious activity (score={gaming_score:.2f}). Approval required.",
                requires_human=True,
            )

        safety = self._safety_class(resource)
        urgency = task_state.get("urgency", 0.5)
        progress = task_state.get("progress", 0.0)
        threshold = self._adjusted_threshold(resource, safety, urgency, progress)

        # Main decision logic
        if credit_balance >= threshold:
            if safety == "high":
                return decide(
                    Decision.REQUIRE_APPROVAL,
                    f"High-safety resource. Credit sufficient ({credit_balance:.1f} >= {threshold:.1f}) but approval needed.",
                    requires_human=True,
                )
            # The adjusted threshold can drop below 1.0, in which case the
            # division (whose divisor is clamped to 1.0) may truncate to 0.
            # An ALLOW must grant at least one use, so floor the quota at 1.
            quota = max(1, int(credit_balance / max(1.0, threshold)))
            return decide(
                Decision.ALLOW,
                f"Credit sufficient ({credit_balance:.1f} >= {threshold:.1f}). Safety={safety}.",
                allowed_quota=quota,
            )

        # Credit insufficient — consider downgrade for model calls
        if resource == "model_call_large" and credit_balance >= self.thresholds.get("model_call_small", 1.0):
            return decide(
                Decision.DOWNGRADE,
                f"Insufficient credit for large model ({credit_balance:.1f} < {threshold:.1f}). Downgrading to small.",
                downgrade_to="model_call_small",
            )
        if resource == "shell_execute" and credit_balance >= self.thresholds.get("file_write", 15.0):
            # The check deliberately uses file_write's base threshold, not
            # its risk-adjusted one. A direct request for a high-safety
            # resource always needs approval, so the downgrade path must
            # carry the same human-in-the-loop flag rather than bypass it.
            return decide(
                Decision.DOWNGRADE,
                "Insufficient credit for shell. Downgrading to file_write.",
                downgrade_to="file_write",
                requires_human="file_write" in self.SAFETY_HIGH,
            )

        # Low credit but high urgency -> ask for justification
        if urgency > 0.8:
            return decide(
                Decision.ASK_JUSTIFICATION,
                f"Credit insufficient ({credit_balance:.1f} < {threshold:.1f}) but urgency high. Justification required.",
            )
        return decide(
            Decision.DENY,
            f"Credit insufficient ({credit_balance:.1f} < {threshold:.1f}).",
        )

    def batch_request(
        self,
        resources: list,
        agent_id: str,
        credit_balance: float,
        task_state: Optional[Dict] = None,
        agent_flags: Optional[Dict] = None,
    ) -> list:
        """
        Evaluate multiple resource requests, returning decisions.

        Each resource is judged independently against the same
        ``credit_balance``; nothing is deducted between items. The result
        list is in the same order as ``resources``.
        """
        return [
            self.request(r, agent_id, credit_balance, task_state, agent_flags)
            for r in resources
        ]

    def audit_log(self, agent_id: str) -> Dict:
        """
        Return agent's flagged behavior history.

        The result is a shallow copy of the flags accumulated across calls
        to ``request`` (later values overwrite earlier ones per key), or an
        empty dict for an unknown agent. Mutating it does not affect the
        broker's stored record.
        """
        return dict(self._agent_flags.get(agent_id, {}))