narcolepticchicken
/

occ-stack

ml-intern

Model card Files Files and versions

xet

Community

narcolepticchicken committed 27 days ago

Commit

0da095b

verified ·

1 Parent(s): c5e904d

Upload broker/broker.py

Browse files

Files changed (1) hide show

broker/broker.py +81 -135

broker/broker.py CHANGED Viewed

@@ -1,10 +1,9 @@
 """
-Resource Broker: capability-based rights based on credits, task state, and risk.
 """
 from dataclasses import dataclass
 from enum import Enum
-from typing import Any, Dict, Optional
 class Decision(Enum):
@@ -19,179 +18,126 @@ class Decision(Enum):
 @dataclass
 class ResourceDecision:
     decision: Decision
-    resource: str
-    agent_id: str
     reason: str
-    allowed_quota: Optional[int] = None
     downgrade_to: Optional[str] = None
-    requires_human: bool = False
 class ResourceBroker:
     """
-    Grants capability-based rights based on:
-    - credit balance (per capability_scope)
-    - task state (urgency, progress)
-    - risk profile (safety class of resource)
-    - agent behavior history (gaming flags)
     """
-    # Resource safety classes
-    SAFETY_LOW = {"model_call_small", "retrieval_call", "debate_turn", "verifier_call"}
-    SAFETY_MEDIUM = {"model_call_large", "memory_write", "shell_execute"}
-    SAFETY_HIGH = {"file_write", "human_escalation"}
-    # Default credit thresholds
     DEFAULT_THRESHOLDS = {
-        "model_call_small": 1.0,
-        "model_call_large": 10.0,
-        "retrieval_call": 2.0,
-        "verifier_call": 5.0,
-        "debate_turn": 1.0,
-        "file_write": 15.0,
-        "shell_execute": 20.0,
-        "memory_write": 8.0,
-        "human_escalation": 50.0,
     }
     def __init__(
         self,
         thresholds: Optional[Dict[str, float]] = None,
-        risk_multiplier: float = 1.5,
     ):
-        self.thresholds = thresholds or dict(self.DEFAULT_THRESHOLDS)
-        self.risk_multiplier = risk_multiplier
-        self._agent_flags: Dict[str, Dict[str, Any]] = {}
     def request(
         self,
-        resource: str,
         agent_id: str,
         credit_balance: float,
-        task_state: Optional[Dict] = None,
-        agent_flags: Optional[Dict] = None,
     ) -> ResourceDecision:
-        """
-        Decide whether to grant a resource request.
-        """
         task_state = task_state or {}
-        agent_flags = agent_flags or {}
-        # Store flags for audit
-        self._agent_flags.setdefault(agent_id, {}).update(agent_flags)
-        # Determine safety class
-        if resource in self.SAFETY_LOW:
-            safety = "low"
-        elif resource in self.SAFETY_MEDIUM:
-            safety = "medium"
-        else:
-            safety = "high"
-        # Adjust threshold based on safety and task urgency
-        threshold = self.thresholds.get(resource, 10.0)
-        urgency = task_state.get("urgency", 0.5)
-        progress = task_state.get("progress", 0.0)
-        gaming_score = agent_flags.get("gaming_score", 0.0)
-        # Risk-adjusted threshold
-        if safety == "medium":
-            threshold *= self.risk_multiplier
-        elif safety == "high":
-            threshold *= self.risk_multiplier * 2.0
-        # Urgency can lower threshold slightly
-        threshold *= max(0.5, 1.0 - urgency * 0.3)
-        # Progress bonus: as task progresses, lower threshold (momentum)
-        threshold *= max(0.7, 1.0 - progress * 0.3)
-        # Gaming flags override
-        if gaming_score > 0.5:
             return ResourceDecision(
-                decision=Decision.ESCALATE,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Gaming detected (score={gaming_score:.2f}). Escalating to human.",
-                requires_human=True,
             )
-        if gaming_score > 0.2:
             return ResourceDecision(
                 decision=Decision.REQUIRE_APPROVAL,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Suspicious activity (score={gaming_score:.2f}). Approval required.",
-                requires_human=True,
             )
-        # Main decision logic
-        if credit_balance >= threshold:
-            if safety == "high":
-                return ResourceDecision(
-                    decision=Decision.REQUIRE_APPROVAL,
-                    resource=resource,
-                    agent_id=agent_id,
-                    reason=f"High-safety resource. Credit sufficient ({credit_balance:.1f} >= {threshold:.1f}) but approval needed.",
-                    requires_human=True,
-                )
             return ResourceDecision(
                 decision=Decision.ALLOW,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Credit sufficient ({credit_balance:.1f} >= {threshold:.1f}). Safety={safety}.",
-                allowed_quota=int(credit_balance / max(1.0, threshold)),
-            )
-        # Credit insufficient — consider downgrade for model calls
-        if resource == "model_call_large" and credit_balance >= self.thresholds.get("model_call_small", 1.0):
-            return ResourceDecision(
-                decision=Decision.DOWNGRADE,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Insufficient credit for large model ({credit_balance:.1f} < {threshold:.1f}). Downgrading to small.",
-                downgrade_to="model_call_small",
             )
-        if resource == "shell_execute" and credit_balance >= self.thresholds.get("file_write", 15.0):
             return ResourceDecision(
-                decision=Decision.DOWNGRADE,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Insufficient credit for shell. Downgrading to file_write.",
-                downgrade_to="file_write",
             )
-        # Low credit but high urgency -> ask for justification
-        if urgency > 0.8:
             return ResourceDecision(
-                decision=Decision.ASK_JUSTIFICATION,
-                resource=resource,
-                agent_id=agent_id,
-                reason=f"Credit insufficient ({credit_balance:.1f} < {threshold:.1f}) but urgency high. Justification required.",
             )
         return ResourceDecision(
             decision=Decision.DENY,
-            resource=resource,
-            agent_id=agent_id,
-            reason=f"Credit insufficient ({credit_balance:.1f} < {threshold:.1f}).",
         )
-    def batch_request(
-        self,
-        resources: list,
-        agent_id: str,
-        credit_balance: float,
-        task_state: Optional[Dict] = None,
-        agent_flags: Optional[Dict] = None,
-    ) -> list:
-        """Evaluate multiple resource requests, returning decisions."""
-        return [
-            self.request(r, agent_id, credit_balance, task_state, agent_flags)
-            for r in resources
-        ]
-    def audit_log(self, agent_id: str) -> Dict:
-        """Return agent's flagged behavior history."""
-        return self._agent_flags.get(agent_id, {})

 """
+Resource Broker - grants capability-based rights based on credits, task state, and risk.
 """
 from dataclasses import dataclass
 from enum import Enum
+from typing import Any, Dict, List, Optional
 class Decision(Enum):
 @dataclass
 class ResourceDecision:
     decision: Decision
     reason: str
+    capability: str
     downgrade_to: Optional[str] = None
 class ResourceBroker:
     """
+    Capability-based access control for agent resources.
+    Risk classes: low (retrieval), medium (model calls), high (file writes, shell).
     """
+    RESOURCE_RISK = {
+        "model_call": "medium",
+        "retrieval_call": "low",
+        "verifier_call": "medium",
+        "debate_turn": "low",
+        "file_write": "high",
+        "shell_execute": "high",
+        "memory_write": "medium",
+        "human_escalation": "high",
+        "larger_model": "medium",
+    }
     DEFAULT_THRESHOLDS = {
+        "low": 0.5,
+        "medium": 2.0,
+        "high": 5.0,
     }
     def __init__(
         self,
         thresholds: Optional[Dict[str, float]] = None,
+        urgency_boost: float = 0.5,
     ):
+        self.thresholds = thresholds or self.DEFAULT_THRESHOLDS.copy()
+        self.urgency_boost = urgency_boost
+        self.denial_history: Dict[str, int] = {}
+        self.approval_history: Dict[str, int] = {}
     def request(
         self,
+        capability: str,
         agent_id: str,
         credit_balance: float,
+        task_state: Optional[Dict[str, Any]] = None,
+        risk_score: float = 0.0,
+        gaming_flags: Optional[List[str]] = None,
     ) -> ResourceDecision:
         task_state = task_state or {}
+        gaming_flags = gaming_flags or []
+        risk_class = self.RESOURCE_RISK.get(capability, "medium")
+        threshold = self.thresholds.get(risk_class, 2.0)
+        # Adjust threshold based on urgency
+        urgency = task_state.get("urgency", 0.0)
+        adjusted_threshold = max(0.1, threshold - urgency * self.urgency_boost)
+        # Gaming detection overrides
+        if gaming_flags:
             return ResourceDecision(
+                decision=Decision.DENY,
+                reason=f"Gaming detected: {gaming_flags}",
+                capability=capability,
             )
+        # High-risk resources with high risk score
+        if risk_class == "high" and risk_score > 0.7:
             return ResourceDecision(
                 decision=Decision.REQUIRE_APPROVAL,
+                reason=f"High risk score {risk_score:.2f} for {capability}",
+                capability=capability,
             )
+        # Credit check
+        if credit_balance >= adjusted_threshold:
             return ResourceDecision(
                 decision=Decision.ALLOW,
+                reason=f"Balance {credit_balance:.2f} >= threshold {adjusted_threshold:.2f}",
+                capability=capability,
             )
+        # Near-threshold: downgrade or ask for justification
+        if credit_balance >= adjusted_threshold * 0.5:
+            if risk_class == "medium":
+                return ResourceDecision(
+                    decision=Decision.DOWNGRADE,
+                    reason=f"Balance {credit_balance:.2f} below threshold, downgrading",
+                    capability=capability,
+                    downgrade_to="retrieval_call" if capability != "retrieval_call" else None,
+                )
             return ResourceDecision(
+                decision=Decision.ASK_JUSTIFICATION,
+                reason=f"Balance {credit_balance:.2f} insufficient, justification required",
+                capability=capability,
             )
+        # Escalation if repeated denials
+        denials = self.denial_history.get(agent_id, 0)
+        if denials > 3:
             return ResourceDecision(
+                decision=Decision.ESCALATE,
+                reason=f"Agent {agent_id} denied {denials} times, escalating",
+                capability=capability,
             )
+        self.denial_history[agent_id] = denials + 1
         return ResourceDecision(
             decision=Decision.DENY,
+            reason=f"Balance {credit_balance:.2f} < threshold {adjusted_threshold:.2f}",
+            capability=capability,
         )
+    def get_allowed_capabilities(self, agent_id: str, credit_balance: float) -> List[str]:
+        """List all capabilities an agent can currently use."""
+        allowed = []
+        for cap in self.RESOURCE_RISK:
+            dec = self.request(cap, agent_id, credit_balance)
+            if dec.decision == Decision.ALLOW:
+                allowed.append(cap)
+        return allowed
+    def set_risk_threshold(self, risk_class: str, threshold: float) -> None:
+        self.thresholds[risk_class] = threshold