# agent-arena/core/agent.py
# Page-header residue from the hosting site: "nice-bill's picture"
# Commit message: "deploy from github"
# Commit: 17390ee (verified)
"""Agent class for DeFi simulation."""
import json
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass, field
from api.minimax_client import MiniMaxClient
from config import INITIAL_TOKENS
@dataclass
class Agent:
    """DeFi trading agent powered by MiniMax.

    Holds the agent's token balances, its decision history, alliance
    bookkeeping and the boredom (inaction) penalty state, and knows how to
    turn an LLM decision into pool operations.

    Attributes:
        name: Identifier used in prompts and passed to pool calls.
        token_a: Current balance of token A.
        token_b: Current balance of token B.
        trade_history: One dict per ``decide`` call (turn, action,
            reasoning, thinking).
        learning_summary: Text produced by ``update_learning`` and fed back
            into later prompts.
        alliances: Partner name -> status string. This class writes
            ``"proposed"`` and reads ``"success"``.
        alliance_proposals: Partner name -> number of recorded proposals.
        consecutive_inaction: Length of the current inaction streak.
        total_boredom_penalty: Sum of all boredom penalties applied so far.
        BOREDOM_THRESHOLD: Inaction streak length at which penalties start.
        BOREDOM_PENALTY_PER_TURN: Tokens charged per overdue turn.
    """

    name: str
    token_a: float = INITIAL_TOKENS
    token_b: float = INITIAL_TOKENS
    trade_history: List[Dict] = field(default_factory=list)
    learning_summary: str = ""
    alliances: Dict[str, str] = field(default_factory=dict)
    alliance_proposals: Dict[str, int] = field(default_factory=dict)  # Track proposals per partner
    consecutive_inaction: int = 0  # Track boredom
    total_boredom_penalty: float = 0  # Accumulated penalty

    # Boredom penalty config - MORE AGGRESSIVE
    # NOTE: because these are annotated, @dataclass treats them as regular
    # init fields (overridable per instance), not as class-level constants.
    BOREDOM_THRESHOLD: int = 1  # Start penalizing after 1 inaction (immediate!)
    BOREDOM_PENALTY_PER_TURN: float = 10.0  # Lose 10 tokens per turn of inaction

    def __post_init__(self):
        """Create the MiniMax client used for all LLM calls."""
        self.client = MiniMaxClient()

    def get_state(self) -> Dict:
        """Get current state for decision making.

        Returns:
            A dict with the name, rounded balances, rounded profit, the
            alliances mapping, the inaction streak and the rounded total
            boredom penalty. ``alliances`` is the live dict, not a copy.
        """
        return {
            "name": self.name,
            "token_a": round(self.token_a, 2),
            "token_b": round(self.token_b, 2),
            "profit": round(self.calculate_profit(), 2),
            "alliances": self.alliances,
            "consecutive_inaction": self.consecutive_inaction,
            "boredom_penalty": round(self.total_boredom_penalty, 2),
        }

    def decide(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> Tuple[Dict, str]:
        """
        Ask MiniMax for a decision based on current state.

        The decision is also appended to ``trade_history``.

        Args:
            observation: Forwarded to the prompt builder.
            pool_state: Pool snapshot forwarded to the prompt builder.
            other_agents: Agents whose public state is shown in the prompt.
            turn: Turn number recorded in the history entry.

        Returns:
            Tuple of (decision_dict, thinking_text)
        """
        prompt = self._build_prompt(observation, pool_state, other_agents, turn)
        system_prompt = """You are a strategic DeFi trader in an automated market simulation.
Analyze the market state and make optimal trading decisions.
Output ONLY valid JSON with your reasoning."""
        # NOTE(review): assumes the client returns a dict as the first
        # element; confirm against MiniMaxClient.call.
        decision, thinking = self.client.call(prompt, system_prompt)

        # Log the decision ("action_type" is accepted as an alternate key)
        self.trade_history.append({
            "turn": turn,
            "action": decision.get("action", decision.get("action_type", "unknown")),
            "reasoning": decision.get("reasoning", ""),
            "thinking": thinking,
        })
        return decision, thinking

    def _pending_boredom_penalty(self) -> float:
        """Return the penalty owed for the current inaction streak.

        Single source of truth for the formula used both in the prompt
        warning and when the penalty is actually charged. Returns 0.0 while
        the streak is below ``BOREDOM_THRESHOLD``.
        """
        if self.consecutive_inaction < self.BOREDOM_THRESHOLD:
            return 0.0
        overdue_turns = self.consecutive_inaction - self.BOREDOM_THRESHOLD + 1
        return overdue_turns * self.BOREDOM_PENALTY_PER_TURN

    def _build_prompt(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> str:
        """Build the decision prompt.

        ``observation`` and ``turn`` are accepted for interface symmetry
        with ``decide`` but are not currently rendered into the prompt.
        Missing ``pool_state`` keys fall back to defaults (reserves 1000,
        price 1.0, liquidity 1000000).
        """
        other_states = [a.get_state() for a in other_agents if a.name != self.name]

        # Find allied agents
        allied_names = [ally for ally, status in self.alliances.items() if status == 'success']
        allied_info = ""
        if allied_names:
            allied_info = f"\nYour ALLIES: {', '.join(allied_names)} - Coordinate with them for BONUS REWARDS!"

        # Boredom warning
        boredom_warning = ""
        if self.consecutive_inaction >= self.BOREDOM_THRESHOLD:
            penalty = self._pending_boredom_penalty()
            boredom_warning = f"""
!!! URGENT: You have been inactive for {self.consecutive_inaction} consecutive turns.
You are losing {penalty:.1f} tokens per turn due to boredom penalty.
ACT NOW to avoid further losses!"""

        # Calculate market insights
        reserve_a = pool_state.get('reserve_a', 1000)
        reserve_b = pool_state.get('reserve_b', 1000)
        price_ab = pool_state.get('price_ab', 1.0)
        liquidity = pool_state.get('total_liquidity', 1000000)

        # Determine if pool is imbalanced (guard against an empty B reserve)
        imbalance = reserve_a / reserve_b if reserve_b > 0 else 1
        if imbalance > 1.5:
            market_advice = "Pool is A-heavy (A is cheaper). Consider buying B or providing A liquidity."
        elif imbalance < 0.67:
            market_advice = "Pool is B-heavy (B is cheaper). Consider buying A or providing B liquidity."
        else:
            market_advice = "Pool is balanced. Look for other opportunities."

        # Check tokens for trading decisions
        token_advice = ""
        if self.token_a < 20 and self.token_b > 50:
            token_advice = "You have low Token A! Prioritize getting more A."
        elif self.token_b < 20 and self.token_a > 50:
            token_advice = "You have low Token B! Prioritize getting more B."
        elif self.token_a > 150 and self.token_b > 150:
            token_advice = "You have excess tokens. Consider providing liquidity for fee rewards (+8 bonus)."

        learning_text = self.learning_summary if self.learning_summary else "No previous runs yet."

        prompt = f"""
You are {self.name}, an AI agent in a DeFi market simulation.
=== YOUR STATE ===
Token A: {self.token_a:.2f}
Token B: {self.token_b:.2f}
Profit: {self.calculate_profit():.2f}
Consecutive inaction: {self.consecutive_inaction}
{allied_info}
{boredom_warning}
=== MARKET STATE ===
Pool reserves: A={reserve_a:.2f}, B={reserve_b:.2f}
Price (A/B): {price_ab:.4f}
Total liquidity: {liquidity:.2f}
IMBALANCE RATIO: {imbalance:.2f}x
{market_advice}
{token_advice}
=== OTHER AGENTS ===
{json.dumps(other_states, indent=2)}
=== YOUR LEARNING ===
{learning_text}
=== REWARDS FOR ACTIONS ===
- SWAP: Active trading +3 tokens, profitable swap +5 extra!
- PROVIDE_LIQUIDITY: Earns fees from all swaps, +8 bonus tokens (BEST for high balances)
- PROPOSE_ALLIANCE: If they accept, you BOTH get +4 bonus tokens (repeating gives less!)
- COORDINATED TRADES: Trade during volatility +5 bonus tokens!
- POSITIVE PROFIT: End turn with profit = +15 bonus tokens!
- ESCAPE VELOCITY: TOP AGENT gets 2x on ALL bonuses!
=== DECISION GUIDE ===
- If tokens > 150 each: PROVIDE_LIQUIDITY (best returns +8 bonus)
- If pool imbalanced > 1.5x: Buy the cheaper token
- If tokens < 20 of either: Prioritize getting more of that token
- If you have allies: Consider coordinated actions
- DO NOT do_nothing - you lose 10 tokens/turn!
Output JSON:
{{
"action": "swap|provide_liquidity|propose_alliance|do_nothing",
"reasoning": "your reasoning",
"payload": {{...action specific data...}}
}}
"""
        return prompt

    def calculate_profit(self) -> float:
        """Calculate profit from initial state.

        Profit is the combined A+B balance minus the combined starting
        balance (``INITIAL_TOKENS`` of each token); no price weighting is
        applied.
        """
        return (self.token_a + self.token_b) - (INITIAL_TOKENS * 2)

    def apply_boredom_penalty(self) -> float:
        """
        Apply boredom penalty for inaction.

        The penalty grows linearly with the length of the inaction streak
        and is deducted from ``token_a`` only; the balance is not clamped,
        so it may go negative.

        Returns the penalty amount applied (0.0 if below the threshold).
        """
        if self.consecutive_inaction < self.BOREDOM_THRESHOLD:
            return 0.0
        penalty = self._pending_boredom_penalty()
        self.token_a -= penalty
        self.total_boredom_penalty += penalty
        return penalty

    def reset_inaction_counter(self):
        """Reset inaction counter when taking active action."""
        self.consecutive_inaction = 0

    def increment_inaction_counter(self):
        """Increment inaction counter for do_nothing."""
        self.consecutive_inaction += 1

    def get_alliance_fatigue(self, partner: str) -> float:
        """
        Calculate alliance fatigue penalty.

        Repeated proposals to same partner give diminishing returns.
        Returns multiplier (1.0 = no fatigue, 0.0 = max fatigue).
        """
        proposals = self.alliance_proposals.get(partner, 0)
        # First proposal: 100% bonus
        # Second: 50% bonus
        # Third+: 0% bonus
        if proposals == 0:
            return 1.0
        if proposals == 1:
            return 0.5
        return 0.0

    def record_alliance_proposal(self, partner: str):
        """Record that we proposed alliance to this partner."""
        self.alliance_proposals[partner] = self.alliance_proposals.get(partner, 0) + 1

    def infer_strategy(self) -> str:
        """Infer the agent's strategy from recent actions.

        Returns the most common ``action`` among the last 10 history
        entries, or ``"unknown"`` when there is nothing to count.
        """
        if not self.trade_history:
            return "unknown"
        recent = self.trade_history[-10:]
        actions = [h["action"] for h in recent if "action" in h]
        if not actions:
            return "unknown"
        # Return most common action
        from collections import Counter
        return Counter(actions).most_common(1)[0][0]

    def update_learning(self, run_number: int, metrics: Dict):
        """Extract learnings after a run completes.

        Asks the LLM for a short takeaway and stores it in
        ``learning_summary``. This is best-effort: any exception from the
        client call or from reading the response replaces the summary with
        a fixed failure message instead of propagating.
        """
        prompt = f"""
You just completed run {run_number}.
Your performance: Profit={self.calculate_profit():.2f}, Strategy={self.infer_strategy()}
Market metrics: Gini={metrics.get('gini_coefficient', 0):.3f}, Avg Profit={metrics.get('avg_agent_profit', 0):.2f}
What did you learn in 1-2 sentences?
Output JSON: {{"learning": "your learning"}}
"""
        try:
            response, _ = self.client.call(prompt)
            self.learning_summary = response.get("learning", "")
        except Exception:
            self.learning_summary = "Learning extraction failed."

    def execute_action(self, decision: Dict, pool: "Pool") -> bool:
        """Execute the decided action on the pool.

        Args:
            decision: LLM decision; the action is read from ``"action"``
                (or ``"action_type"``) and its arguments from ``"payload"``.
            pool: Pool object forwarded to the swap/liquidity helpers.

        Returns:
            True if the action was carried out (``do_nothing`` and unknown
            actions count as success), False if it was rejected.
        """
        action = decision.get("action", decision.get("action_type", ""))
        # The payload comes from model output, so it may be null or not an
        # object at all; treat anything that is not a dict as empty.
        payload = decision.get("payload")
        if not isinstance(payload, dict):
            payload = {}

        if action == "swap":
            return self._execute_swap(payload, pool)
        if action == "provide_liquidity":
            return self._execute_liquidity(payload, pool)
        if action == "propose_alliance":
            return self._execute_alliance(payload)
        # do_nothing or unknown action - always succeeds
        return True

    @staticmethod
    def _is_valid_amount(value) -> bool:
        """Return True if *value* is a finite, non-negative real number.

        Booleans are rejected even though they are ints. The chained
        comparison is False for NaN and infinity.
        """
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return False
        return 0 <= value < float("inf")

    def _execute_swap(self, payload: Dict, pool: "Pool") -> bool:
        """Execute a swap action.

        Reads ``"amount"`` (default 0) and ``"from"`` (default ``"a"``,
        case-insensitive) from the payload. Returns False without touching
        the pool when the amount is invalid, the source token is unknown,
        or the balance is insufficient.
        """
        amount = payload.get("amount", 0)
        # A negative amount would pass the balance check below and then
        # *credit* the source token, so reject it up front.
        if not self._is_valid_amount(amount):
            return False

        from_token = payload.get("from", "a")
        if isinstance(from_token, str):
            from_token = from_token.strip().lower()

        if from_token == "a" and self.token_a >= amount:
            output, _fee = pool.swap("a", amount, self.name)
            self.token_a -= amount
            self.token_b += output
            return True
        if from_token == "b" and self.token_b >= amount:
            output, _fee = pool.swap("b", amount, self.name)
            self.token_b -= amount
            self.token_a += output
            return True
        return False

    def _execute_liquidity(self, payload: Dict, pool: "Pool") -> bool:
        """Execute a provide liquidity action.

        Reads ``"amount_a"`` and ``"amount_b"`` (default 0 each). Returns
        False without touching the pool when either amount is invalid or
        exceeds the corresponding balance.
        """
        amount_a = payload.get("amount_a", 0)
        amount_b = payload.get("amount_b", 0)
        # Negative deposits would withdraw from the pool into the agent.
        if not (self._is_valid_amount(amount_a) and self._is_valid_amount(amount_b)):
            return False

        if self.token_a >= amount_a and self.token_b >= amount_b:
            pool.provide_liquidity(amount_a, amount_b, self.name)
            self.token_a -= amount_a
            self.token_b -= amount_b
            return True
        return False

    def _execute_alliance(self, payload: Dict) -> bool:
        """Record an alliance proposal.

        Marks ``payload["agent_name"]`` as ``"proposed"`` in ``alliances``.
        Returns False when the name is missing, empty or not a string.
        """
        agent_name = payload.get("agent_name", "")
        if not isinstance(agent_name, str) or not agent_name:
            return False
        # NOTE(review): this overwrites any existing status for the partner,
        # including "success" - confirm that re-proposing should reset it.
        self.alliances[agent_name] = "proposed"
        return True
def test_agent():
    """Manual smoke test: build an agent and a pool, request one decision.

    Prints the agent state before and after, and executes the decision on
    the pool only when the model chose a swap. Calls the real MiniMax
    client through ``Agent.decide``.
    """
    from core.defi_mechanics import Pool

    print("Testing Agent class...")

    # Agent under test
    trader = Agent("TestAgent")
    print(f"Created agent: {trader.name}")
    print(f"Initial state: {trader.get_state()}")

    # Pool with equal reserves
    amm = Pool(reserve_a=1000, reserve_b=1000)

    # Ask the model for a decision; the pool's attribute dict is the snapshot
    snapshot = vars(amm)
    print("\nGetting decision from MiniMax...")
    choice, reasoning_text = trader.decide({"turn": 0, "event": "test"}, snapshot, [], 0)

    print(f"Decision: {json.dumps(choice, indent=2)}")
    print(f"Thinking length: {len(reasoning_text)}")
    print(f"Profit: {trader.calculate_profit():.2f}")
    print(f"Strategy: {trader.infer_strategy()}")

    # Only swaps are exercised against the pool here
    chose_swap = choice.get("action") == "swap"
    if chose_swap:
        trader.execute_action(choice, amm)
        print(f"After swap: {trader.get_state()}")

    print("\nAgent test complete!")


# Run the smoke test when the module is executed as a script.
if __name__ == "__main__":
    test_agent()