Spaces:

nice-bill
/

agent-arena

Sleeping

App Files Files Community

agent-arena / core /agent.py

nice-bill

deploy from github

17390ee verified 3 months ago

raw

history blame contribute delete

12.2 kB

	"""Agent class for DeFi simulation."""

	import json
	from typing import Dict, List, Tuple, Optional
	from dataclasses import dataclass, field

	from api.minimax_client import MiniMaxClient
	from config import INITIAL_TOKENS


	@dataclass
	class Agent:
	    """DeFi trading agent powered by MiniMax.

	    Holds two token balances, a log of past decisions, alliance bookkeeping
	    and an inaction ("boredom") counter. Decisions are requested from the
	    ``MiniMaxClient`` created in ``__post_init__`` and applied to a pool
	    through ``execute_action``.
	    """

	    name: str
	    token_a: float = INITIAL_TOKENS
	    token_b: float = INITIAL_TOKENS
	    trade_history: List[Dict] = field(default_factory=list)
	    learning_summary: str = ""
	    # Partner name -> status. "success" marks an active ally (see
	    # _build_prompt); "proposed" is written by _execute_alliance.
	    alliances: Dict[str, str] = field(default_factory=dict)
	    # Number of proposals recorded per partner (drives alliance fatigue).
	    alliance_proposals: Dict[str, int] = field(default_factory=dict)
	    consecutive_inaction: int = 0  # Track boredom
	    total_boredom_penalty: float = 0.0  # Accumulated penalty

	    # Boredom penalty config - MORE AGGRESSIVE.
	    # NOTE: because they are annotated, these two are dataclass fields and
	    # can therefore be overridden per agent through the constructor.
	    BOREDOM_THRESHOLD: int = 1  # Start penalizing after 1 inaction (immediate!)
	    # Base rate; multiplied by the number of penalized turns in the streak.
	    BOREDOM_PENALTY_PER_TURN: float = 10.0

	    def __post_init__(self):
	        """Create the MiniMax client used for every model call."""
	        self.client = MiniMaxClient()

	    def get_state(self) -> Dict:
	        """Return a snapshot of the agent's state for decision making.

	        Balances, profit and penalty are rounded to two decimals. The
	        alliances mapping is copied so callers cannot mutate the agent's
	        own dictionary through the snapshot.
	        """
	        return {
	            "name": self.name,
	            "token_a": round(self.token_a, 2),
	            "token_b": round(self.token_b, 2),
	            "profit": round(self.calculate_profit(), 2),
	            "alliances": dict(self.alliances),
	            "consecutive_inaction": self.consecutive_inaction,
	            "boredom_penalty": round(self.total_boredom_penalty, 2),
	        }

	    def decide(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> Tuple[Dict, str]:
	        """Ask MiniMax for a decision based on the current state.

	        The decision is appended to ``trade_history``. The logged action is
	        taken from the ``"action"`` key, falling back to ``"action_type"``
	        and finally to ``"unknown"``.

	        Returns:
	            Tuple of (decision_dict, thinking_text) as produced by the client.
	        """
	        prompt = self._build_prompt(observation, pool_state, other_agents, turn)

	        system_prompt = """You are a strategic DeFi trader in an automated market simulation.
	Analyze the market state and make optimal trading decisions.
	Output ONLY valid JSON with your reasoning."""

	        decision, thinking = self.client.call(prompt, system_prompt)

	        # Log the decision
	        self.trade_history.append({
	            "turn": turn,
	            "action": decision.get("action", decision.get("action_type", "unknown")),
	            "reasoning": decision.get("reasoning", ""),
	            "thinking": thinking,
	        })

	        return decision, thinking

	    def _pending_boredom_penalty(self) -> float:
	        """Return the penalty owed for the current inaction streak.

	        The penalty is zero below ``BOREDOM_THRESHOLD`` and grows by
	        ``BOREDOM_PENALTY_PER_TURN`` for each further inactive turn.
	        """
	        if self.consecutive_inaction < self.BOREDOM_THRESHOLD:
	            return 0.0
	        penalized_turns = self.consecutive_inaction - self.BOREDOM_THRESHOLD + 1
	        return penalized_turns * self.BOREDOM_PENALTY_PER_TURN

	    def _build_prompt(self, observation: Dict, pool_state: Dict, other_agents: List["Agent"], turn: int) -> str:
	        """Build the decision prompt.

	        Combines the agent's own state, pool figures read from ``pool_state``
	        (with defaults when a key is missing), the other agents' states and
	        the stored learning summary. ``observation`` and ``turn`` are
	        accepted but not currently used in the prompt text.
	        """
	        other_states = [a.get_state() for a in other_agents if a.name != self.name]

	        # Find allied agents
	        allied_names = [name for name, status in self.alliances.items() if status == 'success']
	        allied_info = ""
	        if allied_names:
	            allied_info = f"\nYour ALLIES: {', '.join(allied_names)} - Coordinate with them for BONUS REWARDS!"

	        # Boredom warning
	        boredom_warning = ""
	        if self.consecutive_inaction >= self.BOREDOM_THRESHOLD:
	            penalty = self._pending_boredom_penalty()
	            boredom_warning = f"""
	!!! URGENT: You have been inactive for {self.consecutive_inaction} consecutive turns.
	You are losing {penalty:.1f} tokens per turn due to boredom penalty.
	ACT NOW to avoid further losses!"""

	        # Calculate market insights
	        reserve_a = pool_state.get('reserve_a', 1000)
	        reserve_b = pool_state.get('reserve_b', 1000)
	        price_ab = pool_state.get('price_ab', 1.0)
	        liquidity = pool_state.get('total_liquidity', 1000000)

	        # Determine if pool is imbalanced (guard against an empty B reserve)
	        imbalance = reserve_a / reserve_b if reserve_b > 0 else 1
	        if imbalance > 1.5:
	            market_advice = "Pool is A-heavy (A is cheaper). Consider buying B or providing A liquidity."
	        elif imbalance < 0.67:
	            market_advice = "Pool is B-heavy (B is cheaper). Consider buying A or providing B liquidity."
	        else:
	            market_advice = "Pool is balanced. Look for other opportunities."

	        # Check tokens for trading decisions
	        token_advice = ""
	        if self.token_a < 20 and self.token_b > 50:
	            token_advice = "You have low Token A! Prioritize getting more A."
	        elif self.token_b < 20 and self.token_a > 50:
	            token_advice = "You have low Token B! Prioritize getting more B."
	        elif self.token_a > 150 and self.token_b > 150:
	            token_advice = "You have excess tokens. Consider providing liquidity for fee rewards (+8 bonus)."

	        # The action alternatives are separated by a plain "|"; a backslash
	        # before it would be an invalid escape sequence in this literal.
	        prompt = f"""
	You are {self.name}, an AI agent in a DeFi market simulation.

	=== YOUR STATE ===
	Token A: {self.token_a:.2f}
	Token B: {self.token_b:.2f}
	Profit: {self.calculate_profit():.2f}
	Consecutive inaction: {self.consecutive_inaction}
	{allied_info}
	{boredom_warning}

	=== MARKET STATE ===
	Pool reserves: A={reserve_a:.2f}, B={reserve_b:.2f}
	Price (A/B): {price_ab:.4f}
	Total liquidity: {liquidity:.2f}
	IMBALANCE RATIO: {imbalance:.2f}x
	{market_advice}
	{token_advice}

	=== OTHER AGENTS ===
	{json.dumps(other_states, indent=2)}

	=== YOUR LEARNING ===
	{self.learning_summary if self.learning_summary else "No previous runs yet."}

	=== REWARDS FOR ACTIONS ===
	- SWAP: Active trading +3 tokens, profitable swap +5 extra!
	- PROVIDE_LIQUIDITY: Earns fees from all swaps, +8 bonus tokens (BEST for high balances)
	- PROPOSE_ALLIANCE: If they accept, you BOTH get +4 bonus tokens (repeating gives less!)
	- COORDINATED TRADES: Trade during volatility +5 bonus tokens!
	- POSITIVE PROFIT: End turn with profit = +15 bonus tokens!
	- ESCAPE VELOCITY: TOP AGENT gets 2x on ALL bonuses!

	=== DECISION GUIDE ===
	- If tokens > 150 each: PROVIDE_LIQUIDITY (best returns +8 bonus)
	- If pool imbalanced > 1.5x: Buy the cheaper token
	- If tokens < 20 of either: Prioritize getting more of that token
	- If you have allies: Consider coordinated actions
	- DO NOT do_nothing - you lose 10 tokens/turn!

	Output JSON:
	{{
	"action": "swap|provide_liquidity|propose_alliance|do_nothing",
	"reasoning": "your reasoning",
	"payload": {{...action specific data...}}
	}}
	"""
	        return prompt

	    def calculate_profit(self) -> float:
	        """Return combined holdings minus the combined initial allocation."""
	        return (self.token_a + self.token_b) - (INITIAL_TOKENS * 2)

	    def apply_boredom_penalty(self) -> float:
	        """Apply the boredom penalty for inaction.

	        The penalty is deducted from ``token_a`` only and is not clamped, so
	        the balance may drop below zero.

	        Returns:
	            The penalty amount applied (0.0 when below the threshold).
	        """
	        if self.consecutive_inaction < self.BOREDOM_THRESHOLD:
	            return 0.0

	        penalty = self._pending_boredom_penalty()
	        self.token_a -= penalty
	        self.total_boredom_penalty += penalty
	        return penalty

	    def reset_inaction_counter(self):
	        """Reset inaction counter when taking active action."""
	        self.consecutive_inaction = 0

	    def increment_inaction_counter(self):
	        """Increment inaction counter for do_nothing."""
	        self.consecutive_inaction += 1

	    def get_alliance_fatigue(self, partner: str) -> float:
	        """Return the bonus multiplier for proposing to ``partner`` again.

	        Repeated proposals to the same partner give diminishing returns:
	        1.0 before any proposal, 0.5 after one, 0.0 afterwards.
	        """
	        proposals = self.alliance_proposals.get(partner, 0)
	        return {0: 1.0, 1: 0.5}.get(proposals, 0.0)

	    def record_alliance_proposal(self, partner: str):
	        """Record that we proposed alliance to this partner."""
	        self.alliance_proposals[partner] = self.alliance_proposals.get(partner, 0) + 1

	    def infer_strategy(self) -> str:
	        """Return the most common action among the last ten logged decisions.

	        Returns ``"unknown"`` when nothing usable has been logged.
	        """
	        # Local import keeps this method self-contained.
	        from collections import Counter

	        recent = self.trade_history[-10:]
	        actions = [entry["action"] for entry in recent if "action" in entry]
	        if not actions:
	            return "unknown"

	        return Counter(actions).most_common(1)[0][0]

	    def update_learning(self, run_number: int, metrics: Dict):
	        """Ask the model for a short lesson after a run and store it.

	        Any failure (client error or unexpected response shape) is treated
	        as best-effort and replaced by a fixed fallback summary.
	        """
	        prompt = f"""
	You just completed run {run_number}.

	Your performance: Profit={self.calculate_profit():.2f}, Strategy={self.infer_strategy()}
	Market metrics: Gini={metrics.get('gini_coefficient', 0):.3f}, Avg Profit={metrics.get('avg_agent_profit', 0):.2f}

	What did you learn in 1-2 sentences?
	Output JSON: {{"learning": "your learning"}}
	"""

	        try:
	            response, _ = self.client.call(prompt)
	            self.learning_summary = response.get("learning", "")
	        except Exception:
	            # Deliberate best-effort: learning must never abort a run.
	            self.learning_summary = "Learning extraction failed."

	    @staticmethod
	    def _is_valid_amount(value, allow_zero: bool = False) -> bool:
	        """Return True when ``value`` is a finite, non-negative real number.

	        Zero is accepted only when ``allow_zero`` is set. Booleans, strings,
	        ``None``, NaN and infinities are rejected, because amounts originate
	        from model output and cannot be trusted.
	        """
	        if isinstance(value, bool) or not isinstance(value, (int, float)):
	            return False
	        if value == 0:
	            return allow_zero
	        return 0 < value < float("inf")

	    def execute_action(self, decision: Dict, pool: "Pool") -> bool:
	        """Execute the decided action on the pool.

	        Returns:
	            True when the action was applied. ``do_nothing`` and unknown
	            actions always count as successful.
	        """
	        action = decision.get("action", decision.get("action_type", ""))
	        payload = decision.get("payload")
	        if not isinstance(payload, dict):
	            # The model may emit null or a non-object payload.
	            payload = {}

	        if action == "swap":
	            return self._execute_swap(payload, pool)
	        if action == "provide_liquidity":
	            return self._execute_liquidity(payload, pool)
	        if action == "propose_alliance":
	            return self._execute_alliance(payload)

	        # do_nothing or unknown action - always succeeds
	        return True

	    def _execute_swap(self, payload: Dict, pool: "Pool") -> bool:
	        """Execute a swap action.

	        Reads ``"amount"`` and ``"from"`` (``"a"`` or ``"b"``) from the
	        payload. The swap is refused when the amount is not a positive
	        number or exceeds the balance of the source token.
	        """
	        amount = payload.get("amount", 0)
	        from_token = payload.get("from", "a")

	        # A negative amount would pass the balance check below and credit
	        # the agent with tokens it never paid for.
	        if not self._is_valid_amount(amount):
	            return False

	        if from_token == "a" and self.token_a >= amount:
	            output, _fee = pool.swap("a", amount, self.name)
	            self.token_a -= amount
	            self.token_b += output
	            return True
	        if from_token == "b" and self.token_b >= amount:
	            output, _fee = pool.swap("b", amount, self.name)
	            self.token_b -= amount
	            self.token_a += output
	            return True

	        return False

	    def _execute_liquidity(self, payload: Dict, pool: "Pool") -> bool:
	        """Execute a provide liquidity action.

	        Reads ``"amount_a"`` and ``"amount_b"`` from the payload. Both must
	        be non-negative numbers within the agent's balances and at least
	        one of them must be positive.
	        """
	        amount_a = payload.get("amount_a", 0)
	        amount_b = payload.get("amount_b", 0)

	        amounts_ok = (
	            self._is_valid_amount(amount_a, allow_zero=True)
	            and self._is_valid_amount(amount_b, allow_zero=True)
	        )
	        if not amounts_ok or (amount_a == 0 and amount_b == 0):
	            return False

	        if self.token_a >= amount_a and self.token_b >= amount_b:
	            pool.provide_liquidity(amount_a, amount_b, self.name)
	            self.token_a -= amount_a
	            self.token_b -= amount_b
	            return True

	        return False

	    def _execute_alliance(self, payload: Dict) -> bool:
	        """Record an alliance proposal to ``payload["agent_name"]``.

	        Returns False when the name is missing, empty or not a string.
	        """
	        agent_name = payload.get("agent_name", "")
	        if isinstance(agent_name, str) and agent_name:
	            self.alliances[agent_name] = "proposed"
	            return True
	        return False


	def test_agent():
	    """Smoke-test the Agent class against a fresh pool.

	    Builds one agent and a 1000/1000 pool, requests a single decision from
	    MiniMax, prints the outcome and, when the decision is a swap, executes
	    it and prints the resulting state.
	    """
	    from core.defi_mechanics import Pool

	    print("Testing Agent class...")

	    # Agent under test
	    trader = Agent("TestAgent")
	    print(f"Created agent: {trader.name}")
	    print(f"Initial state: {trader.get_state()}")

	    # Pool the agent trades against; its attribute dict doubles as pool state
	    amm = Pool(reserve_a=1000, reserve_b=1000)
	    market_view = vars(amm)

	    print("\nGetting decision from MiniMax...")
	    verdict, reasoning = trader.decide(
	        {"turn": 0, "event": "test"},
	        market_view,
	        [],
	        0,
	    )

	    print(f"Decision: {json.dumps(verdict, indent=2)}")
	    print(f"Thinking length: {len(reasoning)}")
	    print(f"Profit: {trader.calculate_profit():.2f}")
	    print(f"Strategy: {trader.infer_strategy()}")

	    # Only swaps are exercised end to end here
	    wants_swap = verdict.get("action") == "swap"
	    if wants_swap:
	        trader.execute_action(verdict, amm)
	        print(f"After swap: {trader.get_state()}")

	    print("\nAgent test complete!")


	# Run the smoke test only when this file is executed as a script.
	if __name__ == "__main__":
	    test_agent()