# ==========================
# GLOBAL CONFIGURATION
# ==========================

DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
DEFAULT_NUM_AGENTS = 5

MEMORY_DIR = "memories"
LOG_DIR = "logs"
os.makedirs(MEMORY_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

GRID_SIZE = 10  # 10x10 grid
WORLD_SIZE_PX = 480  # SVG size in pixels

# Simulation state
SIMULATION_RUNNING = True
SIMULATION_DELAY = 1.0  # seconds between one step and the next


# ==========================
# DOMAIN CLASSES
# ==========================

@dataclass
class Agent:
    """Mutable state of one simulated agent on the grid."""

    agent_id: int
    name: str
    role: str
    interests: str
    memory_file: str  # path of the text file this agent's memory is appended to
    x: int
    y: int
    lifetime_steps: int = 0
    total_reward: float = 0.0
    alive: bool = True
    last_action: str = ""
    last_reward: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dictionary."""
        return asdict(self)

    def append_memory(self, text: str):
        """Append ``text`` as one line at the end of the agent's memory file."""
        with open(self.memory_file, "a", encoding="utf-8") as f:
            f.write(text + "\n")


class World:
    """Grid world holding the agents and the language model that drives them."""

    # Pools a new agent's identity is drawn from.
    _NAMES = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
    _ROLES = ["Ricercatore", "Artista", "Esploratore", "Filosofo", "Ingegnere", "Narratore"]
    _INTERESTS = [
        "intelligenza artificiale, etica, società",
        "arte generativa, musica, poesia",
        "esplorazione spaziale, mondi virtuali",
        "filosofia della mente, coscienza",
        "robotica, sistemi complessi",
        "storie, miti, narrazioni collettive",
    ]

    # Direction keywords in priority order, each with its (dx, dy) step.
    # The y axis grows downwards, so "up"/"north" is dy = -1.
    _DIRECTION_WORDS = (
        (frozenset({"su", "nord"}), (0, -1)),
        (frozenset({"giu", "giù", "sud"}), (0, 1)),
        (frozenset({"sinistra", "ovest"}), (-1, 0)),
        (frozenset({"destra", "est"}), (1, 0)),
    )
    _APPROACH_VERBS = ("avvicino", "raggiungo")
    _MOVEMENT_VERBS = ("cammino", "muovo", "sposto", "avvicino", "raggiungo", "esploro")
    _RANDOM_STEPS = [(1, 0), (-1, 0), (0, 1), (0, -1), (0, 0)]

    def __init__(self, model_id: str = DEFAULT_MODEL_ID, num_agents: int = DEFAULT_NUM_AGENTS):
        """Load the model for ``model_id`` and populate the world with ``num_agents`` agents."""
        self.model_id = model_id
        self.tokenizer, self.model = load_model(model_id)
        self.step = 0
        self.agents: List[Agent] = []
        self.initialize_agents(num_agents)

    def initialize_agents(self, num_agents: int):
        """Reset the step counter and replace all agents with ``num_agents`` fresh ones."""
        self.step = 0
        self.agents = [self._create_random_agent(i) for i in range(num_agents)]

    def _create_random_agent(self, idx: int) -> Agent:
        """Create an agent with a random identity and position.

        The agent's memory file is opened in write mode, so any previous
        content stored for the same ``idx`` is overwritten with the new
        identity header.
        """
        name = random.choice(self._NAMES) + f"_{idx}"
        role = random.choice(self._ROLES)
        intr = random.choice(self._INTERESTS)

        memory_file = os.path.join(MEMORY_DIR, f"agent_{idx}.txt")
        with open(memory_file, "w", encoding="utf-8") as f:
            f.write(f"Identità dell'agente {name}\n")
            f.write(f"Ruolo: {role}\n")
            f.write(f"Interessi: {intr}\n\n")

        x = random.randint(0, GRID_SIZE - 1)
        y = random.randint(0, GRID_SIZE - 1)

        return Agent(
            agent_id=idx,
            name=name,
            role=role,
            interests=intr,
            memory_file=memory_file,
            x=x,
            y=y,
        )

    def reset_world(self, num_agents: int, model_id: str):
        """Switch to ``model_id`` and restart the simulation with ``num_agents`` agents."""
        self.model_id = model_id
        self.tokenizer, self.model = load_model(model_id)
        self.initialize_agents(num_agents)

    def step_world(self) -> None:
        """Run one simulation tick: every living agent generates an action and moves."""
        self.step += 1
        # Built once per tick: all agents act on the same snapshot of the world.
        world_state_summary = self._build_world_state_summary()

        for agent in self.agents:
            if not agent.alive:
                continue

            action = generate_action(
                agent=agent,
                world_state=world_state_summary,
                tokenizer=self.tokenizer,
                model=self.model,
            )

            self._update_agent_position(agent, action)
            reward = self.compute_reward(agent, action, world_state_summary)

            agent.last_action = action
            agent.last_reward = reward
            agent.lifetime_steps += 1
            agent.total_reward += reward

            mem_entry = (
                f"[Step {self.step}] Pos=({agent.x},{agent.y}) "
                f"Azione: {action} | Reward: {reward:.3f}"
            )
            agent.append_memory(mem_entry)
            self.log_transition(agent, action, reward)

            # An agent dies when its average reward is too low or it gets too old.
            avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
            if avg_reward < -0.2 or agent.lifetime_steps > 80:
                agent.alive = False

        # Dead agents are replaced in place by a new random agent with the same id.
        for i, agent in enumerate(self.agents):
            if not agent.alive:
                self.agents[i] = self._create_random_agent(agent.agent_id)

    def _update_agent_position(self, agent: Agent, action: str):
        """Move ``agent`` one cell according to the text of ``action``.

        Resolution order:
        1. a direction keyword (up/north, down/south, left/west, right/east);
        2. an "approach" verb, which steps towards the closest living agent;
        3. otherwise a random step (which may be "stay still").

        The resulting position is clamped to the grid.
        """
        text = action.lower()
        # Direction keywords are matched as whole words. Substring matching
        # made "sud" trigger "su" (moving north instead of south) and "est"
        # fire inside words such as "questo" or "resto".
        words = set("".join(ch if ch.isalnum() else " " for ch in text).split())

        delta = None
        for keywords, step in self._DIRECTION_WORDS:
            if words & keywords:
                delta = step
                break

        if delta is None and any(verb in text for verb in self._APPROACH_VERBS):
            target = self._closest_agent(agent)
            if target is not None:
                off_x = target.x - agent.x
                off_y = target.y - agent.y
                # Sign of each offset: one step along every axis that differs.
                delta = ((off_x > 0) - (off_x < 0), (off_y > 0) - (off_y < 0))

        if delta is None:
            delta = random.choice(self._RANDOM_STEPS)

        dx, dy = delta
        agent.x = max(0, min(GRID_SIZE - 1, agent.x + dx))
        agent.y = max(0, min(GRID_SIZE - 1, agent.y + dy))

    def _closest_agent(self, agent: Agent) -> Optional[Agent]:
        """Return the living agent nearest to ``agent`` by Manhattan distance.

        Returns ``None`` when there is no other living agent. Ties go to the
        agent that comes first in ``self.agents``.
        """
        others = (a for a in self.agents if a.agent_id != agent.agent_id and a.alive)
        return min(
            others,
            key=lambda o: abs(o.x - agent.x) + abs(o.y - agent.y),
            default=None,
        )

    def _build_world_state_summary(self) -> str:
        """Return a multi-line text description of the step counter and every agent."""
        lines = [f"Step globale: {self.step}", "Stato degli agenti:"]
        for a in self.agents:
            status = "vivo" if a.alive else "morto"
            lines.append(
                f"- {a.name} ({a.role}), interessi: {a.interests}, "
                f"posizione: ({a.x},{a.y}), step di vita: {a.lifetime_steps}, "
                f"reward totale: {a.total_reward:.2f}, stato: {status}"
            )
        return "\n".join(lines)

    def compute_reward(self, agent: Agent, action: str, world_state: str) -> float:
        """Score ``action`` with simple text heuristics.

        +1.0 if the stripped action has at least 20 characters, otherwise -0.5;
        +1.0 if it mentions another agent's name;
        +0.5 if it contains a movement/exploration verb.
        ``world_state`` is accepted but not used by the current heuristics.
        """
        reward = 1.0 if len(action.strip()) >= 20 else -0.5

        other_names = [a.name for a in self.agents if a.agent_id != agent.agent_id]
        if any(name in action for name in other_names):
            reward += 1.0

        text = action.lower()
        if any(k in text for k in self._MOVEMENT_VERBS):
            reward += 0.5

        return reward

    def log_transition(self, agent: Agent, action: str, reward: float):
        """Append one JSON line describing this step to the agent's log file."""
        log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
        transition = {
            "step": self.step,
            "agent_id": agent.agent_id,
            "name": agent.name,
            "role": agent.role,
            "interests": agent.interests,
            "x": agent.x,
            "y": agent.y,
            "action": action,
            "reward": reward,
            "lifetime_steps": agent.lifetime_steps,
            "total_reward": agent.total_reward,
        }
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(transition, ensure_ascii=False) + "\n")


# ==========================
# LLM MODEL
# ==========================

# model_id -> (tokenizer, model); avoids reloading a model already in memory.
_model_cache: Dict[str, Tuple[Any, Any]] = {}


def load_model(model_id: str) -> Tuple[Any, Any]:
    """Return ``(tokenizer, model)`` for ``model_id``, loading it on first use."""
    if model_id in _model_cache:
        return _model_cache[model_id]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    _model_cache[model_id] = (tokenizer, model)
    return tokenizer, model


def build_agent_prompt(agent: Agent, world_state: str) -> str:
    """Build the prompt asking ``agent`` for its next action given ``world_state``."""
    # NOTE(review): the line breaks inside this prompt were reconstructed from
    # a whitespace-collapsed copy of the file -- confirm against the original.
    prompt = f"""
Sei un agente in un mondo simulato su una griglia 2D.

Identità:
- Nome: {agent.name}
- Ruolo: {agent.role}
- Interessi: {agent.interests}

Stato del mondo:
{world_state}

Compito:
In una sola breve azione (1-3 frasi), descrivi cosa fai ora per interagire con questo mondo e con gli altri agenti.
Puoi muoverti (su/giù/sinistra/destra), avvicinarti a qualcuno, esplorare, osservare.
Sii coerente con il tuo ruolo e i tuoi interessi.
Rispondi SOLO con l'azione, senza spiegazioni meta.
"""
    return prompt.strip()


def generate_action(agent: Agent, world_state: str, tokenizer, model) -> str:
    """Sample a short action for ``agent`` from the language model.

    Returns only the newly generated text, stripped of surrounding whitespace.
    """
    prompt = build_agent_prompt(agent, world_state)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
    )
    # A causal LM returns the prompt ids followed by the new ids. Cut the
    # prompt off at token level: comparing decoded text with the prompt is
    # fragile, because decoding may not reproduce the prompt verbatim, and then
    # the whole prompt (agent names, direction words) leaks into the action.
    prompt_len = len(inputs["input_ids"][0])
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
{world.model_id} · Agenti: {len(world.agents)}