"""AIWorld – a Conway-like world of LLM-driven agents on a 2D grid.

Agents have an identity (name, role, interests) and a text-file memory.
Each simulation tick every living agent asks a causal LM for a short
textual action, moves on the grid accordingly, receives a simple reward,
and may "die" (and be reborn with a fresh identity) when its average
reward drops too low or it has lived too long.  A background thread runs
the simulation continuously; the Gradio UI polls it for display.

NOTE(review): this file arrived with its whitespace flattened and with
the HTML/SVG template literals stripped of their markup.  The code
structure below is a faithful reconstruction; the SVG/CSS/HTML templates
have been rebuilt to match what the surrounding code evidently rendered
(grid + colored agent dots, per-agent info cards) — confirm the exact
styling against the original deployment if it matters.
"""

import html
import json
import os
import random
import re
import threading
import time
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# ==========================
# GLOBAL CONFIGURATION
# ==========================

DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
DEFAULT_NUM_AGENTS = 5
MEMORY_DIR = "memories"
LOG_DIR = "logs"
os.makedirs(MEMORY_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

GRID_SIZE = 10        # 10x10 grid
WORLD_SIZE_PX = 480   # SVG side length in pixels

# Simulation flags, shared between the UI thread and the background
# simulation thread.  Plain bool/float reassignment is effectively atomic
# in CPython, so no lock is needed for these two; the World itself does
# use a lock (see World._lock) because reset mutates compound state.
SIMULATION_RUNNING = True
SIMULATION_DELAY = 1.0  # seconds between simulation steps


# ==========================
# DOMAIN CLASSES
# ==========================

@dataclass
class Agent:
    """One inhabitant of the world, with position, identity and stats."""

    agent_id: int
    name: str
    role: str
    interests: str
    memory_file: str        # path of the append-only text memory
    x: int
    y: int
    lifetime_steps: int = 0
    total_reward: float = 0.0
    alive: bool = True
    last_action: str = ""
    last_reward: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the agent as a plain dict (for the UI state)."""
        return asdict(self)

    def append_memory(self, text: str) -> None:
        """Append one line to the agent's persistent memory file."""
        with open(self.memory_file, "a", encoding="utf-8") as f:
            f.write(text + "\n")


class World:
    """The simulated grid world: agents, model, and the tick loop."""

    def __init__(self, model_id: str = DEFAULT_MODEL_ID,
                 num_agents: int = DEFAULT_NUM_AGENTS):
        self.model_id = model_id
        self.tokenizer, self.model = load_model(model_id)
        self.step = 0
        self.agents: List[Agent] = []
        # Protects agents/model against concurrent reset_world() from the
        # UI thread while step_world() runs in the background thread.
        self._lock = threading.Lock()
        self.initialize_agents(num_agents)

    def initialize_agents(self, num_agents: int) -> None:
        """(Re)populate the world with freshly generated agents."""
        self.agents = []
        self.step = 0
        for i in range(num_agents):
            self.agents.append(self._create_random_agent(i))

    def _create_random_agent(self, idx: int) -> Agent:
        """Build an agent with a random identity and a fresh memory file."""
        names = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
        roles = ["Ricercatore", "Artista", "Esploratore", "Filosofo",
                 "Ingegnere", "Narratore"]
        interests = [
            "intelligenza artificiale, etica, società",
            "arte generativa, musica, poesia",
            "esplorazione spaziale, mondi virtuali",
            "filosofia della mente, coscienza",
            "robotica, sistemi complessi",
            "storie, miti, narrazioni collettive",
        ]
        name = random.choice(names) + f"_{idx}"
        role = random.choice(roles)
        intr = random.choice(interests)

        # The memory file is overwritten: a reborn agent starts clean.
        memory_file = os.path.join(MEMORY_DIR, f"agent_{idx}.txt")
        with open(memory_file, "w", encoding="utf-8") as f:
            f.write(f"Identità dell'agente {name}\n")
            f.write(f"Ruolo: {role}\n")
            f.write(f"Interessi: {intr}\n\n")

        x = random.randint(0, GRID_SIZE - 1)
        y = random.randint(0, GRID_SIZE - 1)
        return Agent(
            agent_id=idx,
            name=name,
            role=role,
            interests=intr,
            memory_file=memory_file,
            x=x,
            y=y,
        )

    def reset_world(self, num_agents: int, model_id: str) -> None:
        """Swap model and repopulate agents (thread-safe vs. step_world)."""
        with self._lock:
            self.model_id = model_id
            self.tokenizer, self.model = load_model(model_id)
            self.initialize_agents(num_agents)

    def step_world(self) -> None:
        """Run one simulation tick: each living agent acts and moves."""
        with self._lock:
            self.step += 1
            world_state_summary = self._build_world_state_summary()

            for agent in self.agents:
                if not agent.alive:
                    continue

                action = generate_action(
                    agent=agent,
                    world_state=world_state_summary,
                    tokenizer=self.tokenizer,
                    model=self.model,
                )

                self._update_agent_position(agent, action)
                reward = self.compute_reward(agent, action, world_state_summary)

                agent.last_action = action
                agent.last_reward = reward
                agent.lifetime_steps += 1
                agent.total_reward += reward

                mem_entry = (
                    f"[Step {self.step}] Pos=({agent.x},{agent.y}) "
                    f"Azione: {action} | Reward: {reward:.3f}"
                )
                agent.append_memory(mem_entry)
                self.log_transition(agent, action, reward)

                # Death rule: poor average reward or old age.
                avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
                if avg_reward < -0.2 or agent.lifetime_steps > 80:
                    agent.alive = False

            # Rebirth: dead agents are replaced in place with new identities.
            for i, agent in enumerate(self.agents):
                if not agent.alive:
                    self.agents[i] = self._create_random_agent(agent.agent_id)

    def _update_agent_position(self, agent: Agent, action: str) -> None:
        """Move the agent based on keywords in its action text.

        If the action mentions a direction, move that way; if it mentions
        approaching someone, step toward the closest living agent;
        otherwise take a lazy random walk (may stay still).

        FIX(review): the original used bare substring tests, so "sud"
        matched the "su" branch (moving NORTH when the model said south)
        and "est" matched inside ordinary words like "questo".  Direction
        keywords are now matched as whole words.
        """
        dx, dy = 0, 0
        text = action.lower()
        words = set(re.findall(r"\w+", text))
        moved_by_text = False

        if words & {"su", "nord"}:
            dy = -1
            moved_by_text = True
        elif words & {"giu", "giù", "sud"}:
            dy = 1
            moved_by_text = True
        elif words & {"sinistra", "ovest"}:
            dx = -1
            moved_by_text = True
        elif words & {"destra", "est"}:
            dx = 1
            moved_by_text = True
        elif "avvicino" in text or "raggiungo" in text:
            # Substring match on purpose: Italian verbs conjugate
            # ("mi avvicino", "ti raggiungo", ...).
            target = self._closest_agent(agent)
            if target is not None:
                moved_by_text = True
                if target.x > agent.x:
                    dx = 1
                elif target.x < agent.x:
                    dx = -1
                if target.y > agent.y:
                    dy = 1
                elif target.y < agent.y:
                    dy = -1

        if not moved_by_text:
            dx, dy = random.choice([(1, 0), (-1, 0), (0, 1), (0, -1), (0, 0)])

        # Clamp to the grid.
        agent.x = max(0, min(GRID_SIZE - 1, agent.x + dx))
        agent.y = max(0, min(GRID_SIZE - 1, agent.y + dy))

    def _closest_agent(self, agent: Agent) -> Optional[Agent]:
        """Return the nearest other living agent (Manhattan distance)."""
        others = [a for a in self.agents
                  if a.agent_id != agent.agent_id and a.alive]
        if not others:
            return None
        return min(others,
                   key=lambda o: abs(o.x - agent.x) + abs(o.y - agent.y))

    def _build_world_state_summary(self) -> str:
        """Render a textual summary of the world for the LLM prompt."""
        lines = [f"Step globale: {self.step}", "Stato degli agenti:"]
        for a in self.agents:
            status = "vivo" if a.alive else "morto"
            lines.append(
                f"- {a.name} ({a.role}), interessi: {a.interests}, "
                f"posizione: ({a.x},{a.y}), step di vita: {a.lifetime_steps}, "
                f"reward totale: {a.total_reward:.2f}, stato: {status}"
            )
        return "\n".join(lines)

    def compute_reward(self, agent: Agent, action: str, world_state: str) -> float:
        """Score an action: length, mentioning other agents, movement verbs."""
        reward = 0.0

        # Non-trivial answers are rewarded, terse ones penalized.
        if len(action.strip()) >= 20:
            reward += 1.0
        else:
            reward -= 0.5

        # Social bonus: mentioning any other agent by name.
        other_names = [a.name for a in self.agents
                       if a.agent_id != agent.agent_id]
        if any(name in action for name in other_names):
            reward += 1.0

        # Exploration bonus: movement-related verbs.
        text = action.lower()
        if any(k in text for k in ["cammino", "muovo", "sposto",
                                   "avvicino", "raggiungo", "esploro"]):
            reward += 0.5

        return reward

    def log_transition(self, agent: Agent, action: str, reward: float) -> None:
        """Append one JSONL transition record to the agent's log file."""
        log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
        transition = {
            "step": self.step,
            "agent_id": agent.agent_id,
            "name": agent.name,
            "role": agent.role,
            "interests": agent.interests,
            "x": agent.x,
            "y": agent.y,
            "action": action,
            "reward": reward,
            "lifetime_steps": agent.lifetime_steps,
            "total_reward": agent.total_reward,
        }
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(transition, ensure_ascii=False) + "\n")


# ==========================
# LLM MODEL
# ==========================

# Cache of (tokenizer, model) pairs keyed by model id, so switching back
# to an already-loaded model is instant.
_model_cache: Dict[str, Tuple[Any, Any]] = {}


def load_model(model_id: str) -> Tuple[Any, Any]:
    """Load (or fetch from cache) the tokenizer and causal LM."""
    if model_id in _model_cache:
        return _model_cache[model_id]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id)
    _model_cache[model_id] = (tokenizer, model)
    return tokenizer, model


def build_agent_prompt(agent: Agent, world_state: str) -> str:
    """Compose the per-agent Italian prompt describing identity and world."""
    prompt = f"""
Sei un agente in un mondo simulato su una griglia 2D.

Identità:
- Nome: {agent.name}
- Ruolo: {agent.role}
- Interessi: {agent.interests}

Stato del mondo:
{world_state}

Compito:
In una sola breve azione (1-3 frasi), descrivi cosa fai ora per interagire con questo mondo e con gli altri agenti.
Puoi muoverti (su/giù/sinistra/destra), avvicinarti a qualcuno, esplorare, osservare.
Sii coerente con il tuo ruolo e i tuoi interessi.
Rispondi SOLO con l'azione, senza spiegazioni meta.
"""
    return prompt.strip()


def generate_action(agent: Agent, world_state: str, tokenizer, model) -> str:
    """Sample one short action string from the LM for this agent."""
    prompt = build_agent_prompt(agent, world_state)
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,
        do_sample=True,
        temperature=0.9,
        top_p=0.95,
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Decoder-only models echo the prompt; strip it to keep only the action.
    if text.startswith(prompt):
        return text[len(prompt):].strip()
    return text.strip()


# ==========================
# SVG + HTML RENDERING
# ==========================

def world_to_svg(world: World) -> str:
    """Render the grid and agents as an inline SVG snippet.

    NOTE(review): the original SVG markup was stripped from the source;
    this is a reconstruction (dark grid, green dots for living agents,
    orange for dying ones, name labels underneath).  The original also
    injected a JS auto-refresh snippet here; that hack is replaced by
    polling via ``demo.load(..., every=...)`` in the UI section below.
    """
    size = WORLD_SIZE_PX
    cell = size // GRID_SIZE

    parts = [
        f'<svg width="{size}" height="{size}" '
        f'xmlns="http://www.w3.org/2000/svg" '
        f'style="background:#0f172a;border-radius:12px;">'
    ]

    # Grid lines (one vertical + one horizontal per index).
    for i in range(GRID_SIZE + 1):
        pos = i * cell
        parts.append(
            f'<line x1="{pos}" y1="0" x2="{pos}" y2="{size}" '
            f'stroke="#1e293b" stroke-width="1"/>'
        )
        parts.append(
            f'<line x1="0" y1="{pos}" x2="{size}" y2="{pos}" '
            f'stroke="#1e293b" stroke-width="1"/>'
        )

    for agent in world.agents:
        cx = agent.x * cell + cell / 2
        cy = agent.y * cell + cell / 2
        base_color = "#22c55e" if agent.alive else "#f97316"
        parts.append(
            f'<circle cx="{cx}" cy="{cy}" r="{cell * 0.32:.1f}" '
            f'fill="{base_color}" stroke="#0f172a" stroke-width="2"/>'
        )
        parts.append(
            f'<text x="{cx}" y="{cy + cell * 0.48:.1f}" font-size="9" '
            f'fill="#e2e8f0" text-anchor="middle">'
            f'{html.escape(agent.name)}</text>'
        )

    parts.append("</svg>")
    return "".join(parts)


def world_to_html_cards(world: World) -> str:
    """Render per-agent info cards as an HTML snippet.

    NOTE(review): the CSS/HTML templates were stripped from the source;
    rebuilt here with the same data and Italian labels the original
    emitted.  LLM output is escaped before interpolation (it was injected
    raw into the page in the original — an XSS vector).
    """
    css = """
<style>
.aiw-header { font-family: sans-serif; margin-bottom: 8px; }
.aiw-grid { display: flex; flex-wrap: wrap; gap: 10px; font-family: sans-serif; }
.aiw-card { border: 1px solid #334155; border-radius: 10px; padding: 10px;
            width: 260px; background: #1e293b; color: #e2e8f0; }
.badge-alive { color: #22c55e; font-weight: bold; }
.badge-dead { color: #f97316; font-weight: bold; }
.aiw-action { font-style: italic; margin-top: 6px; }
</style>
"""
    header = (
        f'<div class="aiw-header">'
        f'<b>Mondo Simulato – Step {world.step}</b><br>'
        f'Modello: {html.escape(world.model_id)} · Agenti: {len(world.agents)}'
        f'</div>'
    )

    cards = []
    for agent in world.agents:
        badge_class = "badge-alive" if agent.alive else "badge-dead"
        badge_text = "Vivo" if agent.alive else "Rinascita in corso"
        avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
        last_action = html.escape(agent.last_action or "Nessuna azione ancora.")
        cards.append(
            f'<div class="aiw-card">'
            f'<b>{html.escape(agent.name)}</b> – {html.escape(agent.role)} '
            f'<span class="{badge_class}">{badge_text}</span><br>'
            f'Pos: ({agent.x},{agent.y}) · '
            f'Step vita: {agent.lifetime_steps} · '
            f'Reward tot: {agent.total_reward:.2f} · '
            f'Reward medio: {avg_reward:.2f}<br>'
            f'Interessi: {html.escape(agent.interests)}<br>'
            f'<b>Ultima azione</b>'
            f'<div class="aiw-action">{last_action}</div>'
            f'</div>'
        )

    grid = f'<div class="aiw-grid">{"".join(cards)}</div>'
    return css + header + grid


# ==========================
# STATE & SIMULATION
# ==========================

GLOBAL_WORLD: World = World(model_id=DEFAULT_MODEL_ID,
                            num_agents=DEFAULT_NUM_AGENTS)


def simulation_loop() -> None:
    """Background thread body: step the world forever while enabled."""
    while True:
        if SIMULATION_RUNNING:
            GLOBAL_WORLD.step_world()
        time.sleep(max(0.1, SIMULATION_DELAY))


def serialize_world(world: World) -> dict:
    """Plain-dict snapshot of the world for the gr.State component."""
    return {
        "model_id": world.model_id,
        "step": world.step,
        "agents": [a.to_dict() for a in world.agents],
    }


# ==========================
# GRADIO CALLBACKS
# ==========================

def ui_refresh():
    """Produce fresh SVG, HTML cards and serialized state for the UI."""
    svg = world_to_svg(GLOBAL_WORLD)
    html_cards = world_to_html_cards(GLOBAL_WORLD)
    state = serialize_world(GLOBAL_WORLD)
    return svg, html_cards, state


def ui_reset(model_id: str, num_agents: int):
    """Rebuild the world with the chosen model and agent count."""
    GLOBAL_WORLD.reset_world(num_agents=num_agents, model_id=model_id)
    return ui_refresh()


def ui_toggle_run(run: bool, delay: float) -> str:
    """Update the run flag / step delay and report the new status."""
    global SIMULATION_RUNNING, SIMULATION_DELAY
    SIMULATION_RUNNING = run
    SIMULATION_DELAY = delay
    return f"Simulazione: {'ON' if run else 'PAUSA'} · Delay: {delay:.2f}s"


# ==========================
# UI CONSTRUCTION
# ==========================

with gr.Blocks(title="AIWorld – Conway-Like LLM Life") as demo:
    gr.Markdown(
        """
# 🏆 AIWorld – Conway-Like LLM Life

Un piccolo mondo simulato dove agenti LLM:
- vivono su una griglia 2D,
- hanno identità, ruoli, interessi e memoria,
- generano azioni testuali,
- si muovono nello spazio,
- ricevono un reward semplice,
- possono "morire" e rinascere con nuove identità.

La simulazione gira in **background** in modo continuo.
La vista si aggiorna automaticamente ogni ~1 secondo.
"""
    )

    with gr.Row():
        with gr.Column(scale=1):
            model_id_in = gr.Textbox(
                label="Modello Hugging Face",
                value=DEFAULT_MODEL_ID,
                info="Esempio: Mattimax/DACMini-IT",
            )
            num_agents_in = gr.Slider(
                label="Numero di agenti",
                minimum=1,
                maximum=30,
                step=1,
                value=DEFAULT_NUM_AGENTS,
            )
            reset_btn = gr.Button("Reset mondo", variant="primary")
            run_toggle = gr.Checkbox(
                label="Simulazione in esecuzione",
                value=True,
            )
            delay_slider = gr.Slider(
                label="Delay tra step (secondi)",
                minimum=0.2,
                maximum=5.0,
                step=0.2,
                value=SIMULATION_DELAY,
            )
            status_text = gr.Markdown("Simulazione: ON")
        with gr.Column(scale=2):
            svg_out = gr.HTML(label="Mondo fisico (griglia)")
            html_out = gr.HTML(label="Dettagli agenti")
            state_out = gr.State(serialize_world(GLOBAL_WORLD))

    reset_btn.click(
        fn=ui_reset,
        inputs=[model_id_in, num_agents_in],
        outputs=[svg_out, html_out, state_out],
    )
    run_toggle.change(
        fn=ui_toggle_run,
        inputs=[run_toggle, delay_slider],
        outputs=[status_text],
    )
    delay_slider.change(
        fn=ui_toggle_run,
        inputs=[run_toggle, delay_slider],
        outputs=[status_text],
    )

    # FIX(review): ui_refresh was never wired to anything (the original
    # relied on a JS page-reload snippet embedded in the SVG).  Use
    # Gradio's documented polling instead: refresh the view every second.
    demo.load(
        fn=ui_refresh,
        inputs=None,
        outputs=[svg_out, html_out, state_out],
        every=1.0,
    )

# Start the background simulation thread (daemon: dies with the process).
threading.Thread(target=simulation_loop, daemon=True).start()

if __name__ == "__main__":
    demo.launch()