MGalli commited on
Commit
dc82457
·
verified ·
1 Parent(s): da82af8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +462 -165
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import json
3
  import random
 
4
  import threading
5
  import time
6
  from dataclasses import dataclass, asdict
@@ -14,18 +15,32 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
14
  # CONFIGURAZIONE GLOBALE
15
  # ==========================
16
 
 
 
 
 
17
  DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
18
- DEFAULT_NUM_AGENTS = 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  MEMORY_DIR = "memories"
21
  LOG_DIR = "logs"
22
-
23
  os.makedirs(MEMORY_DIR, exist_ok=True)
24
  os.makedirs(LOG_DIR, exist_ok=True)
25
 
26
- GRID_SIZE = 10 # griglia 10x10
27
- WORLD_SIZE_PX = 480 # dimensione SVG in pixel
28
-
29
  # Stato simulazione
30
  SIMULATION_RUNNING = True
31
  SIMULATION_DELAY = 1.0 # secondi tra uno step e l'altro
@@ -44,11 +59,15 @@ class Agent:
44
  memory_file: str
45
  x: int
46
  y: int
 
 
 
47
  lifetime_steps: int = 0
48
  total_reward: float = 0.0
49
  alive: bool = True
50
  last_action: str = ""
51
  last_reward: float = 0.0
 
52
 
53
  def to_dict(self) -> Dict[str, Any]:
54
  return asdict(self)
@@ -56,22 +75,74 @@ class Agent:
56
  def append_memory(self, text: str):
57
  with open(self.memory_file, "a", encoding="utf-8") as f:
58
  f.write(text + "\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  class World:
62
  def __init__(self, model_id: str = DEFAULT_MODEL_ID, num_agents: int = DEFAULT_NUM_AGENTS):
63
  self.model_id = model_id
64
  self.tokenizer, self.model = load_model(model_id)
65
- self.step = 0
66
  self.agents: List[Agent] = []
67
- self.initialize_agents(num_agents)
68
-
69
- def initialize_agents(self, num_agents: int):
 
 
 
 
 
70
  self.agents = []
71
- self.step = 0
72
  for i in range(num_agents):
73
  agent = self._create_random_agent(i)
74
  self.agents.append(agent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def _create_random_agent(self, idx: int) -> Agent:
77
  names = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
@@ -95,9 +166,9 @@ class World:
95
  f.write(f"Ruolo: {role}\n")
96
  f.write(f"Interessi: {intr}\n\n")
97
 
98
- # posizione iniziale casuale nella griglia
99
- x = random.randint(0, GRID_SIZE - 1)
100
- y = random.randint(0, GRID_SIZE - 1)
101
 
102
  return Agent(
103
  agent_id=idx,
@@ -107,125 +178,269 @@ class World:
107
  memory_file=memory_file,
108
  x=x,
109
  y=y,
 
110
  )
111
 
112
  def reset_world(self, num_agents: int, model_id: str):
113
  self.model_id = model_id
114
- self.tokenizer, self.model = load_model(model_id)
115
- self.initialize_agents(num_agents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def step_world(self) -> None:
118
- """Esegue un tick di simulazione: ogni agente vivo genera un’azione e si muove."""
119
- self.step += 1
120
  world_state_summary = self._build_world_state_summary()
 
 
 
 
121
 
 
122
  for agent in self.agents:
123
  if not agent.alive:
124
  continue
125
 
126
- # azione testuale
127
- action = generate_action(
128
  agent=agent,
129
  world_state=world_state_summary,
130
  tokenizer=self.tokenizer,
131
  model=self.model,
132
  )
133
 
134
- # movimento fisico semplice
135
- self._update_agent_position(agent, action)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- reward = self.compute_reward(agent, action, world_state_summary)
138
-
139
- agent.last_action = action
140
  agent.last_reward = reward
141
  agent.lifetime_steps += 1
142
  agent.total_reward += reward
143
 
144
- mem_entry = f"[Step {self.step}] Pos=({agent.x},{agent.y}) Azione: {action} | Reward: {reward:.3f}"
 
145
  agent.append_memory(mem_entry)
146
 
147
- self.log_transition(agent, action, reward)
148
-
149
- avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
150
- if avg_reward < -0.2 or agent.lifetime_steps > 80:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  agent.alive = False
 
 
152
 
153
  # rinasciamo gli agenti morti
154
  for i, agent in enumerate(self.agents):
155
- if not agent.alive:
156
  self.agents[i] = self._create_random_agent(agent.agent_id)
157
 
158
- def _update_agent_position(self, agent: Agent, action: str):
159
- """
160
- Movimento fisico molto semplice:
161
- - se l’azione contiene parole chiave, muoviti in una direzione
162
- - altrimenti random walk
163
- """
164
- dx, dy = 0, 0
165
- text = action.lower()
166
-
167
- # euristiche semplici
168
- if "su" in text or "nord" in text:
169
- dy = -1
170
- elif "giu" in text or "giù" in text or "sud" in text:
171
- dy = 1
172
- elif "sinistra" in text or "ovest" in text:
173
- dx = -1
174
- elif "destra" in text or "est" in text:
175
- dx = 1
176
- elif "avvicino" in text or "raggiungo" in text:
177
- # muoviti verso l’agente più vicino
178
- target = self._closest_agent(agent)
179
- if target is not None:
180
- if target.x > agent.x:
181
- dx = 1
182
- elif target.x < agent.x:
183
- dx = -1
184
- if target.y > agent.y:
185
- dy = 1
186
- elif target.y < agent.y:
187
- dy = -1
188
- else:
189
- # random walk
190
- choice = random.choice([(1,0), (-1,0), (0,1), (0,-1), (0,0)])
191
- dx, dy = choice
192
-
193
- new_x = max(0, min(GRID_SIZE - 1, agent.x + dx))
194
- new_y = max(0, min(GRID_SIZE - 1, agent.y + dy))
195
- agent.x = new_x
196
- agent.y = new_y
197
-
198
- def _closest_agent(self, agent: Agent) -> Agent | None:
199
- others = [a for a in self.agents if a.agent_id != agent.agent_id and a.alive]
200
- if not others:
201
- return None
202
- best = None
203
- best_dist = 9999
204
- for o in others:
205
- d = abs(o.x - agent.x) + abs(o.y - agent.y)
206
- if d < best_dist:
207
- best_dist = d
208
- best = o
209
- return best
210
-
211
  def _build_world_state_summary(self) -> str:
212
- lines = [f"Step globale: {self.step}", "Stato degli agenti:"]
 
 
 
 
 
 
 
 
 
 
 
213
  for a in self.agents:
214
- status = "vivo" if a.alive else "morto"
215
  lines.append(
216
- f"- {a.name} ({a.role}), interessi: {a.interests}, "
217
- f"posizione: ({a.x},{a.y}), step di vita: {a.lifetime_steps}, "
218
- f"reward totale: {a.total_reward:.2f}, stato: {status}"
 
219
  )
 
220
  return "\n".join(lines)
221
 
222
- def compute_reward(self, agent: Agent, action: str, world_state: str) -> float:
223
  """
224
- Reward semplice:
225
- - +1 se l’azione è abbastanza lunga (>= 20 caratteri)
226
- - +1 se cita almeno un altro agente
227
- - +0.5 se si è mosso (posizione cambiata rispetto allo step precedente, approssimato)
228
- - -0.5 se è troppo corta
229
  """
230
  reward = 0.0
231
  length = len(action.strip())
@@ -239,7 +454,6 @@ class World:
239
  if any(name in action for name in other_names):
240
  reward += 1.0
241
 
242
- # piccolo bonus per movimento (approssimato: se l’azione contiene parole di movimento)
243
  text = action.lower()
244
  if any(k in text for k in ["cammino", "muovo", "sposto", "avvicino", "raggiungo", "esploro"]):
245
  reward += 0.5
@@ -249,13 +463,17 @@ class World:
249
  def log_transition(self, agent: Agent, action: str, reward: float):
250
  log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
251
  transition = {
252
- "step": self.step,
253
  "agent_id": agent.agent_id,
254
  "name": agent.name,
255
  "role": agent.role,
 
256
  "interests": agent.interests,
257
  "x": agent.x,
258
  "y": agent.y,
 
 
 
259
  "action": action,
260
  "reward": reward,
261
  "lifetime_steps": agent.lifetime_steps,
@@ -287,20 +505,23 @@ def build_agent_prompt(agent: Agent, world_state: str) -> str:
287
  prompt = f"""
288
  Sei un agente in un mondo simulato su una griglia 2D.
289
 
290
- Identità:
291
  - Nome: {agent.name}
292
- - Ruolo: {agent.role}
293
  - Interessi: {agent.interests}
 
294
 
295
- Stato del mondo:
296
  {world_state}
297
 
298
- Compito:
299
- In una sola breve azione (1-3 frasi), descrivi cosa fai ora per interagire con questo mondo
300
- e con gli altri agenti. Puoi muoverti (su/giù/sinistra/destra), avvicinarti a qualcuno, esplorare, osservare.
301
- Sii coerente con il tuo ruolo e i tuoi interessi.
 
 
302
 
303
- Rispondi SOLO con l'azione, senza spiegazioni meta.
304
  """
305
  return prompt.strip()
306
 
@@ -328,45 +549,76 @@ def generate_action(agent: Agent, world_state: str, tokenizer, model) -> str:
328
  # ==========================
329
 
330
  def world_to_svg(world: World) -> str:
331
- size = WORLD_SIZE_PX
332
- cell = size // GRID_SIZE
333
 
334
- svg = [
335
- f'<svg width="{size}" height="{size}" viewBox="0 0 {size} {size}" '
336
- f'style="background:#020617;border-radius:12px;border:1px solid #1f2937;">'
337
  ]
338
 
339
  # griglia
340
- for i in range(GRID_SIZE + 1):
341
- x = i * cell
342
- y = i * cell
343
- svg.append(
344
- f'<line x1="{x}" y1="0" x2="{x}" y2="{size}" stroke="#1f2937" stroke-width="1" />'
 
 
 
 
 
 
 
 
 
 
345
  )
346
- svg.append(
347
- f'<line x1="0" y1="{y}" x2="{size}" y2="{y}" stroke="#1f2937" stroke-width="1" />'
 
 
 
 
 
 
348
  )
349
 
350
- # agenti
351
- for agent in world.agents:
352
- ax = agent.x * cell
353
- ay = agent.y * cell
354
- cx = ax + cell / 2
355
- cy = ay + cell / 2
 
 
356
 
357
- color = "#22c55e" if agent.alive else "#f97316"
 
 
 
 
 
 
 
358
 
359
- svg.append(
360
- f'<circle cx="{cx}" cy="{cy}" r="{cell*0.3}" fill="{color}" '
361
- f'stroke="#e5e7eb" stroke-width="2" />'
 
 
 
 
 
 
362
  )
363
- svg.append(
364
- f'<text x="{cx}" y="{cy+4}" font-size="10" text-anchor="middle" '
365
- f'fill="#e5e7eb" style="font-family:system-ui;">{agent.name}</text>'
366
  )
367
 
368
- svg.append("</svg>")
369
- return "".join(svg)
370
 
371
 
372
  def world_to_html_cards(world: World) -> str:
@@ -395,13 +647,13 @@ def world_to_html_cards(world: World) -> str:
395
  }
396
  .agents-grid {
397
  display: grid;
398
- grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
399
- gap: 10px;
400
  }
401
  .agent-card {
402
  background: radial-gradient(circle at top left, #1f2937, #020617);
403
  border-radius: 10px;
404
- padding: 10px 12px;
405
  border: 1px solid rgba(148, 163, 184, 0.35);
406
  box-shadow: 0 8px 20px rgba(15, 23, 42, 0.8);
407
  }
@@ -409,7 +661,7 @@ def world_to_html_cards(world: World) -> str:
409
  display: flex;
410
  justify-content: space-between;
411
  align-items: center;
412
- margin-bottom: 6px;
413
  }
414
  .agent-name {
415
  font-size: 16px;
@@ -422,7 +674,7 @@ def world_to_html_cards(world: World) -> str:
422
  letter-spacing: 0.06em;
423
  }
424
  .agent-badge {
425
- padding: 3px 7px;
426
  border-radius: 999px;
427
  font-size: 10px;
428
  font-weight: 500;
@@ -437,35 +689,51 @@ def world_to_html_cards(world: World) -> str:
437
  color: #fecaca;
438
  border: 1px solid rgba(248, 113, 113, 0.6);
439
  }
440
- .agent-meta {
441
  font-size: 11px;
442
- color: #e5e7eb;
443
- margin-bottom: 6px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  }
445
- .agent-meta span {
446
- display: inline-block;
447
- margin-right: 8px;
448
  }
449
  .agent-interests {
450
  font-size: 11px;
451
  color: #9ca3af;
452
- margin-bottom: 6px;
453
  }
454
  .agent-action {
455
  font-size: 12px;
456
  color: #e5e7eb;
457
  background: rgba(15, 23, 42, 0.7);
458
  border-radius: 8px;
459
- padding: 6px;
460
  border: 1px solid rgba(55, 65, 81, 0.8);
461
- min-height: 32px;
462
  }
463
  .agent-action-label {
464
  font-size: 10px;
465
  text-transform: uppercase;
466
  letter-spacing: 0.08em;
467
  color: #9ca3af;
468
- margin-bottom: 3px;
469
  }
470
  </style>
471
  """
@@ -473,9 +741,10 @@ def world_to_html_cards(world: World) -> str:
473
  header = f"""
474
  <div class="world-header">
475
  <div>
476
- <div class="world-title">Mondo Simulato – Step {world.step}</div>
477
  <div class="world-subtitle">
478
- Modello: <code>{world.model_id}</code> · Agenti: {len(world.agents)}
 
479
  </div>
480
  </div>
481
  </div>
@@ -484,26 +753,51 @@ def world_to_html_cards(world: World) -> str:
484
  cards = []
485
  for agent in world.agents:
486
  badge_class = "badge-alive" if agent.alive else "badge-dead"
487
- badge_text = "Vivo" if agent.alive else "Rinascita in corso"
488
  avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
 
489
  card = f"""
490
  <div class="agent-card">
491
  <div class="agent-header">
492
  <div>
493
  <div class="agent-name">{agent.name}</div>
494
  <div class="agent-role">{agent.role}</div>
 
495
  </div>
496
  <div class="agent-badge {badge_class}">{badge_text}</div>
497
  </div>
498
- <div class="agent-meta">
499
- <span>Pos: <strong>({agent.x},{agent.y})</strong></span>
500
- <span>Step vita: <strong>{agent.lifetime_steps}</strong></span>
501
- <span>Reward tot: <strong>{agent.total_reward:.2f}</strong></span>
502
- <span>Reward medio: <strong>{avg_reward:.2f}</strong></span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  </div>
 
504
  <div class="agent-interests">
505
  <strong>Interessi:</strong> {agent.interests}
506
  </div>
 
507
  <div class="agent-action">
508
  <div class="agent-action-label">Ultima azione</div>
509
  <div>{agent.last_action or "<i>Nessuna azione ancora.</i>"}</div>
@@ -535,8 +829,12 @@ def simulation_loop():
535
  def serialize_world(world: World) -> dict:
536
  return {
537
  "model_id": world.model_id,
538
- "step": world.step,
539
  "agents": [a.to_dict() for a in world.agents],
 
 
 
 
540
  }
541
 
542
 
@@ -571,15 +869,14 @@ def ui_toggle_run(run: bool, delay: float):
571
  with gr.Blocks(title="AIWorld – Conway-Like LLM Life") as demo:
572
  gr.Markdown(
573
  """
574
- # 🏆 AIWorld – Conway-Like LLM Life
575
 
576
- Un piccolo mondo simulato dove agenti LLM:
577
- - vivono su una griglia 2D,
578
- - hanno identità, ruoli, interessi e memoria,
579
- - generano azioni testuali,
580
- - si muovono nello spazio,
581
- - ricevono un reward semplice,
582
- - possono "morire" e rinascere con nuove identità.
583
 
584
  La simulazione gira in **background** in modo continuo.
585
  """
@@ -617,7 +914,7 @@ La simulazione gira in **background** in modo continuo.
617
  refresh_btn = gr.Button("Aggiorna vista")
618
 
619
  with gr.Column(scale=2):
620
- svg_out = gr.HTML(label="Mondo fisico (griglia)")
621
  html_out = gr.HTML(label="Dettagli agenti")
622
  state_out = gr.State(serialize_world(GLOBAL_WORLD))
623
 
@@ -650,4 +947,4 @@ La simulazione gira in **background** in modo continuo.
650
  threading.Thread(target=simulation_loop, daemon=True).start()
651
 
652
  if __name__ == "__main__":
653
- demo.launch()
 
1
  import os
2
  import json
3
  import random
4
+ import math
5
  import threading
6
  import time
7
  from dataclasses import dataclass, asdict
 
15
  # CONFIGURAZIONE GLOBALE
16
  # ==========================
17
 
18
+ GRID_W = 20
19
+ GRID_H = 20
20
+ CELL_SIZE = 24 # pixel per cella
21
+
22
  DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
23
+ DEFAULT_NUM_AGENTS = 10
24
+
25
+ # RL rewards
26
+ FOOD_REWARD = 1.0
27
+ DANGER_PENALTY = -1.0
28
+ RESOURCE_REWARD = 0.2
29
+ BUILD_REWARD = 0.5
30
+ COOP_GIVE_REWARD = 0.3
31
+ COOP_RECEIVE_REWARD = 0.1
32
+ DEATH_PENALTY = -0.5
33
+ STEP_COST = -0.001
34
+
35
+ COOP_RADIUS = 2.0 # in celle (distanza euclidea)
36
+ INTERACT_RADIUS = 1.0
37
+ DANGER_RADIUS = 1.0
38
 
39
  MEMORY_DIR = "memories"
40
  LOG_DIR = "logs"
 
41
  os.makedirs(MEMORY_DIR, exist_ok=True)
42
  os.makedirs(LOG_DIR, exist_ok=True)
43
 
 
 
 
44
  # Stato simulazione
45
  SIMULATION_RUNNING = True
46
  SIMULATION_DELAY = 1.0 # secondi tra uno step e l'altro
 
59
  memory_file: str
60
  x: int
61
  y: int
62
+ energy: float = 1.0
63
+ wood: int = 0
64
+ stone: int = 0
65
  lifetime_steps: int = 0
66
  total_reward: float = 0.0
67
  alive: bool = True
68
  last_action: str = ""
69
  last_reward: float = 0.0
70
+ physical_role: str = "gatherer" # 'gatherer' o 'builder' per meccaniche fisiche
71
 
72
  def to_dict(self) -> Dict[str, Any]:
73
  return asdict(self)
 
75
  def append_memory(self, text: str):
76
  with open(self.memory_file, "a", encoding="utf-8") as f:
77
  f.write(text + "\n")
78
+
79
+ def distance2(self, x: int, y: int) -> float:
80
+ return (self.x - x) ** 2 + (self.y - y) ** 2
81
+
82
+ def move_towards(self, tx: int, ty: int):
83
+ dx = tx - self.x
84
+ dy = ty - self.y
85
+ if abs(dx) > abs(dy):
86
+ self.x += 1 if dx > 0 else -1
87
+ elif dy != 0:
88
+ self.y += 1 if dy > 0 else -1
89
+ self.x = max(0, min(GRID_W - 1, self.x))
90
+ self.y = max(0, min(GRID_H - 1, self.y))
91
+
92
+ def move_away(self, tx: int, ty: int):
93
+ dx = self.x - tx
94
+ dy = self.y - ty
95
+ if abs(dx) > abs(dy):
96
+ self.x += 1 if dx > 0 else -1
97
+ elif dy != 0:
98
+ self.y += 1 if dy > 0 else -1
99
+ self.x = max(0, min(GRID_W - 1, self.x))
100
+ self.y = max(0, min(GRID_H - 1, self.y))
101
+
102
+ def move_random(self):
103
+ direction = random.choice([(1,0),(-1,0),(0,1),(0,-1),(0,0)])
104
+ self.x = max(0, min(GRID_W - 1, self.x + direction[0]))
105
+ self.y = max(0, min(GRID_H - 1, self.y + direction[1]))
106
 
107
 
108
  class World:
109
  def __init__(self, model_id: str = DEFAULT_MODEL_ID, num_agents: int = DEFAULT_NUM_AGENTS):
110
  self.model_id = model_id
111
  self.tokenizer, self.model = load_model(model_id)
112
+ self.step_count = 0
113
  self.agents: List[Agent] = []
114
+ self.food: List[Tuple[int, int]] = []
115
+ self.danger: List[Tuple[int, int]] = []
116
+ self.resources: List[Tuple[int, int, str]] = []
117
+ self.structures: List[Tuple[int, int]] = []
118
+ self.initialize_world(num_agents)
119
+
120
+ def initialize_world(self, num_agents: int):
121
+ # Inizializza agenti
122
  self.agents = []
123
+ self.step_count = 0
124
  for i in range(num_agents):
125
  agent = self._create_random_agent(i)
126
  self.agents.append(agent)
127
+
128
+ # Inizializza risorse del mondo
129
+ self.food = [
130
+ (random.randint(0, GRID_W - 1), random.randint(0, GRID_H - 1))
131
+ for _ in range(25)
132
+ ]
133
+ self.danger = [
134
+ (random.randint(0, GRID_W - 1), random.randint(0, GRID_H - 1))
135
+ for _ in range(12)
136
+ ]
137
+ self.resources = [
138
+ (
139
+ random.randint(0, GRID_W - 1),
140
+ random.randint(0, GRID_H - 1),
141
+ random.choice(["wood", "stone"]),
142
+ )
143
+ for _ in range(20)
144
+ ]
145
+ self.structures = []
146
 
147
  def _create_random_agent(self, idx: int) -> Agent:
148
  names = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
 
166
  f.write(f"Ruolo: {role}\n")
167
  f.write(f"Interessi: {intr}\n\n")
168
 
169
+ # Posizione iniziale casuale
170
+ x = random.randint(0, GRID_W - 1)
171
+ y = random.randint(0, GRID_H - 1)
172
 
173
  return Agent(
174
  agent_id=idx,
 
178
  memory_file=memory_file,
179
  x=x,
180
  y=y,
181
+ physical_role=random.choice(["gatherer", "builder"]),
182
  )
183
 
184
  def reset_world(self, num_agents: int, model_id: str):
185
  self.model_id = model_id
186
+ if model_id != self.model_id:
187
+ self.tokenizer, self.model = load_model(model_id)
188
+ self.initialize_world(num_agents)
189
+
190
+ # ---------- NEIGHBORHOOD UTILS ----------
191
+
192
+ def nearest_food(self, agent: Agent):
193
+ best = None
194
+ best_d2 = float("inf")
195
+ for (fx, fy) in self.food:
196
+ d2 = agent.distance2(fx, fy)
197
+ if d2 < best_d2:
198
+ best_d2 = d2
199
+ best = (fx, fy, d2)
200
+ return best
201
+
202
+ def nearest_danger(self, agent: Agent):
203
+ best = None
204
+ best_d2 = float("inf")
205
+ for (dx, dy) in self.danger:
206
+ d2 = agent.distance2(dx, dy)
207
+ if d2 < best_d2:
208
+ best_d2 = d2
209
+ best = (dx, dy, d2)
210
+ return best
211
+
212
+ def nearest_resource(self, agent: Agent):
213
+ best = None
214
+ best_d2 = float("inf")
215
+ for (rx, ry, rtype) in self.resources:
216
+ d2 = agent.distance2(rx, ry)
217
+ if d2 < best_d2:
218
+ best_d2 = d2
219
+ best = (rx, ry, rtype, d2)
220
+ return best
221
+
222
+ def nearest_ally(self, agent: Agent):
223
+ best = None
224
+ best_d2 = float("inf")
225
+ for other in self.agents:
226
+ if other is agent:
227
+ continue
228
+ d2 = agent.distance2(other.x, other.y)
229
+ if d2 < best_d2:
230
+ best_d2 = d2
231
+ best = (other, d2)
232
+ return best
233
+
234
+ # ---------- POLICY MISTA: LLM + EURISTICA ----------
235
+
236
+ def choose_physical_action(self, agent: Agent):
237
+ """
238
+ Azioni discrete per la fisica del mondo:
239
+ 0: wander
240
+ 1: move_to_food
241
+ 2: move_away_from_danger
242
+ 3: move_to_resource
243
+ 4: move_to_ally_for_coop
244
+ 5: build_here
245
+ """
246
+ food = self.nearest_food(agent)
247
+ danger = self.nearest_danger(agent)
248
+ res = self.nearest_resource(agent)
249
+ ally = self.nearest_ally(agent)
250
+
251
+ # pericolo vicino
252
+ if danger and danger[2] <= DANGER_RADIUS ** 2:
253
+ return 2, danger
254
+
255
+ # energia bassa → cibo
256
+ if agent.energy < 0.5 and food:
257
+ return 1, food
258
+
259
+ # builder con risorse → costruisci
260
+ if agent.physical_role == "builder" and agent.wood >= 2 and agent.stone >= 1:
261
+ return 5, None
262
+
263
+ # risorse disponibili → vai a risorse
264
+ if res:
265
+ return 3, res
266
+
267
+ # cooperazione se vicino ad alleato e con surplus
268
+ if ally and ally[1] <= COOP_RADIUS ** 2:
269
+ other, _ = ally
270
+ if (agent.wood > other.wood + 1) or (agent.stone > other.stone + 1):
271
+ return 4, ally
272
+
273
+ # default
274
+ return 0, None
275
+
276
+ # ---------- STEP DI SIMULAZIONE ----------
277
 
278
  def step_world(self) -> None:
279
+ self.step_count += 1
280
+
281
  world_state_summary = self._build_world_state_summary()
282
+ new_food = []
283
+ new_resources = []
284
+ food_used = [False] * len(self.food)
285
+ res_used = [False] * len(self.resources)
286
 
287
+ # prima passata: movimento + interazioni locali
288
  for agent in self.agents:
289
  if not agent.alive:
290
  continue
291
 
292
+ # Genera azione testuale dall'LLM
293
+ text_action = generate_action(
294
  agent=agent,
295
  world_state=world_state_summary,
296
  tokenizer=self.tokenizer,
297
  model=self.model,
298
  )
299
 
300
+ # Scegli azione fisica basata su euristica RL
301
+ phys_action, phys_info = self.choose_physical_action(agent)
302
+
303
+ reward = 0.0
304
+ reward += STEP_COST
305
+ agent.energy -= 0.02
306
+
307
+ # Esegui azione fisica
308
+ if phys_action == 1 and phys_info: # move_to_food
309
+ fx, fy, _ = phys_info
310
+ agent.move_towards(fx, fy)
311
+ agent.last_action = f"Vado verso cibo a ({fx},{fy}) - {text_action}"
312
+ elif phys_action == 2 and phys_info: # move_away_from_danger
313
+ dx, dy, _ = phys_info
314
+ agent.move_away(dx, dy)
315
+ agent.last_action = f"Scappo da pericolo a ({dx},{dy}) - {text_action}"
316
+ elif phys_action == 3 and phys_info: # move_to_resource
317
+ rx, ry, rtype, d2 = phys_info
318
+ if d2 > INTERACT_RADIUS ** 2:
319
+ agent.move_towards(rx, ry)
320
+ agent.last_action = f"Raccolgo {rtype} a ({rx},{ry}) - {text_action}"
321
+ elif phys_action == 4 and phys_info: # move_to_ally_for_coop
322
+ other, d2 = phys_info
323
+ if d2 > INTERACT_RADIUS ** 2:
324
+ agent.move_towards(other.x, other.y)
325
+ agent.last_action = f"Coopero con {other.name} - {text_action}"
326
+ elif phys_action == 5:
327
+ agent.last_action = f"Costruisco struttura - {text_action}"
328
+ else:
329
+ agent.move_random()
330
+ agent.last_action = f"Esploro - {text_action}"
331
+
332
+ # pericoli
333
+ for (dx, dy) in self.danger:
334
+ if agent.distance2(dx, dy) <= DANGER_RADIUS ** 2:
335
+ reward += DANGER_PENALTY
336
+ agent.energy -= 0.2
337
+
338
+ # cibo
339
+ for i, (fx, fy) in enumerate(self.food):
340
+ if not food_used[i] and agent.distance2(fx, fy) <= INTERACT_RADIUS ** 2:
341
+ food_used[i] = True
342
+ reward += FOOD_REWARD
343
+ agent.energy = min(1.0, agent.energy + 0.5)
344
+
345
+ # risorse
346
+ for i, (rx, ry, rtype) in enumerate(self.resources):
347
+ if not res_used[i] and agent.distance2(rx, ry) <= INTERACT_RADIUS ** 2:
348
+ res_used[i] = True
349
+ reward += RESOURCE_REWARD
350
+ if rtype == "wood":
351
+ agent.wood += 1
352
+ else:
353
+ agent.stone += 1
354
+
355
+ # costruzione
356
+ if phys_action == 5 and agent.wood >= 2 and agent.stone >= 1:
357
+ self.structures.append((agent.x, agent.y))
358
+ agent.wood -= 2
359
+ agent.stone -= 1
360
+ reward += BUILD_REWARD
361
+
362
+ # Reward per azione testuale
363
+ text_reward = self.compute_text_reward(agent, text_action, world_state_summary)
364
+ reward += text_reward
365
 
 
 
 
366
  agent.last_reward = reward
367
  agent.lifetime_steps += 1
368
  agent.total_reward += reward
369
 
370
+ # Salva in memoria
371
+ mem_entry = f"[Step {self.step_count}] Pos=({agent.x},{agent.y}) Azione: {agent.last_action} | Reward: {reward:.3f}"
372
  agent.append_memory(mem_entry)
373
 
374
+ # cooperazione (seconda passata)
375
+ for agent in self.agents:
376
+ if not agent.alive:
377
+ continue
378
+ ally_info = self.nearest_ally(agent)
379
+ if not ally_info:
380
+ continue
381
+ other, d2 = ally_info
382
+ if d2 <= COOP_RADIUS ** 2:
383
+ if agent.wood > other.wood + 1:
384
+ agent.wood -= 1
385
+ other.wood += 1
386
+ agent.last_reward += COOP_GIVE_REWARD
387
+ other.last_reward += COOP_RECEIVE_REWARD
388
+ if agent.stone > other.stone + 1:
389
+ agent.stone -= 1
390
+ other.stone += 1
391
+ agent.last_reward += COOP_GIVE_REWARD
392
+ other.last_reward += COOP_RECEIVE_REWARD
393
+
394
+ # cibo e risorse rimanenti
395
+ for i, (fx, fy) in enumerate(self.food):
396
+ if not food_used[i]:
397
+ new_food.append((fx, fy))
398
+ for i, (rx, ry, rtype) in enumerate(self.resources):
399
+ if not res_used[i]:
400
+ new_resources.append((rx, ry, rtype))
401
+
402
+ self.food = new_food
403
+ self.resources = new_resources
404
+
405
+ # respawn agenti morti
406
+ for agent in self.agents:
407
+ if agent.energy <= 0:
408
  agent.alive = False
409
+ agent.last_action = "MORTO - attendo rinascita"
410
+ agent.last_reward += DEATH_PENALTY
411
 
412
  # rinasciamo gli agenti morti
413
  for i, agent in enumerate(self.agents):
414
+ if not agent.alive and self.step_count % 10 == 0: # Rinasce ogni 10 step
415
  self.agents[i] = self._create_random_agent(agent.agent_id)
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  def _build_world_state_summary(self) -> str:
418
+ lines = [
419
+ f"Step globale: {self.step_count}",
420
+ f"Grid: {GRID_W}x{GRID_H}",
421
+ "STATO DEL MONDO FISICO:",
422
+ f"- Cibo disponibile: {len(self.food)} unità",
423
+ f"- Pericoli attivi: {len(self.danger)}",
424
+ f"- Risorse: {len(self.resources)} (legno/pietra)",
425
+ f"- Strutture costruite: {len(self.structures)}",
426
+ "",
427
+ "STATO DEGLI AGENTI:"
428
+ ]
429
+
430
  for a in self.agents:
431
+ status = "VIVO" if a.alive else "MORTO"
432
  lines.append(
433
+ f"- {a.name} ({a.role}) - Ruolo fisico: {a.physical_role}, "
434
+ f"Pos: ({a.x},{a.y}), Energia: {a.energy:.2f}, "
435
+ f"Legno: {a.wood}, Pietra: {a.stone}, "
436
+ f"Reward tot: {a.total_reward:.2f}, Stato: {status}"
437
  )
438
+
439
  return "\n".join(lines)
440
 
441
+ def compute_text_reward(self, agent: Agent, action: str, world_state: str) -> float:
442
  """
443
+ Reward per l'azione testuale (dal secondo codice)
 
 
 
 
444
  """
445
  reward = 0.0
446
  length = len(action.strip())
 
454
  if any(name in action for name in other_names):
455
  reward += 1.0
456
 
 
457
  text = action.lower()
458
  if any(k in text for k in ["cammino", "muovo", "sposto", "avvicino", "raggiungo", "esploro"]):
459
  reward += 0.5
 
463
  def log_transition(self, agent: Agent, action: str, reward: float):
464
  log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
465
  transition = {
466
+ "step": self.step_count,
467
  "agent_id": agent.agent_id,
468
  "name": agent.name,
469
  "role": agent.role,
470
+ "physical_role": agent.physical_role,
471
  "interests": agent.interests,
472
  "x": agent.x,
473
  "y": agent.y,
474
+ "energy": agent.energy,
475
+ "wood": agent.wood,
476
+ "stone": agent.stone,
477
  "action": action,
478
  "reward": reward,
479
  "lifetime_steps": agent.lifetime_steps,
 
505
  prompt = f"""
506
  Sei un agente in un mondo simulato su una griglia 2D.
507
 
508
+ IDENTITÀ:
509
  - Nome: {agent.name}
510
+ - Ruolo mentale: {agent.role}
511
  - Interessi: {agent.interests}
512
+ - Ruolo fisico: {agent.physical_role}
513
 
514
+ STATO DEL MONDO:
515
  {world_state}
516
 
517
+ COMPITO:
518
+ Descrivi in 1-3 frasi cosa fai ora per interagire con questo mondo.
519
+ Considera che:
520
+ 1. Hai energia: {agent.energy:.2f}, legno: {agent.wood}, pietra: {agent.stone}
521
+ 2. Puoi muoverti, raccogliere cibo/risorse, evitare pericoli, costruire, cooperare
522
+ 3. Il tuo ruolo fisico è: {agent.physical_role}
523
 
524
+ Rispondi SOLO con l'azione descrittiva, senza spiegazioni meta.
525
  """
526
  return prompt.strip()
527
 
 
549
  # ==========================
550
 
551
  def world_to_svg(world: World) -> str:
552
+ width = GRID_W * CELL_SIZE
553
+ height = GRID_H * CELL_SIZE
554
 
555
+ svg_parts = [
556
+ f'<svg width="{width}" height="{height}" viewBox="0 0 {width} {height}" '
557
+ f'style="background:#020617;border:1px solid #444;font-family:monospace;">'
558
  ]
559
 
560
  # griglia
561
+ for x in range(GRID_W):
562
+ for y in range(GRID_H):
563
+ px = x * CELL_SIZE
564
+ py = y * CELL_SIZE
565
+ svg_parts.append(
566
+ f'<rect x="{px}" y="{py}" width="{CELL_SIZE}" height="{CELL_SIZE}" '
567
+ f'style="fill:#020617;stroke:#1f2937;stroke-width:1" />'
568
+ )
569
+
570
+ # cibo (giallo)
571
+ for (fx, fy) in world.food:
572
+ px = fx * CELL_SIZE + CELL_SIZE / 2
573
+ py = fy * CELL_SIZE + CELL_SIZE / 2
574
+ svg_parts.append(
575
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/5}" fill="#ffd700" />'
576
  )
577
+
578
+ # pericoli (rosso)
579
+ for (dx, dy) in world.danger:
580
+ px = dx * CELL_SIZE + CELL_SIZE / 2
581
+ py = dy * CELL_SIZE + CELL_SIZE / 2
582
+ svg_parts.append(
583
+ f'<rect x="{px - CELL_SIZE/4}" y="{py - CELL_SIZE/4}" '
584
+ f'width="{CELL_SIZE/2}" height="{CELL_SIZE/2}" fill="#ff3333" />'
585
  )
586
 
587
+ # risorse (blu)
588
+ for (rx, ry, rtype) in world.resources:
589
+ px = rx * CELL_SIZE + CELL_SIZE / 2
590
+ py = ry * CELL_SIZE + CELL_SIZE / 2
591
+ color = "#33aaff" if rtype == "wood" else "#3366ff"
592
+ svg_parts.append(
593
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/6}" fill="{color}" />'
594
+ )
595
 
596
+ # strutture (bianco)
597
+ for (sx, sy) in world.structures:
598
+ px = sx * CELL_SIZE + CELL_SIZE / 2
599
+ py = sy * CELL_SIZE + CELL_SIZE / 2
600
+ svg_parts.append(
601
+ f'<rect x="{px - CELL_SIZE/4}" y="{py - CELL_SIZE/4}" '
602
+ f'width="{CELL_SIZE/2}" height="{CELL_SIZE/2}" fill="#ffffff" />'
603
+ )
604
 
605
+ # agenti
606
+ for a in world.agents:
607
+ if not a.alive:
608
+ continue
609
+ px = a.x * CELL_SIZE + CELL_SIZE / 2
610
+ py = a.y * CELL_SIZE + CELL_SIZE / 2
611
+ color = "#00ff7f" if a.physical_role == "gatherer" else "#00e5ff"
612
+ svg_parts.append(
613
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/3}" fill="{color}" stroke="#e5e7eb" stroke-width="2" />'
614
  )
615
+ svg_parts.append(
616
+ f'<text x="{px}" y="{py+4}" font-size="10" text-anchor="middle" '
617
+ f'fill="#e5e7eb" style="font-family:monospace;">{a.name}</text>'
618
  )
619
 
620
+ svg_parts.append("</svg>")
621
+ return "".join(svg_parts)
622
 
623
 
624
  def world_to_html_cards(world: World) -> str:
 
647
  }
648
  .agents-grid {
649
  display: grid;
650
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
651
+ gap: 12px;
652
  }
653
  .agent-card {
654
  background: radial-gradient(circle at top left, #1f2937, #020617);
655
  border-radius: 10px;
656
+ padding: 12px 14px;
657
  border: 1px solid rgba(148, 163, 184, 0.35);
658
  box-shadow: 0 8px 20px rgba(15, 23, 42, 0.8);
659
  }
 
661
  display: flex;
662
  justify-content: space-between;
663
  align-items: center;
664
+ margin-bottom: 8px;
665
  }
666
  .agent-name {
667
  font-size: 16px;
 
674
  letter-spacing: 0.06em;
675
  }
676
  .agent-badge {
677
+ padding: 3px 8px;
678
  border-radius: 999px;
679
  font-size: 10px;
680
  font-weight: 500;
 
689
  color: #fecaca;
690
  border: 1px solid rgba(248, 113, 113, 0.6);
691
  }
692
+ .agent-physical-role {
693
  font-size: 11px;
694
+ color: #c4b5fd;
695
+ margin-bottom: 4px;
696
+ }
697
+ .agent-stats {
698
+ display: grid;
699
+ grid-template-columns: repeat(2, 1fr);
700
+ gap: 6px;
701
+ margin-bottom: 8px;
702
+ font-size: 11px;
703
+ }
704
+ .stat-item {
705
+ background: rgba(15, 23, 42, 0.7);
706
+ padding: 4px 8px;
707
+ border-radius: 6px;
708
+ border: 1px solid rgba(55, 65, 81, 0.8);
709
+ }
710
+ .stat-label {
711
+ color: #9ca3af;
712
  }
713
+ .stat-value {
714
+ color: #e5e7eb;
715
+ font-weight: 600;
716
  }
717
  .agent-interests {
718
  font-size: 11px;
719
  color: #9ca3af;
720
+ margin-bottom: 8px;
721
  }
722
  .agent-action {
723
  font-size: 12px;
724
  color: #e5e7eb;
725
  background: rgba(15, 23, 42, 0.7);
726
  border-radius: 8px;
727
+ padding: 8px;
728
  border: 1px solid rgba(55, 65, 81, 0.8);
729
+ min-height: 40px;
730
  }
731
  .agent-action-label {
732
  font-size: 10px;
733
  text-transform: uppercase;
734
  letter-spacing: 0.08em;
735
  color: #9ca3af;
736
+ margin-bottom: 4px;
737
  }
738
  </style>
739
  """
 
741
  header = f"""
742
  <div class="world-header">
743
  <div>
744
+ <div class="world-title">Mondo Simulato – Step {world.step_count}</div>
745
  <div class="world-subtitle">
746
+ Modello: <code>{world.model_id}</code> · Agenti: {len([a for a in world.agents if a.alive])}/{len(world.agents)} ·
747
+ Cibo: {len(world.food)} · Risorse: {len(world.resources)} · Strutture: {len(world.structures)}
748
  </div>
749
  </div>
750
  </div>
 
753
  cards = []
754
  for agent in world.agents:
755
  badge_class = "badge-alive" if agent.alive else "badge-dead"
756
+ badge_text = "VIVO" if agent.alive else "MORTO"
757
  avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
758
+
759
  card = f"""
760
  <div class="agent-card">
761
  <div class="agent-header">
762
  <div>
763
  <div class="agent-name">{agent.name}</div>
764
  <div class="agent-role">{agent.role}</div>
765
+ <div class="agent-physical-role">Ruolo fisico: {agent.physical_role}</div>
766
  </div>
767
  <div class="agent-badge {badge_class}">{badge_text}</div>
768
  </div>
769
+
770
+ <div class="agent-stats">
771
+ <div class="stat-item">
772
+ <div class="stat-label">Posizione</div>
773
+ <div class="stat-value">({agent.x},{agent.y})</div>
774
+ </div>
775
+ <div class="stat-item">
776
+ <div class="stat-label">Energia</div>
777
+ <div class="stat-value">{agent.energy:.2f}</div>
778
+ </div>
779
+ <div class="stat-item">
780
+ <div class="stat-label">Legno</div>
781
+ <div class="stat-value">{agent.wood}</div>
782
+ </div>
783
+ <div class="stat-item">
784
+ <div class="stat-label">Pietra</div>
785
+ <div class="stat-value">{agent.stone}</div>
786
+ </div>
787
+ <div class="stat-item">
788
+ <div class="stat-label">Step vita</div>
789
+ <div class="stat-value">{agent.lifetime_steps}</div>
790
+ </div>
791
+ <div class="stat-item">
792
+ <div class="stat-label">Reward step</div>
793
+ <div class="stat-value">{agent.last_reward:.3f}</div>
794
+ </div>
795
  </div>
796
+
797
  <div class="agent-interests">
798
  <strong>Interessi:</strong> {agent.interests}
799
  </div>
800
+
801
  <div class="agent-action">
802
  <div class="agent-action-label">Ultima azione</div>
803
  <div>{agent.last_action or "<i>Nessuna azione ancora.</i>"}</div>
 
829
def serialize_world(world: World) -> dict:
    """Build a JSON-serializable snapshot of the current world state.

    The snapshot contains the model id, the step counter, every agent's
    state (via ``Agent.to_dict``), and the world's food, danger, resource
    and structure collections, in that key order.
    """
    snapshot: dict = {
        "model_id": world.model_id,
        "step": world.step_count,
    }
    # Agents serialize themselves; the remaining collections are copied by reference.
    snapshot["agents"] = [agent.to_dict() for agent in world.agents]
    for field_name in ("food", "danger", "resources", "structures"):
        snapshot[field_name] = getattr(world, field_name)
    return snapshot
839
 
840
 
 
869
  with gr.Blocks(title="AIWorld – Conway-Like LLM Life") as demo:
870
  gr.Markdown(
871
  """
872
+ # 🏆 AIWorld – Conway-Like LLM Life (Versione Ibrida)
873
 
874
+ Un mondo simulato ibrido dove agenti LLM:
875
+ - Hanno identità, ruoli mentali e interessi (LLM)
876
+ - Interagiscono con un mondo fisico con cibo, pericoli, risorse e strutture (sistema RL)
877
+ - Generano azioni testuali descrittive basate sulla loro situazione
878
+ - Ricevono reward sia per le azioni testuali che per le interazioni fisiche
879
+ - Possono morire e rinascere con nuove identità
 
880
 
881
  La simulazione gira in **background** in modo continuo.
882
  """
 
914
  refresh_btn = gr.Button("Aggiorna vista")
915
 
916
  with gr.Column(scale=2):
917
+ svg_out = gr.HTML(label="Mondo fisico (griglia 20x20)")
918
  html_out = gr.HTML(label="Dettagli agenti")
919
  state_out = gr.State(serialize_world(GLOBAL_WORLD))
920
 
 
947
  threading.Thread(target=simulation_loop, daemon=True).start()
948
 
949
if __name__ == "__main__":
    # Start the Gradio app; blocks until the server is stopped.
    # (The background simulation thread was started above as a daemon.)
    demo.launch()