MGalli commited on
Commit
dc82457
·
verified ·
1 Parent(s): da82af8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +462 -165
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import json
3
  import random
 
4
  import threading
5
  import time
6
  from dataclasses import dataclass, asdict
@@ -14,18 +15,32 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
14
  # CONFIGURAZIONE GLOBALE
15
  # ==========================
16
 
 
 
 
 
17
  DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
18
- DEFAULT_NUM_AGENTS = 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  MEMORY_DIR = "memories"
21
  LOG_DIR = "logs"
22
-
23
  os.makedirs(MEMORY_DIR, exist_ok=True)
24
  os.makedirs(LOG_DIR, exist_ok=True)
25
 
26
- GRID_SIZE = 10 # griglia 10x10
27
- WORLD_SIZE_PX = 480 # dimensione SVG in pixel
28
-
29
  # Stato simulazione
30
  SIMULATION_RUNNING = True
31
  SIMULATION_DELAY = 1.0 # secondi tra uno step e l'altro
@@ -44,11 +59,15 @@ class Agent:
44
  memory_file: str
45
  x: int
46
  y: int
 
 
 
47
  lifetime_steps: int = 0
48
  total_reward: float = 0.0
49
  alive: bool = True
50
  last_action: str = ""
51
  last_reward: float = 0.0
 
52
 
53
  def to_dict(self) -> Dict[str, Any]:
54
  return asdict(self)
@@ -56,22 +75,74 @@ class Agent:
56
  def append_memory(self, text: str):
57
  with open(self.memory_file, "a", encoding="utf-8") as f:
58
  f.write(text + "\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  class World:
62
  def __init__(self, model_id: str = DEFAULT_MODEL_ID, num_agents: int = DEFAULT_NUM_AGENTS):
63
  self.model_id = model_id
64
  self.tokenizer, self.model = load_model(model_id)
65
- self.step = 0
66
  self.agents: List[Agent] = []
67
- self.initialize_agents(num_agents)
68
-
69
- def initialize_agents(self, num_agents: int):
 
 
 
 
 
70
  self.agents = []
71
- self.step = 0
72
  for i in range(num_agents):
73
  agent = self._create_random_agent(i)
74
  self.agents.append(agent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def _create_random_agent(self, idx: int) -> Agent:
77
  names = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
@@ -95,9 +166,9 @@ class World:
95
  f.write(f"Ruolo: {role}\n")
96
  f.write(f"Interessi: {intr}\n\n")
97
 
98
- # posizione iniziale casuale nella griglia
99
- x = random.randint(0, GRID_SIZE - 1)
100
- y = random.randint(0, GRID_SIZE - 1)
101
 
102
  return Agent(
103
  agent_id=idx,
@@ -107,125 +178,269 @@ class World:
107
  memory_file=memory_file,
108
  x=x,
109
  y=y,
 
110
  )
111
 
112
  def reset_world(self, num_agents: int, model_id: str):
113
  self.model_id = model_id
114
- self.tokenizer, self.model = load_model(model_id)
115
- self.initialize_agents(num_agents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  def step_world(self) -> None:
118
- """Esegue un tick di simulazione: ogni agente vivo genera un’azione e si muove."""
119
- self.step += 1
120
  world_state_summary = self._build_world_state_summary()
 
 
 
 
121
 
 
122
  for agent in self.agents:
123
  if not agent.alive:
124
  continue
125
 
126
- # azione testuale
127
- action = generate_action(
128
  agent=agent,
129
  world_state=world_state_summary,
130
  tokenizer=self.tokenizer,
131
  model=self.model,
132
  )
133
 
134
- # movimento fisico semplice
135
- self._update_agent_position(agent, action)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
- reward = self.compute_reward(agent, action, world_state_summary)
138
-
139
- agent.last_action = action
140
  agent.last_reward = reward
141
  agent.lifetime_steps += 1
142
  agent.total_reward += reward
143
 
144
- mem_entry = f"[Step {self.step}] Pos=({agent.x},{agent.y}) Azione: {action} | Reward: {reward:.3f}"
 
145
  agent.append_memory(mem_entry)
146
 
147
- self.log_transition(agent, action, reward)
148
-
149
- avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
150
- if avg_reward < -0.2 or agent.lifetime_steps > 80:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  agent.alive = False
 
 
152
 
153
  # rinasciamo gli agenti morti
154
  for i, agent in enumerate(self.agents):
155
- if not agent.alive:
156
  self.agents[i] = self._create_random_agent(agent.agent_id)
157
 
158
- def _update_agent_position(self, agent: Agent, action: str):
159
- """
160
- Movimento fisico molto semplice:
161
- - se l’azione contiene parole chiave, muoviti in una direzione
162
- - altrimenti random walk
163
- """
164
- dx, dy = 0, 0
165
- text = action.lower()
166
-
167
- # euristiche semplici
168
- if "su" in text or "nord" in text:
169
- dy = -1
170
- elif "giu" in text or "giù" in text or "sud" in text:
171
- dy = 1
172
- elif "sinistra" in text or "ovest" in text:
173
- dx = -1
174
- elif "destra" in text or "est" in text:
175
- dx = 1
176
- elif "avvicino" in text or "raggiungo" in text:
177
- # muoviti verso l’agente più vicino
178
- target = self._closest_agent(agent)
179
- if target is not None:
180
- if target.x > agent.x:
181
- dx = 1
182
- elif target.x < agent.x:
183
- dx = -1
184
- if target.y > agent.y:
185
- dy = 1
186
- elif target.y < agent.y:
187
- dy = -1
188
- else:
189
- # random walk
190
- choice = random.choice([(1,0), (-1,0), (0,1), (0,-1), (0,0)])
191
- dx, dy = choice
192
-
193
- new_x = max(0, min(GRID_SIZE - 1, agent.x + dx))
194
- new_y = max(0, min(GRID_SIZE - 1, agent.y + dy))
195
- agent.x = new_x
196
- agent.y = new_y
197
-
198
- def _closest_agent(self, agent: Agent) -> Agent | None:
199
- others = [a for a in self.agents if a.agent_id != agent.agent_id and a.alive]
200
- if not others:
201
- return None
202
- best = None
203
- best_dist = 9999
204
- for o in others:
205
- d = abs(o.x - agent.x) + abs(o.y - agent.y)
206
- if d < best_dist:
207
- best_dist = d
208
- best = o
209
- return best
210
-
211
  def _build_world_state_summary(self) -> str:
212
- lines = [f"Step globale: {self.step}", "Stato degli agenti:"]
 
 
 
 
 
 
 
 
 
 
 
213
  for a in self.agents:
214
- status = "vivo" if a.alive else "morto"
215
  lines.append(
216
- f"- {a.name} ({a.role}), interessi: {a.interests}, "
217
- f"posizione: ({a.x},{a.y}), step di vita: {a.lifetime_steps}, "
218
- f"reward totale: {a.total_reward:.2f}, stato: {status}"
 
219
  )
 
220
  return "\n".join(lines)
221
 
222
- def compute_reward(self, agent: Agent, action: str, world_state: str) -> float:
223
  """
224
- Reward semplice:
225
- - +1 se l’azione è abbastanza lunga (>= 20 caratteri)
226
- - +1 se cita almeno un altro agente
227
- - +0.5 se si è mosso (posizione cambiata rispetto allo step precedente, approssimato)
228
- - -0.5 se è troppo corta
229
  """
230
  reward = 0.0
231
  length = len(action.strip())
@@ -239,7 +454,6 @@ class World:
239
  if any(name in action for name in other_names):
240
  reward += 1.0
241
 
242
- # piccolo bonus per movimento (approssimato: se l’azione contiene parole di movimento)
243
  text = action.lower()
244
  if any(k in text for k in ["cammino", "muovo", "sposto", "avvicino", "raggiungo", "esploro"]):
245
  reward += 0.5
@@ -249,13 +463,17 @@ class World:
249
  def log_transition(self, agent: Agent, action: str, reward: float):
250
  log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
251
  transition = {
252
- "step": self.step,
253
  "agent_id": agent.agent_id,
254
  "name": agent.name,
255
  "role": agent.role,
 
256
  "interests": agent.interests,
257
  "x": agent.x,
258
  "y": agent.y,
 
 
 
259
  "action": action,
260
  "reward": reward,
261
  "lifetime_steps": agent.lifetime_steps,
@@ -287,20 +505,23 @@ def build_agent_prompt(agent: Agent, world_state: str) -> str:
287
  prompt = f"""
288
  Sei un agente in un mondo simulato su una griglia 2D.
289
 
290
- Identità:
291
  - Nome: {agent.name}
292
- - Ruolo: {agent.role}
293
  - Interessi: {agent.interests}
 
294
 
295
- Stato del mondo:
296
  {world_state}
297
 
298
- Compito:
299
- In una sola breve azione (1-3 frasi), descrivi cosa fai ora per interagire con questo mondo
300
- e con gli altri agenti. Puoi muoverti (su/giù/sinistra/destra), avvicinarti a qualcuno, esplorare, osservare.
301
- Sii coerente con il tuo ruolo e i tuoi interessi.
 
 
302
 
303
- Rispondi SOLO con l'azione, senza spiegazioni meta.
304
  """
305
  return prompt.strip()
306
 
@@ -328,45 +549,76 @@ def generate_action(agent: Agent, world_state: str, tokenizer, model) -> str:
328
  # ==========================
329
 
330
  def world_to_svg(world: World) -> str:
331
- size = WORLD_SIZE_PX
332
- cell = size // GRID_SIZE
333
 
334
- svg = [
335
- f'<svg width="{size}" height="{size}" viewBox="0 0 {size} {size}" '
336
- f'style="background:#020617;border-radius:12px;border:1px solid #1f2937;">'
337
  ]
338
 
339
  # griglia
340
- for i in range(GRID_SIZE + 1):
341
- x = i * cell
342
- y = i * cell
343
- svg.append(
344
- f'<line x1="{x}" y1="0" x2="{x}" y2="{size}" stroke="#1f2937" stroke-width="1" />'
 
 
 
 
 
 
 
 
 
 
345
  )
346
- svg.append(
347
- f'<line x1="0" y1="{y}" x2="{size}" y2="{y}" stroke="#1f2937" stroke-width="1" />'
 
 
 
 
 
 
348
  )
349
 
350
- # agenti
351
- for agent in world.agents:
352
- ax = agent.x * cell
353
- ay = agent.y * cell
354
- cx = ax + cell / 2
355
- cy = ay + cell / 2
 
 
356
 
357
- color = "#22c55e" if agent.alive else "#f97316"
 
 
 
 
 
 
 
358
 
359
- svg.append(
360
- f'<circle cx="{cx}" cy="{cy}" r="{cell*0.3}" fill="{color}" '
361
- f'stroke="#e5e7eb" stroke-width="2" />'
 
 
 
 
 
 
362
  )
363
- svg.append(
364
- f'<text x="{cx}" y="{cy+4}" font-size="10" text-anchor="middle" '
365
- f'fill="#e5e7eb" style="font-family:system-ui;">{agent.name}</text>'
366
  )
367
 
368
- svg.append("</svg>")
369
- return "".join(svg)
370
 
371
 
372
  def world_to_html_cards(world: World) -> str:
@@ -395,13 +647,13 @@ def world_to_html_cards(world: World) -> str:
395
  }
396
  .agents-grid {
397
  display: grid;
398
- grid-template-columns: repeat(auto-fill, minmax(260px, 1fr));
399
- gap: 10px;
400
  }
401
  .agent-card {
402
  background: radial-gradient(circle at top left, #1f2937, #020617);
403
  border-radius: 10px;
404
- padding: 10px 12px;
405
  border: 1px solid rgba(148, 163, 184, 0.35);
406
  box-shadow: 0 8px 20px rgba(15, 23, 42, 0.8);
407
  }
@@ -409,7 +661,7 @@ def world_to_html_cards(world: World) -> str:
409
  display: flex;
410
  justify-content: space-between;
411
  align-items: center;
412
- margin-bottom: 6px;
413
  }
414
  .agent-name {
415
  font-size: 16px;
@@ -422,7 +674,7 @@ def world_to_html_cards(world: World) -> str:
422
  letter-spacing: 0.06em;
423
  }
424
  .agent-badge {
425
- padding: 3px 7px;
426
  border-radius: 999px;
427
  font-size: 10px;
428
  font-weight: 500;
@@ -437,35 +689,51 @@ def world_to_html_cards(world: World) -> str:
437
  color: #fecaca;
438
  border: 1px solid rgba(248, 113, 113, 0.6);
439
  }
440
- .agent-meta {
441
  font-size: 11px;
442
- color: #e5e7eb;
443
- margin-bottom: 6px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
444
  }
445
- .agent-meta span {
446
- display: inline-block;
447
- margin-right: 8px;
448
  }
449
  .agent-interests {
450
  font-size: 11px;
451
  color: #9ca3af;
452
- margin-bottom: 6px;
453
  }
454
  .agent-action {
455
  font-size: 12px;
456
  color: #e5e7eb;
457
  background: rgba(15, 23, 42, 0.7);
458
  border-radius: 8px;
459
- padding: 6px;
460
  border: 1px solid rgba(55, 65, 81, 0.8);
461
- min-height: 32px;
462
  }
463
  .agent-action-label {
464
  font-size: 10px;
465
  text-transform: uppercase;
466
  letter-spacing: 0.08em;
467
  color: #9ca3af;
468
- margin-bottom: 3px;
469
  }
470
  </style>
471
  """
@@ -473,9 +741,10 @@ def world_to_html_cards(world: World) -> str:
473
  header = f"""
474
  <div class="world-header">
475
  <div>
476
- <div class="world-title">Mondo Simulato – Step {world.step}</div>
477
  <div class="world-subtitle">
478
- Modello: <code>{world.model_id}</code> · Agenti: {len(world.agents)}
 
479
  </div>
480
  </div>
481
  </div>
@@ -484,26 +753,51 @@ def world_to_html_cards(world: World) -> str:
484
  cards = []
485
  for agent in world.agents:
486
  badge_class = "badge-alive" if agent.alive else "badge-dead"
487
- badge_text = "Vivo" if agent.alive else "Rinascita in corso"
488
  avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
 
489
  card = f"""
490
  <div class="agent-card">
491
  <div class="agent-header">
492
  <div>
493
  <div class="agent-name">{agent.name}</div>
494
  <div class="agent-role">{agent.role}</div>
 
495
  </div>
496
  <div class="agent-badge {badge_class}">{badge_text}</div>
497
  </div>
498
- <div class="agent-meta">
499
- <span>Pos: <strong>({agent.x},{agent.y})</strong></span>
500
- <span>Step vita: <strong>{agent.lifetime_steps}</strong></span>
501
- <span>Reward tot: <strong>{agent.total_reward:.2f}</strong></span>
502
- <span>Reward medio: <strong>{avg_reward:.2f}</strong></span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  </div>
 
504
  <div class="agent-interests">
505
  <strong>Interessi:</strong> {agent.interests}
506
  </div>
 
507
  <div class="agent-action">
508
  <div class="agent-action-label">Ultima azione</div>
509
  <div>{agent.last_action or "<i>Nessuna azione ancora.</i>"}</div>
@@ -535,8 +829,12 @@ def simulation_loop():
535
  def serialize_world(world: World) -> dict:
536
  return {
537
  "model_id": world.model_id,
538
- "step": world.step,
539
  "agents": [a.to_dict() for a in world.agents],
 
 
 
 
540
  }
541
 
542
 
@@ -571,15 +869,14 @@ def ui_toggle_run(run: bool, delay: float):
571
  with gr.Blocks(title="AIWorld – Conway-Like LLM Life") as demo:
572
  gr.Markdown(
573
  """
574
- # 🏆 AIWorld – Conway-Like LLM Life
575
 
576
- Un piccolo mondo simulato dove agenti LLM:
577
- - vivono su una griglia 2D,
578
- - hanno identità, ruoli, interessi e memoria,
579
- - generano azioni testuali,
580
- - si muovono nello spazio,
581
- - ricevono un reward semplice,
582
- - possono "morire" e rinascere con nuove identità.
583
 
584
  La simulazione gira in **background** in modo continuo.
585
  """
@@ -617,7 +914,7 @@ La simulazione gira in **background** in modo continuo.
617
  refresh_btn = gr.Button("Aggiorna vista")
618
 
619
  with gr.Column(scale=2):
620
- svg_out = gr.HTML(label="Mondo fisico (griglia)")
621
  html_out = gr.HTML(label="Dettagli agenti")
622
  state_out = gr.State(serialize_world(GLOBAL_WORLD))
623
 
@@ -650,4 +947,4 @@ La simulazione gira in **background** in modo continuo.
650
  threading.Thread(target=simulation_loop, daemon=True).start()
651
 
652
  if __name__ == "__main__":
653
- demo.launch()
 
1
  import os
2
  import json
3
  import random
4
+ import math
5
  import threading
6
  import time
7
  from dataclasses import dataclass, asdict
 
15
  # CONFIGURAZIONE GLOBALE
16
  # ==========================
17
 
18
+ GRID_W = 20
19
+ GRID_H = 20
20
+ CELL_SIZE = 24 # pixel per cella
21
+
22
  DEFAULT_MODEL_ID = "Mattimax/DACMini-IT"
23
+ DEFAULT_NUM_AGENTS = 10
24
+
25
+ # RL rewards
26
+ FOOD_REWARD = 1.0
27
+ DANGER_PENALTY = -1.0
28
+ RESOURCE_REWARD = 0.2
29
+ BUILD_REWARD = 0.5
30
+ COOP_GIVE_REWARD = 0.3
31
+ COOP_RECEIVE_REWARD = 0.1
32
+ DEATH_PENALTY = -0.5
33
+ STEP_COST = -0.001
34
+
35
+ COOP_RADIUS = 2.0 # in celle (distanza euclidea)
36
+ INTERACT_RADIUS = 1.0
37
+ DANGER_RADIUS = 1.0
38
 
39
  MEMORY_DIR = "memories"
40
  LOG_DIR = "logs"
 
41
  os.makedirs(MEMORY_DIR, exist_ok=True)
42
  os.makedirs(LOG_DIR, exist_ok=True)
43
 
 
 
 
44
  # Stato simulazione
45
  SIMULATION_RUNNING = True
46
  SIMULATION_DELAY = 1.0 # secondi tra uno step e l'altro
 
59
  memory_file: str
60
  x: int
61
  y: int
62
+ energy: float = 1.0
63
+ wood: int = 0
64
+ stone: int = 0
65
  lifetime_steps: int = 0
66
  total_reward: float = 0.0
67
  alive: bool = True
68
  last_action: str = ""
69
  last_reward: float = 0.0
70
+ physical_role: str = "gatherer" # 'gatherer' o 'builder' per meccaniche fisiche
71
 
72
  def to_dict(self) -> Dict[str, Any]:
73
  return asdict(self)
 
75
  def append_memory(self, text: str):
76
  with open(self.memory_file, "a", encoding="utf-8") as f:
77
  f.write(text + "\n")
78
+
79
+ def distance2(self, x: int, y: int) -> float:
80
+ return (self.x - x) ** 2 + (self.y - y) ** 2
81
+
82
+ def move_towards(self, tx: int, ty: int):
83
+ dx = tx - self.x
84
+ dy = ty - self.y
85
+ if abs(dx) > abs(dy):
86
+ self.x += 1 if dx > 0 else -1
87
+ elif dy != 0:
88
+ self.y += 1 if dy > 0 else -1
89
+ self.x = max(0, min(GRID_W - 1, self.x))
90
+ self.y = max(0, min(GRID_H - 1, self.y))
91
+
92
+ def move_away(self, tx: int, ty: int):
93
+ dx = self.x - tx
94
+ dy = self.y - ty
95
+ if abs(dx) > abs(dy):
96
+ self.x += 1 if dx > 0 else -1
97
+ elif dy != 0:
98
+ self.y += 1 if dy > 0 else -1
99
+ self.x = max(0, min(GRID_W - 1, self.x))
100
+ self.y = max(0, min(GRID_H - 1, self.y))
101
+
102
+ def move_random(self):
103
+ direction = random.choice([(1,0),(-1,0),(0,1),(0,-1),(0,0)])
104
+ self.x = max(0, min(GRID_W - 1, self.x + direction[0]))
105
+ self.y = max(0, min(GRID_H - 1, self.y + direction[1]))
106
 
107
 
108
  class World:
109
  def __init__(self, model_id: str = DEFAULT_MODEL_ID, num_agents: int = DEFAULT_NUM_AGENTS):
110
  self.model_id = model_id
111
  self.tokenizer, self.model = load_model(model_id)
112
+ self.step_count = 0
113
  self.agents: List[Agent] = []
114
+ self.food: List[Tuple[int, int]] = []
115
+ self.danger: List[Tuple[int, int]] = []
116
+ self.resources: List[Tuple[int, int, str]] = []
117
+ self.structures: List[Tuple[int, int]] = []
118
+ self.initialize_world(num_agents)
119
+
120
+ def initialize_world(self, num_agents: int):
121
+ # Inizializza agenti
122
  self.agents = []
123
+ self.step_count = 0
124
  for i in range(num_agents):
125
  agent = self._create_random_agent(i)
126
  self.agents.append(agent)
127
+
128
+ # Inizializza risorse del mondo
129
+ self.food = [
130
+ (random.randint(0, GRID_W - 1), random.randint(0, GRID_H - 1))
131
+ for _ in range(25)
132
+ ]
133
+ self.danger = [
134
+ (random.randint(0, GRID_W - 1), random.randint(0, GRID_H - 1))
135
+ for _ in range(12)
136
+ ]
137
+ self.resources = [
138
+ (
139
+ random.randint(0, GRID_W - 1),
140
+ random.randint(0, GRID_H - 1),
141
+ random.choice(["wood", "stone"]),
142
+ )
143
+ for _ in range(20)
144
+ ]
145
+ self.structures = []
146
 
147
  def _create_random_agent(self, idx: int) -> Agent:
148
  names = ["Astra", "Nexus", "Orion", "Lyra", "Helix", "Nova", "Echo", "Vega"]
 
166
  f.write(f"Ruolo: {role}\n")
167
  f.write(f"Interessi: {intr}\n\n")
168
 
169
+ # Posizione iniziale casuale
170
+ x = random.randint(0, GRID_W - 1)
171
+ y = random.randint(0, GRID_H - 1)
172
 
173
  return Agent(
174
  agent_id=idx,
 
178
  memory_file=memory_file,
179
  x=x,
180
  y=y,
181
+ physical_role=random.choice(["gatherer", "builder"]),
182
  )
183
 
184
  def reset_world(self, num_agents: int, model_id: str):
185
  self.model_id = model_id
186
+ if model_id != self.model_id:
187
+ self.tokenizer, self.model = load_model(model_id)
188
+ self.initialize_world(num_agents)
189
+
190
+ # ---------- NEIGHBORHOOD UTILS ----------
191
+
192
+ def nearest_food(self, agent: Agent):
193
+ best = None
194
+ best_d2 = float("inf")
195
+ for (fx, fy) in self.food:
196
+ d2 = agent.distance2(fx, fy)
197
+ if d2 < best_d2:
198
+ best_d2 = d2
199
+ best = (fx, fy, d2)
200
+ return best
201
+
202
+ def nearest_danger(self, agent: Agent):
203
+ best = None
204
+ best_d2 = float("inf")
205
+ for (dx, dy) in self.danger:
206
+ d2 = agent.distance2(dx, dy)
207
+ if d2 < best_d2:
208
+ best_d2 = d2
209
+ best = (dx, dy, d2)
210
+ return best
211
+
212
+ def nearest_resource(self, agent: Agent):
213
+ best = None
214
+ best_d2 = float("inf")
215
+ for (rx, ry, rtype) in self.resources:
216
+ d2 = agent.distance2(rx, ry)
217
+ if d2 < best_d2:
218
+ best_d2 = d2
219
+ best = (rx, ry, rtype, d2)
220
+ return best
221
+
222
+ def nearest_ally(self, agent: Agent):
223
+ best = None
224
+ best_d2 = float("inf")
225
+ for other in self.agents:
226
+ if other is agent:
227
+ continue
228
+ d2 = agent.distance2(other.x, other.y)
229
+ if d2 < best_d2:
230
+ best_d2 = d2
231
+ best = (other, d2)
232
+ return best
233
+
234
+ # ---------- POLICY MISTA: LLM + EURISTICA ----------
235
+
236
+ def choose_physical_action(self, agent: Agent):
237
+ """
238
+ Azioni discrete per la fisica del mondo:
239
+ 0: wander
240
+ 1: move_to_food
241
+ 2: move_away_from_danger
242
+ 3: move_to_resource
243
+ 4: move_to_ally_for_coop
244
+ 5: build_here
245
+ """
246
+ food = self.nearest_food(agent)
247
+ danger = self.nearest_danger(agent)
248
+ res = self.nearest_resource(agent)
249
+ ally = self.nearest_ally(agent)
250
+
251
+ # pericolo vicino
252
+ if danger and danger[2] <= DANGER_RADIUS ** 2:
253
+ return 2, danger
254
+
255
+ # energia bassa → cibo
256
+ if agent.energy < 0.5 and food:
257
+ return 1, food
258
+
259
+ # builder con risorse → costruisci
260
+ if agent.physical_role == "builder" and agent.wood >= 2 and agent.stone >= 1:
261
+ return 5, None
262
+
263
+ # risorse disponibili → vai a risorse
264
+ if res:
265
+ return 3, res
266
+
267
+ # cooperazione se vicino ad alleato e con surplus
268
+ if ally and ally[1] <= COOP_RADIUS ** 2:
269
+ other, _ = ally
270
+ if (agent.wood > other.wood + 1) or (agent.stone > other.stone + 1):
271
+ return 4, ally
272
+
273
+ # default
274
+ return 0, None
275
+
276
+ # ---------- STEP DI SIMULAZIONE ----------
277
 
278
  def step_world(self) -> None:
279
+ self.step_count += 1
280
+
281
  world_state_summary = self._build_world_state_summary()
282
+ new_food = []
283
+ new_resources = []
284
+ food_used = [False] * len(self.food)
285
+ res_used = [False] * len(self.resources)
286
 
287
+ # prima passata: movimento + interazioni locali
288
  for agent in self.agents:
289
  if not agent.alive:
290
  continue
291
 
292
+ # Genera azione testuale dall'LLM
293
+ text_action = generate_action(
294
  agent=agent,
295
  world_state=world_state_summary,
296
  tokenizer=self.tokenizer,
297
  model=self.model,
298
  )
299
 
300
+ # Scegli azione fisica basata su euristica RL
301
+ phys_action, phys_info = self.choose_physical_action(agent)
302
+
303
+ reward = 0.0
304
+ reward += STEP_COST
305
+ agent.energy -= 0.02
306
+
307
+ # Esegui azione fisica
308
+ if phys_action == 1 and phys_info: # move_to_food
309
+ fx, fy, _ = phys_info
310
+ agent.move_towards(fx, fy)
311
+ agent.last_action = f"Vado verso cibo a ({fx},{fy}) - {text_action}"
312
+ elif phys_action == 2 and phys_info: # move_away_from_danger
313
+ dx, dy, _ = phys_info
314
+ agent.move_away(dx, dy)
315
+ agent.last_action = f"Scappo da pericolo a ({dx},{dy}) - {text_action}"
316
+ elif phys_action == 3 and phys_info: # move_to_resource
317
+ rx, ry, rtype, d2 = phys_info
318
+ if d2 > INTERACT_RADIUS ** 2:
319
+ agent.move_towards(rx, ry)
320
+ agent.last_action = f"Raccolgo {rtype} a ({rx},{ry}) - {text_action}"
321
+ elif phys_action == 4 and phys_info: # move_to_ally_for_coop
322
+ other, d2 = phys_info
323
+ if d2 > INTERACT_RADIUS ** 2:
324
+ agent.move_towards(other.x, other.y)
325
+ agent.last_action = f"Coopero con {other.name} - {text_action}"
326
+ elif phys_action == 5:
327
+ agent.last_action = f"Costruisco struttura - {text_action}"
328
+ else:
329
+ agent.move_random()
330
+ agent.last_action = f"Esploro - {text_action}"
331
+
332
+ # pericoli
333
+ for (dx, dy) in self.danger:
334
+ if agent.distance2(dx, dy) <= DANGER_RADIUS ** 2:
335
+ reward += DANGER_PENALTY
336
+ agent.energy -= 0.2
337
+
338
+ # cibo
339
+ for i, (fx, fy) in enumerate(self.food):
340
+ if not food_used[i] and agent.distance2(fx, fy) <= INTERACT_RADIUS ** 2:
341
+ food_used[i] = True
342
+ reward += FOOD_REWARD
343
+ agent.energy = min(1.0, agent.energy + 0.5)
344
+
345
+ # risorse
346
+ for i, (rx, ry, rtype) in enumerate(self.resources):
347
+ if not res_used[i] and agent.distance2(rx, ry) <= INTERACT_RADIUS ** 2:
348
+ res_used[i] = True
349
+ reward += RESOURCE_REWARD
350
+ if rtype == "wood":
351
+ agent.wood += 1
352
+ else:
353
+ agent.stone += 1
354
+
355
+ # costruzione
356
+ if phys_action == 5 and agent.wood >= 2 and agent.stone >= 1:
357
+ self.structures.append((agent.x, agent.y))
358
+ agent.wood -= 2
359
+ agent.stone -= 1
360
+ reward += BUILD_REWARD
361
+
362
+ # Reward per azione testuale
363
+ text_reward = self.compute_text_reward(agent, text_action, world_state_summary)
364
+ reward += text_reward
365
 
 
 
 
366
  agent.last_reward = reward
367
  agent.lifetime_steps += 1
368
  agent.total_reward += reward
369
 
370
+ # Salva in memoria
371
+ mem_entry = f"[Step {self.step_count}] Pos=({agent.x},{agent.y}) Azione: {agent.last_action} | Reward: {reward:.3f}"
372
  agent.append_memory(mem_entry)
373
 
374
+ # cooperazione (seconda passata)
375
+ for agent in self.agents:
376
+ if not agent.alive:
377
+ continue
378
+ ally_info = self.nearest_ally(agent)
379
+ if not ally_info:
380
+ continue
381
+ other, d2 = ally_info
382
+ if d2 <= COOP_RADIUS ** 2:
383
+ if agent.wood > other.wood + 1:
384
+ agent.wood -= 1
385
+ other.wood += 1
386
+ agent.last_reward += COOP_GIVE_REWARD
387
+ other.last_reward += COOP_RECEIVE_REWARD
388
+ if agent.stone > other.stone + 1:
389
+ agent.stone -= 1
390
+ other.stone += 1
391
+ agent.last_reward += COOP_GIVE_REWARD
392
+ other.last_reward += COOP_RECEIVE_REWARD
393
+
394
+ # cibo e risorse rimanenti
395
+ for i, (fx, fy) in enumerate(self.food):
396
+ if not food_used[i]:
397
+ new_food.append((fx, fy))
398
+ for i, (rx, ry, rtype) in enumerate(self.resources):
399
+ if not res_used[i]:
400
+ new_resources.append((rx, ry, rtype))
401
+
402
+ self.food = new_food
403
+ self.resources = new_resources
404
+
405
+ # respawn agenti morti
406
+ for agent in self.agents:
407
+ if agent.energy <= 0:
408
  agent.alive = False
409
+ agent.last_action = "MORTO - attendo rinascita"
410
+ agent.last_reward += DEATH_PENALTY
411
 
412
  # rinasciamo gli agenti morti
413
  for i, agent in enumerate(self.agents):
414
+ if not agent.alive and self.step_count % 10 == 0: # Rinasce ogni 10 step
415
  self.agents[i] = self._create_random_agent(agent.agent_id)
416
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
417
  def _build_world_state_summary(self) -> str:
418
+ lines = [
419
+ f"Step globale: {self.step_count}",
420
+ f"Grid: {GRID_W}x{GRID_H}",
421
+ "STATO DEL MONDO FISICO:",
422
+ f"- Cibo disponibile: {len(self.food)} unità",
423
+ f"- Pericoli attivi: {len(self.danger)}",
424
+ f"- Risorse: {len(self.resources)} (legno/pietra)",
425
+ f"- Strutture costruite: {len(self.structures)}",
426
+ "",
427
+ "STATO DEGLI AGENTI:"
428
+ ]
429
+
430
  for a in self.agents:
431
+ status = "VIVO" if a.alive else "MORTO"
432
  lines.append(
433
+ f"- {a.name} ({a.role}) - Ruolo fisico: {a.physical_role}, "
434
+ f"Pos: ({a.x},{a.y}), Energia: {a.energy:.2f}, "
435
+ f"Legno: {a.wood}, Pietra: {a.stone}, "
436
+ f"Reward tot: {a.total_reward:.2f}, Stato: {status}"
437
  )
438
+
439
  return "\n".join(lines)
440
 
441
+ def compute_text_reward(self, agent: Agent, action: str, world_state: str) -> float:
442
  """
443
+ Reward per l'azione testuale (dal secondo codice)
 
 
 
 
444
  """
445
  reward = 0.0
446
  length = len(action.strip())
 
454
  if any(name in action for name in other_names):
455
  reward += 1.0
456
 
 
457
  text = action.lower()
458
  if any(k in text for k in ["cammino", "muovo", "sposto", "avvicino", "raggiungo", "esploro"]):
459
  reward += 0.5
 
463
  def log_transition(self, agent: Agent, action: str, reward: float):
464
  log_path = os.path.join(LOG_DIR, f"agent_{agent.agent_id}_log.jsonl")
465
  transition = {
466
+ "step": self.step_count,
467
  "agent_id": agent.agent_id,
468
  "name": agent.name,
469
  "role": agent.role,
470
+ "physical_role": agent.physical_role,
471
  "interests": agent.interests,
472
  "x": agent.x,
473
  "y": agent.y,
474
+ "energy": agent.energy,
475
+ "wood": agent.wood,
476
+ "stone": agent.stone,
477
  "action": action,
478
  "reward": reward,
479
  "lifetime_steps": agent.lifetime_steps,
 
505
  prompt = f"""
506
  Sei un agente in un mondo simulato su una griglia 2D.
507
 
508
+ IDENTITÀ:
509
  - Nome: {agent.name}
510
+ - Ruolo mentale: {agent.role}
511
  - Interessi: {agent.interests}
512
+ - Ruolo fisico: {agent.physical_role}
513
 
514
+ STATO DEL MONDO:
515
  {world_state}
516
 
517
+ COMPITO:
518
+ Descrivi in 1-3 frasi cosa fai ora per interagire con questo mondo.
519
+ Considera che:
520
+ 1. Hai energia: {agent.energy:.2f}, legno: {agent.wood}, pietra: {agent.stone}
521
+ 2. Puoi muoverti, raccogliere cibo/risorse, evitare pericoli, costruire, cooperare
522
+ 3. Il tuo ruolo fisico è: {agent.physical_role}
523
 
524
+ Rispondi SOLO con l'azione descrittiva, senza spiegazioni meta.
525
  """
526
  return prompt.strip()
527
 
 
549
  # ==========================
550
 
551
  def world_to_svg(world: World) -> str:
552
+ width = GRID_W * CELL_SIZE
553
+ height = GRID_H * CELL_SIZE
554
 
555
+ svg_parts = [
556
+ f'<svg width="{width}" height="{height}" viewBox="0 0 {width} {height}" '
557
+ f'style="background:#020617;border:1px solid #444;font-family:monospace;">'
558
  ]
559
 
560
  # griglia
561
+ for x in range(GRID_W):
562
+ for y in range(GRID_H):
563
+ px = x * CELL_SIZE
564
+ py = y * CELL_SIZE
565
+ svg_parts.append(
566
+ f'<rect x="{px}" y="{py}" width="{CELL_SIZE}" height="{CELL_SIZE}" '
567
+ f'style="fill:#020617;stroke:#1f2937;stroke-width:1" />'
568
+ )
569
+
570
+ # cibo (giallo)
571
+ for (fx, fy) in world.food:
572
+ px = fx * CELL_SIZE + CELL_SIZE / 2
573
+ py = fy * CELL_SIZE + CELL_SIZE / 2
574
+ svg_parts.append(
575
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/5}" fill="#ffd700" />'
576
  )
577
+
578
+ # pericoli (rosso)
579
+ for (dx, dy) in world.danger:
580
+ px = dx * CELL_SIZE + CELL_SIZE / 2
581
+ py = dy * CELL_SIZE + CELL_SIZE / 2
582
+ svg_parts.append(
583
+ f'<rect x="{px - CELL_SIZE/4}" y="{py - CELL_SIZE/4}" '
584
+ f'width="{CELL_SIZE/2}" height="{CELL_SIZE/2}" fill="#ff3333" />'
585
  )
586
 
587
+ # risorse (blu)
588
+ for (rx, ry, rtype) in world.resources:
589
+ px = rx * CELL_SIZE + CELL_SIZE / 2
590
+ py = ry * CELL_SIZE + CELL_SIZE / 2
591
+ color = "#33aaff" if rtype == "wood" else "#3366ff"
592
+ svg_parts.append(
593
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/6}" fill="{color}" />'
594
+ )
595
 
596
+ # strutture (bianco)
597
+ for (sx, sy) in world.structures:
598
+ px = sx * CELL_SIZE + CELL_SIZE / 2
599
+ py = sy * CELL_SIZE + CELL_SIZE / 2
600
+ svg_parts.append(
601
+ f'<rect x="{px - CELL_SIZE/4}" y="{py - CELL_SIZE/4}" '
602
+ f'width="{CELL_SIZE/2}" height="{CELL_SIZE/2}" fill="#ffffff" />'
603
+ )
604
 
605
+ # agenti
606
+ for a in world.agents:
607
+ if not a.alive:
608
+ continue
609
+ px = a.x * CELL_SIZE + CELL_SIZE / 2
610
+ py = a.y * CELL_SIZE + CELL_SIZE / 2
611
+ color = "#00ff7f" if a.physical_role == "gatherer" else "#00e5ff"
612
+ svg_parts.append(
613
+ f'<circle cx="{px}" cy="{py}" r="{CELL_SIZE/3}" fill="{color}" stroke="#e5e7eb" stroke-width="2" />'
614
  )
615
+ svg_parts.append(
616
+ f'<text x="{px}" y="{py+4}" font-size="10" text-anchor="middle" '
617
+ f'fill="#e5e7eb" style="font-family:monospace;">{a.name}</text>'
618
  )
619
 
620
+ svg_parts.append("</svg>")
621
+ return "".join(svg_parts)
622
 
623
 
624
  def world_to_html_cards(world: World) -> str:
 
647
  }
648
  .agents-grid {
649
  display: grid;
650
+ grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
651
+ gap: 12px;
652
  }
653
  .agent-card {
654
  background: radial-gradient(circle at top left, #1f2937, #020617);
655
  border-radius: 10px;
656
+ padding: 12px 14px;
657
  border: 1px solid rgba(148, 163, 184, 0.35);
658
  box-shadow: 0 8px 20px rgba(15, 23, 42, 0.8);
659
  }
 
661
  display: flex;
662
  justify-content: space-between;
663
  align-items: center;
664
+ margin-bottom: 8px;
665
  }
666
  .agent-name {
667
  font-size: 16px;
 
674
  letter-spacing: 0.06em;
675
  }
676
  .agent-badge {
677
+ padding: 3px 8px;
678
  border-radius: 999px;
679
  font-size: 10px;
680
  font-weight: 500;
 
689
  color: #fecaca;
690
  border: 1px solid rgba(248, 113, 113, 0.6);
691
  }
692
+ .agent-physical-role {
693
  font-size: 11px;
694
+ color: #c4b5fd;
695
+ margin-bottom: 4px;
696
+ }
697
+ .agent-stats {
698
+ display: grid;
699
+ grid-template-columns: repeat(2, 1fr);
700
+ gap: 6px;
701
+ margin-bottom: 8px;
702
+ font-size: 11px;
703
+ }
704
+ .stat-item {
705
+ background: rgba(15, 23, 42, 0.7);
706
+ padding: 4px 8px;
707
+ border-radius: 6px;
708
+ border: 1px solid rgba(55, 65, 81, 0.8);
709
+ }
710
+ .stat-label {
711
+ color: #9ca3af;
712
  }
713
+ .stat-value {
714
+ color: #e5e7eb;
715
+ font-weight: 600;
716
  }
717
  .agent-interests {
718
  font-size: 11px;
719
  color: #9ca3af;
720
+ margin-bottom: 8px;
721
  }
722
  .agent-action {
723
  font-size: 12px;
724
  color: #e5e7eb;
725
  background: rgba(15, 23, 42, 0.7);
726
  border-radius: 8px;
727
+ padding: 8px;
728
  border: 1px solid rgba(55, 65, 81, 0.8);
729
+ min-height: 40px;
730
  }
731
  .agent-action-label {
732
  font-size: 10px;
733
  text-transform: uppercase;
734
  letter-spacing: 0.08em;
735
  color: #9ca3af;
736
+ margin-bottom: 4px;
737
  }
738
  </style>
739
  """
 
741
  header = f"""
742
  <div class="world-header">
743
  <div>
744
+ <div class="world-title">Mondo Simulato – Step {world.step_count}</div>
745
  <div class="world-subtitle">
746
+ Modello: <code>{world.model_id}</code> · Agenti: {len([a for a in world.agents if a.alive])}/{len(world.agents)} ·
747
+ Cibo: {len(world.food)} · Risorse: {len(world.resources)} · Strutture: {len(world.structures)}
748
  </div>
749
  </div>
750
  </div>
 
753
  cards = []
754
  for agent in world.agents:
755
  badge_class = "badge-alive" if agent.alive else "badge-dead"
756
+ badge_text = "VIVO" if agent.alive else "MORTO"
757
  avg_reward = agent.total_reward / max(1, agent.lifetime_steps)
758
+
759
  card = f"""
760
  <div class="agent-card">
761
  <div class="agent-header">
762
  <div>
763
  <div class="agent-name">{agent.name}</div>
764
  <div class="agent-role">{agent.role}</div>
765
+ <div class="agent-physical-role">Ruolo fisico: {agent.physical_role}</div>
766
  </div>
767
  <div class="agent-badge {badge_class}">{badge_text}</div>
768
  </div>
769
+
770
+ <div class="agent-stats">
771
+ <div class="stat-item">
772
+ <div class="stat-label">Posizione</div>
773
+ <div class="stat-value">({agent.x},{agent.y})</div>
774
+ </div>
775
+ <div class="stat-item">
776
+ <div class="stat-label">Energia</div>
777
+ <div class="stat-value">{agent.energy:.2f}</div>
778
+ </div>
779
+ <div class="stat-item">
780
+ <div class="stat-label">Legno</div>
781
+ <div class="stat-value">{agent.wood}</div>
782
+ </div>
783
+ <div class="stat-item">
784
+ <div class="stat-label">Pietra</div>
785
+ <div class="stat-value">{agent.stone}</div>
786
+ </div>
787
+ <div class="stat-item">
788
+ <div class="stat-label">Step vita</div>
789
+ <div class="stat-value">{agent.lifetime_steps}</div>
790
+ </div>
791
+ <div class="stat-item">
792
+ <div class="stat-label">Reward step</div>
793
+ <div class="stat-value">{agent.last_reward:.3f}</div>
794
+ </div>
795
  </div>
796
+
797
  <div class="agent-interests">
798
  <strong>Interessi:</strong> {agent.interests}
799
  </div>
800
+
801
  <div class="agent-action">
802
  <div class="agent-action-label">Ultima azione</div>
803
  <div>{agent.last_action or "<i>Nessuna azione ancora.</i>"}</div>
 
829
def serialize_world(world: World) -> dict:
    """Build a JSON-serializable snapshot of the current world state.

    The snapshot contains the model id, the step counter, every agent's
    state (via ``Agent.to_dict``), and the world's food, danger, resource
    and structure collections, in that key order.
    """
    snapshot: dict = {
        "model_id": world.model_id,
        "step": world.step_count,
    }
    # Agents serialize themselves; the remaining collections are copied by reference.
    snapshot["agents"] = [agent.to_dict() for agent in world.agents]
    for field_name in ("food", "danger", "resources", "structures"):
        snapshot[field_name] = getattr(world, field_name)
    return snapshot
839
 
840
 
 
869
  with gr.Blocks(title="AIWorld – Conway-Like LLM Life") as demo:
870
  gr.Markdown(
871
  """
872
+ # 🏆 AIWorld – Conway-Like LLM Life (Versione Ibrida)
873
 
874
+ Un mondo simulato ibrido dove agenti LLM:
875
+ - Hanno identità, ruoli mentali e interessi (LLM)
876
+ - Interagiscono con un mondo fisico con cibo, pericoli, risorse e strutture (sistema RL)
877
+ - Generano azioni testuali descrittive basate sulla loro situazione
878
+ - Ricevono reward sia per le azioni testuali che per le interazioni fisiche
879
+ - Possono morire e rinascere con nuove identità
 
880
 
881
  La simulazione gira in **background** in modo continuo.
882
  """
 
914
  refresh_btn = gr.Button("Aggiorna vista")
915
 
916
  with gr.Column(scale=2):
917
+ svg_out = gr.HTML(label="Mondo fisico (griglia 20x20)")
918
  html_out = gr.HTML(label="Dettagli agenti")
919
  state_out = gr.State(serialize_world(GLOBAL_WORLD))
920
 
 
947
  threading.Thread(target=simulation_loop, daemon=True).start()
948
 
949
if __name__ == "__main__":
    # Start the Gradio app; blocks until the server is stopped.
    # (The background simulation thread was started above as a daemon.)
    demo.launch()