# OctaveLeroy's picture
# Upload agent.py
# a7f2ced verified
"""
Example: MCP ReAct Agent
A complete ReAct agent that uses MCP tools to play text adventure games.
This is a working example students can learn from.
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Dict, List, Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from pydantic import BaseModel

from adaptive_knowledge import AdaptiveKnowledgeManager
from memory import HierarchicalMemoryManager
from prompts import (
    SYSTEM_PROMPT,
    PLANNER_SYSTEM_PROMPT,
    EXTRACTOR_SYSTEM_PROMPT,
    CRITIC_SYSTEM_PROMPT,
    MEMORY_SYNTHESIS_PROMPT,
)
from spatial_memory import SpatialMemorySystem

# Maximum consecutive turns allowed in a single room before the guidance
# builder escalates to an explicit "move elsewhere" directive.
MAX_STEPS_ALLOWED = 8

# Load environment variables (HF_TOKEN, etc.) from a local .env file.
load_dotenv()
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model served through the Hugging Face Inference API. The commented-out
# lines below are alternative models/providers tried during development.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct:featherless-ai"
# LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct::fireworks-ai"
# LLM_MODEL = "Qwen/Qwen2.5-1.5B-Instruct"
# LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
# LLM_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
# LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.3"
# Fail fast at import time if the API token is missing.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client instance used by call_llm() below.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Send a single chat turn to the configured LLM and return its reply text.

    Args:
        prompt: User-role message content.
        system_prompt: System-role instructions for the model.
        seed: Sampling seed forwarded to the API for reproducibility.
        max_tokens: Upper bound on the length of the generated reply.

    Returns:
        The assistant message content of the first completion choice.
    """
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )
    return completion.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    final_score: int                 # score reported by the game at the end of the run
    max_score: int                   # maximum achievable score for the game
    moves: int                       # number of "play_action" commands issued
    locations_visited: set[str]      # names of distinct locations seen during the run
    game_completed: bool             # True when a game-over condition was detected
    error: Optional[str] = None      # fatal error message, if the run aborted
    history: list[tuple[str, str, str]] = field(default_factory=list)  # (thought, tool call, observation) per step
# =============================================================================
# Student Agent Implementation
# =============================================================================
class StudentAgent:
"""
MCP ReAct Agent - A complete working example.
This agent demonstrates:
- ReAct loop (Thought -> Tool -> Observation)
- Loop detection
- Action validation
- Score tracking via memory tool
"""
def __init__(self):
    """Initialize the agent state."""
    # Rolling step log (dicts with step/thought/tool/args/result), capped at 10.
    self.history: list[dict] = []
    # Last few "play_action" commands, used for repetition/loop detection.
    self.recent_actions: list[str] = []
    self.score: int = 0
    # Helper agents/modules. NOTE(review): StrategyModule, CriticAgent,
    # WorldMapper and ObservationExtractor are not imported at the top of the
    # file — presumably defined elsewhere in this module; verify before refactoring.
    self.strategist = StrategyModule()
    self.critic = CriticAgent(call_llm_func=call_llm, verbose=True)
    self.knowledge_manager = AdaptiveKnowledgeManager("knowledgebase.md")
    self.world_mapper = WorldMapper()
    self.extractor = ObservationExtractor(call_llm)
    # Run the knowledge-update cycle every N steps.
    self.learning_interval = 10
    self.last_learning_step = 0
    self.seen_responses = {}
    self.last_room_id = None  # Use this one for the room-ID logic.
    self.last_world_hash = None
    self.memory_manager = HierarchicalMemoryManager(call_llm)
    self.current_location = "Start"
    # Per-location list of {action, result, step} entries.
    self.location_action_memory = {}
    self.steps_in_current_room = 0
    # Engine-validated actions for the current room (refreshed on room change).
    self.current_room_actions = set()
async def run(
    self,
    client,
    game: str,
    max_steps: int,
    seed: int,
    verbose: bool = False,
) -> RunResult:
    """Run the agent for a game session.

    Args:
        client: MCP client exposing the game tools (play_action, inventory, ...).
        game: Name of the game being played (kept for the caller interface).
        max_steps: Maximum number of ReAct steps to execute.
        seed: Base seed forwarded to the LLM for reproducibility.
        verbose: When True, print per-step diagnostics.

    Returns:
        A RunResult summarizing score, moves, visited locations and history.
    """
    locations_visited = set()
    history = []
    moves = 0
    # Load persisted strategic knowledge at startup.
    knowledge_content = self.knowledge_manager.load_knowledge()
    if verbose and knowledge_content:
        print(f"\n[INIT] Loaded strategic knowledge ({len(knowledge_content)} chars)")
    # Discover the tools exposed by the MCP server.
    tools = await client.list_tools()
    tool_names = [t.name for t in tools]
    # Initial observation: a plain "look" plus the structured RAM location info.
    result = await client.call_tool("play_action", {"action": "look"})
    observation = self._extract_result(result)
    loc_result = await client.call_tool("get_location_info", {})
    raw_res = self._extract_result(loc_result)
    loc_dict = json.loads(raw_res) if isinstance(raw_res, str) else raw_res
    structured_data = self.extractor.extract(
        raw_text=observation,
        seed=seed,
        ram_data=loc_dict,
        last_location=self.current_location,
    )
    self.world_mapper.update_map(structured_data, "look", observation)
    if verbose:
        print(f"\n{observation}")
    # Main ReAct loop.
    for step in range(1, max_steps + 1):
        structured_data = {"location_name": self.current_location, "is_new_location": False}
        priority_guidance = ""
        try:
            # --- Sensory phase: inventory + RAM location snapshot ---
            inv_result = await client.call_tool("inventory", {})
            current_inv = self._extract_result(inv_result)
            loc_result = await client.call_tool("get_location_info", {})
            raw_res = self._extract_result(loc_result)
            loc_dict = json.loads(raw_res) if isinstance(raw_res, str) else raw_res
            if loc_dict.get("status") == "success":
                current_loc_id = loc_dict["location"].get("id")
                current_loc_name = loc_dict["location"].get("name")
                current_world_hash = loc_dict.get("world_hash")
            else:
                # Fall back to the last known state instead of reading
                # undefined variables (the original raised NameError here).
                print("⚠️ Erreur lors de la récupération des données RAM")
                current_loc_id = self.last_room_id
                current_loc_name = self.current_location
                current_world_hash = self.last_world_hash
            # Room-change detection via the RAM room id.
            is_new_room = False
            if current_loc_id != self.last_room_id:
                is_new_room = True
                self.steps_in_current_room = 0
                print(f"🚀 Mouvement détecté vers : {current_loc_name} (ID: {current_loc_id})")
                self.last_room_id = current_loc_id
                self.current_location = current_loc_name
            else:
                self.steps_in_current_room += 1
            # World-state change detection (an object moved / something opened).
            world_changed = False
            if current_world_hash != self.last_world_hash:
                world_changed = True
                print(f"🔍 Le monde a changé (objet déplacé/modifié)")
                self.last_world_hash = current_world_hash
            map_result = await client.call_tool("get_map", {})
            current_map = self._extract_result(map_result)
            # Structured extraction of the current observation.
            structured_data = self.extractor.extract(
                raw_text=observation,
                seed=seed + step,
                ram_data=loc_dict,
                last_location=self.current_location,
            )
            print("structured_data run loop ", structured_data)
            structured_data["location_id"] = current_loc_id
            structured_data["location_name"] = current_loc_name
            structured_data["is_new_location"] = is_new_room
            structured_data["world_changed"] = world_changed
            # Refresh engine-validated actions only when the state changed.
            if is_new_room or world_changed:
                cheat_result = await client.call_tool("get_valid_actions_cheat", {})
                new_actions = self._extract_result(cheat_result)
                if is_new_room:
                    self.current_room_actions = set()
                if isinstance(new_actions, list):
                    self.current_room_actions.update(new_actions)
                elif isinstance(new_actions, str):
                    self.current_room_actions.update(a.strip() for a in new_actions.split(','))
            structured_data["cheat_actions"] = list(self.current_room_actions)
            priority_guidance = self._build_priority_guidance(structured_data)
            print("priority guidance", priority_guidance)
            print("fin PRIORITY")
        except Exception as e:
            # Best-effort sensing: fall back to a minimal context so the
            # step can still proceed with a degraded prompt.
            print(f"⚠️ Sensory Error: {e}")
            structured_data = {}
            current_inv = "Unknown"
            current_map = "Unknown"
        # Track distinct locations for the RunResult (was never populated before).
        locations_visited.add(self.current_location)
        is_urgent = False
        enriched_actions = self._generate_enriched_actions(structured_data)
        # --- Prompt assembly ---
        if is_urgent:
            if verbose:
                print("\n🔥 [MODE URGENCE ACTIVÉ] Le cochon court / Combat en cours !")
            rich_context = f"""
!!! URGENT SITUATION - FOCUS ON IMMEDIATE ACTION !!!
SITUATION : {observation}
PRIORITY GUIDANCE : {priority_guidance if priority_guidance else "- [STATUS] Standard exploration."}
POSSIBLE ACTIONS: {enriched_actions}
"""
        else:
            rich_context = f"""
CURRENT SITUATION : {observation}
INVENTORY : {current_inv}
PRIORITY GUIDANCE : {priority_guidance if priority_guidance else "- [STATUS] Standard exploration."}
POSSIBLE ACTIONS: {enriched_actions}
"""
        if verbose:
            print(f"Context {rich_context}")
        # --- Generation (Actor) ---
        prompt = self._build_prompt(rich_context, knowledge="")
        response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
        thought, tool_name, tool_args = self._parse_response(response, tool_names)
        # Keep the validated call. The original discarded the validation
        # result and, with the critic disabled, re-queried the LLM three
        # times per step for nothing.
        tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
        if verbose:
            print(f"\n--- Step {step} ---")
            print(f"[THOUGHT] {thought}")
            print(f"[TOOL] {tool_name}({tool_args})")
        # --- Loop detection (game actions only) ---
        proposed_action = "tool_use"
        if tool_name == "play_action":
            proposed_action = tool_args.get("action", "look")
            self.recent_actions.append(proposed_action)
            if len(self.recent_actions) > 5:
                self.recent_actions = self.recent_actions[-5:]
            if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                # Stuck repeating one action: force a random movement instead.
                import random
                all_raw_actions = []
                if isinstance(enriched_actions, str):
                    all_raw_actions = [line.strip("- ").strip() for line in enriched_actions.split('\n') if "-" in line]
                move_keywords = ["north", "south", "east", "west", "ne", "nw", "se", "sw", "up", "down", "in", "out"]
                valid_moves = [
                    a for a in all_raw_actions
                    if a in move_keywords or a.startswith("go ") or a == "wait"
                ]
                if valid_moves:
                    # Pick a move at random among those that differ from the
                    # repeated action, if any.
                    last_action = self.recent_actions[-1]
                    choices = [m for m in valid_moves if m != last_action]
                    forced_move = random.choice(choices if choices else valid_moves)
                    tool_args = {"action": forced_move}
                    proposed_action = forced_move
                    if verbose:
                        print(f"🔄 [LOOP BREAK] Agent stuck on '{last_action}'. Forcing move to: {forced_move}")
                else:
                    # Last resort when no movement shows up in the action list.
                    tool_args = {"action": "wait"}
                    proposed_action = "wait"
            moves += 1
        # --- Execution ---
        try:
            result = await client.call_tool(tool_name, tool_args)
            new_observation = self._extract_result(result)
            # Remember (action -> cleaned result) for the current location.
            full_action_key = tool_args.get("action", tool_name)
            if tool_name:
                self.location_action_memory.setdefault(self.current_location, []).append({
                    "action": full_action_key,
                    "result": self._clean_memory_result(new_observation),
                    # The original stored getattr(self, 'step_count', 0),
                    # which was always 0; record the actual loop step.
                    "step": step,
                })
            if tool_name == "play_action":
                loc_res = await client.call_tool("get_location_info", {})
                new_loc_dict = self._extract_result(loc_res)
                print("new loc dict", new_loc_dict)
                if isinstance(new_loc_dict, str):
                    try:
                        new_loc_dict = json.loads(new_loc_dict)
                    except json.JSONDecodeError:
                        new_loc_dict = {}
                print("new loc dict", new_loc_dict)
                # .get() chain so a failed parse above cannot raise KeyError.
                new_id = str(new_loc_dict.get("location", {}).get("id"))
                action_text = tool_args.get("action", "")
                direction_attempted = self.world_mapper._extract_direction(action_text)
                if direction_attempted and new_id == str(self.last_room_id):
                    # Tried to move but stayed put: record the blocked exit.
                    reason = self._clean_memory_result(new_observation)
                    self.world_mapper.mark_blocked_exit(new_id, direction_attempted, reason)
                    print(f"🚫 BLOCAGE : {direction_attempted} n'a pas fonctionné.")
                else:
                    print("direction attempted", direction_attempted, action_text)
                new_structured = self.extractor.extract(
                    raw_text=new_observation,
                    seed=seed,
                    ram_data=new_loc_dict,
                    last_location=self.current_location,
                )
                self.world_mapper.update_map(new_structured, action_text, new_observation)
            observation = new_observation
            if verbose:
                print(f"[RESULT] {observation}...")
        except Exception as e:
            observation = f"Error: {e}"
            if verbose:
                print(f"[ERROR] {e}")
        # --- Tracking ---
        self.history.append({
            "step": step,
            "thought": thought,
            "tool": tool_name,
            "args": tool_args,
            "result": observation[:200],
        })
        if len(self.history) > 10:
            self.history = self.history[-10:]
        self._update_score(observation)
        # Full run history (returned to the caller).
        history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
        if self._is_game_over(observation):
            if verbose:
                print("\n*** GAME OVER ***")
            break
    return RunResult(
        final_score=self.score,
        max_score=350,
        moves=moves,
        locations_visited=locations_visited,
        game_completed=self._is_game_over(observation),
        history=history,
    )
def _clean_memory_result(self, text: str) -> str:
"""Nettoie le résultat pour la mémoire : une seule ligne, max 80 chars."""
if not text: return ""
# 1. Remplace les sauts de ligne par des espaces
clean = text.replace('\n', ' ').strip()
# 2. Supprime les doubles espaces
clean = " ".join(clean.split())
return clean
# def _generate_enriched_actions(self, raw_actions: str, structured_data: dict) -> str:
# enriched = [raw_actions]
# # 1. Objets à prendre
# objs = structured_data.get("takeable_objects", [])
# if objs:
# enriched.append("\nOBJECTS TO TAKE:")
# for obj in objs:
# enriched.append(f" - take {obj}")
# enriched.append(f" - examine {obj}")
# # 2. Éléments du décor (Vital pour éviter les "examine" dans le vide)
# features = structured_data.get("interactable_features", [])
# if features:
# enriched.append("\nENVIRONMENTAL FEATURES (MUST EXAMINE):")
# for feat in features:
# enriched.append(f" - examine {feat}")
# # 3. Entités (Sans Grunk)
# ents = [e for e in structured_data.get("entities", []) if e.lower() != "grunk"]
# if ents:
# enriched.append("\nENTITIES (INTERACT):")
# for ent in ents:
# enriched.append(f" - talk to {ent}")
# return "\n".join(enriched)
def _generate_enriched_actions(self, structured_data: dict) -> str:
"""
Génère un kit d'action utilisant les noms traduits (friendly)
ou signalant les noms techniques si nécessaire.
"""
ram_objects = structured_data.get("raw_ram_objects", [])
mapping = structured_data.get("name_translation", {})
inventory_raw = structured_data.get("inventory", [])
inventory = [obj["name"] for obj in inventory_raw]
kit = ["### 🛠️ ACTION CONSTRUCTION KIT"]
cheat_actions = structured_data.get("cheat_actions", [])
if cheat_actions:
# On met ces actions en avant car elles sont GARANTIES valides par le moteur
kit.append(f"**DIRECT ACTIONS (Proven Valid)**: {', '.join(cheat_actions)}")
kit.append(f"ABOVE ACTIONS ARE VALID BUT CAN BE DUMB")
# 1. VERBES PERMANENTS
verbs = ["examine", "take", "drop", "look", "inventory", "wait", "listen", "search"]
# 2. VERBES CONTEXTUELS
# On scanne le texte et la RAM pour adapter les verbes
all_context = (structured_data.get("description_summary", "") + " ".join(ram_objects)).lower()
if any(x in all_context for x in ["door", "gate", "mailbox", "chest", "box", "case", "window"]):
verbs += ["open", "close", "unlock", "lock"]
if any(x in all_context or "torch" in str(inventory).lower() for x in ["lamp", "torch", "switch", "device"]):
verbs += ["turn on", "turn off", "light", "extinguish"]
kit.append(f"**CORE VERBS**: {', '.join(sorted(set(verbs)))}")
# 3. OBJETS (Traitement Friendly vs Technical)
visible_ram = [o for o in ram_objects if "(missing" not in o.lower()]
hidden_ram = [o for o in ram_objects if "(missing" in o.lower()]
if visible_ram:
formatted_objects = []
for tech_name in visible_ram:
# Si on a une traduction dans le mapping, on l'utilise
if tech_name in mapping:
friendly_name = mapping[tech_name]
formatted_objects.append(f"{friendly_name.upper()}")
else:
# Sinon, on affiche le nom technique avec un avertissement
formatted_objects.append(f"{tech_name} [! technical name] map to possible real and simple words")
kit.append(f"**OBJECTS AROUND YOU**: {', '.join(formatted_objects)}")
kit.append("_Note: Use names exactly as shown above._")
# 4. ALERTE SECRET
# if hidden_ram:
# kit.append(f"**⚠️ SENSORY ALERT**: Something is hidden. Use 'listen', 'search' or 'examine' on specific features.")
# 5. NAVIGATION
# exits = structured_data.get("visible_exits", [])
# kit.append(f"**NAVIGATION**: {', '.join([e.upper() for e in exits]) if exits else 'N, S, E, W, U, D, IN, OUT'}")
return "\n".join(kit)
def classify_interaction(self, action: str, result: str, observation: str, inventory: str, name_mapping: dict, possible_actions: str) -> dict:
    """Ask the LLM whether an action failed and why.

    Args:
        action: The command that was issued.
        result: The game's textual response to that command.
        observation: Current room description.
        inventory: Current inventory text.
        name_mapping: {technical_name: friendly_name} translation table.
        possible_actions: Textual list of currently available actions.

    Returns:
        A dict {"is_failure": bool, "reason": str, "suggestion": str};
        a neutral "no failure" dict when the LLM reply is not valid JSON.
    """
    valid_names = ", ".join([f"'{friendly}' (ID: {tech})" for tech, friendly in name_mapping.items()])
    prompt = f"""
Analyze this text adventure interaction:
Current Observation : {observation}
Inventory : {inventory}
valid objects to use: {valid_names}
ACTION: "{action}"
RESULT: "{result}"
Here is possible actions to take {possible_actions}
Is this interaction a FAILURE (e.g., object not found, too dark, locked, invalid name, generic response)?
If it's a failure, provide a short reason and a suggestion.
Return ONLY JSON:
{{
"is_failure": true/false,
"reason": "short explanation",
"suggestion": "what to try instead"
}}
"""
    # Fixed seed so identical interactions classify consistently.
    response = call_llm(prompt, "You are a Game Logic Analyzer.", seed=42)
    try:
        return json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # Malformed LLM output: assume no failure rather than crashing
        # (narrowed from the original bare except).
        return {"is_failure": False, "reason": "", "suggestion": ""}
# def _build_priority_guidance(self, structured_data: dict) -> str:
# guidance = []
# loc_name = self.current_location
# loc_id = structured_data.get("location_id", self.last_room_id or -1)
# tree_display = structured_data.get("tree", [])
# # On utilise objects_in_room pour avoir la hiérarchie (parents/enfants)
# # Si absent, on replie sur raw_ram_objects pour la compatibilité
# # objects_tree = structured_data.get("objects_in_room", [])
# objects_tree = structured_data.get("objects_in_room", [])
# raw_ram_objects = structured_data.get("raw_ram_objects",[])
# mapping = structured_data.get("name_translation", {})
# past_actions = self.location_action_memory.get(loc_name, [])
# interacted_res = None
# world_changed = structured_data.get("world_changed", False)
# is_new_loc = structured_data.get("is_new_location", False)
# if self.recent_actions:
# last_action_taken = self.recent_actions[-1]
# # On récupère le dernier élément de notre liste de mémoire
# last_entry = past_actions[-1] if past_actions else {}
# last_res = last_entry.get("result", "No feedback available.")
# guidance.append("### ⚡ LAST ACTION FEEDBACK:")
# guidance.append(f"- Command: '{last_action_taken}'")
# guidance.append(f"- Full Result: '{last_res}'") # Ici on met tout le texte
# guidance.append(f"USE ALL THE HINT FROM THIS ACTIONS. YOU CAN EXAMINE OBJECTS IN THE RESULT. EX : Examine bowl")
# guidance.append("⚠️ TRUST THE CURRENT OBSERVATION TEXT OVER MEMORY FOR OBJECT NAMES. USE IT.")
# guidance.append("")
# if world_changed:
# guidance.append("- [!] SUCCESS: The world state changed. Analyze the new situation.")
# elif not is_new_loc:
# guidance.append("- [!] STASIS: No change detected. Do not repeat this exact command.")
# guidance.append("")
# visible_exits = structured_data.get("visible_exits", [])
# guidance.append("### 🗺️ STRATEGIC NAVIGATION:")
# visible_exits = structured_data.get("visible_exits", [])
# room_data = self.world_mapper.graph.get(loc_id, {})
# known_exits = room_data.get("exits", {})
# blocked_exits = room_data.get("blocked_exits", {})
# for direction in visible_exits:
# d_l = direction.lower()
# if d_l in blocked_exits:
# guidance.append(f"- {direction.upper()} : 🚫 BLOCKED ({blocked_exits[d_l]})")
# elif d_l in known_exits:
# target_id = known_exits[d_l]
# target_room = self.world_mapper.graph.get(str(target_id), {})
# t_name = target_room.get("name", "Unknown Area")
# status = "✅ Fully Visited" if target_room.get("items_scanned") else "🔎 Unexplored Items"
# guidance.append(f"- {direction.upper()} : Leads to **{t_name}** ({status})")
# else:
# guidance.append(f"- {direction.upper()} : 🌟 [NEW PATHWAY - EXPLORE THIS]")
# guidance.append("")
# guidance.append("\n### 🧠 GLOBAL STRATEGY & PUZZLE RADAR:")
# world_knowledge = []
# active_puzzles = []
# for r_id, r_data in self.world_mapper.graph.items():
# # On ne liste pas la pièce actuelle pour éviter le bruit
# if r_id != loc_id:
# # Objets laissés ailleurs
# if r_data.get("items"):
# world_knowledge.append(f"- In {r_data['name']}: {', '.join(r_data['items'])}")
# # Puzzles détectés ailleurs
# for p in r_data.get("puzzles", []):
# active_puzzles.append(f"- [{r_data['name']}] Obstacle: {p}")
# if world_knowledge:
# guidance.append("**Dropped/Left Items:**")
# guidance.extend(world_knowledge)
# print("VISIBLE EXISTS",visible_exits)
# if visible_exits:
# guidance.append("### 🚪 AVAILABLE EXITS:")
# # On extrait les directions déjà tentées dans cette pièce
# # On cherche des actions comme "go east", "east", "move north", etc.
# attempted_exits = []
# for entry in past_actions:
# action_str = entry.get("action", "").lower()
# for direction in visible_exits:
# if direction.lower() in action_str:
# attempted_exits.append(direction.lower())
# exit_guidance = []
# for direction in visible_exits:
# if direction.lower() in attempted_exits:
# exit_guidance.append(f"- {direction.upper()} (Already explored/tried)")
# else:
# exit_guidance.append(f"- {direction.upper()} 🌟 [NEW PATHWAY]")
# guidance.extend(exit_guidance)
# # Petit conseil stratégique si tout est exploré
# if all(d.lower() in attempted_exits for d in visible_exits) and visible_exits:
# guidance.append("- [TIP] All exits tried. If stuck, look for hidden objects or interact with items.")
# guidance.append("")
# # --- 2. HISTORIQUE CHRONOLOGIQUE (LES 10 DERNIÈRES) ---
# if past_actions:
# guidance.append(f"### 🛑 CHRONOLOGICAL HISTORY (Last 10 steps in this room):")
# # On prend les 10 dernières entrées de la liste
# for entry in past_actions[-10:-1]:
# act = entry.get("action")
# res = entry.get("result")
# # On affiche tout le résultat, mais ligne par ligne pour la clarté
# guidance.append(f"STEP {entry.get('step', '?')}: '{act}'")
# guidance.append(f" Result: {res}")
# guidance.append("")
# if world_changed or is_new_loc:
# guidance.append("### 🔥 HOT FOCUS: NEW STATE DETECTED")
# if is_new_loc:
# guidance.append(f"- [LOCATION] You just entered '{loc_name}'. All previous room logic is VOID.")
# if world_changed:
# guidance.append("- [WORLD CHANGE] The game state has mutated! Something opened, moved, or appeared.")
# guidance.append("- [URGENT] Acts based on the change (e.g., if something opened, look inside).")
# # --- SCAN HIERARCHIQUE (OBJETS PARENTS) ---
# # print("TREE DISPLAY",tree_display)
# # if tree_display:
# # guidance.append("### 🌳 ENVIRONMENT HIERARCHY (SENSORS):")
# # guidance.append("This is the exact structure of the room:")
# # for line in tree_display:
# # guidance.append(line)
# # guidance.append("")
# guidance.append(f"### 🔍 RAM SENSORS (INTELLIGENT SCAN):")
# objects_in_room = structured_data.get("objects_in_room", [])
# unexplored = [] # Jamais touchés
# failed_attempts = [] # Tentés mais avec erreur
# known_success = [] # Déjà examinés avec succès
# for obj_data in objects_in_room:
# if not isinstance(obj_data, dict):
# continue
# raw_name = obj_data.get("name", "Unknown")
# friendly_name = mapping.get(raw_name, raw_name).upper()
# # 1. Recherche de la dernière interaction
# last_entry = None
# for entry in reversed(past_actions):
# act = entry.get("action", "").lower()
# if raw_name.lower() in act or friendly_name.lower() in act:
# last_entry = entry
# break
# # 2. Construction de l'info de contenu (Hiérarchie)
# content_list = obj_data.get("contents", [])
# content_str = f" [Contains: {', '.join([mapping.get(c['name'], c['name']).upper() for c in content_list])}]" if content_list else ""
# display_line = f"- {friendly_name}{content_str}"
# # 3. Classification par LLM si une interaction existe
# if last_entry:
# action_tried = last_entry.get("action")
# result_received = last_entry.get("result")
# # On demande au LLM si c'est un échec
# analysis = self.classify_interaction(action_tried, result_received)
# if analysis.get("is_failure"):
# # C'est un échec : on le met en priorité pour correction
# fail_msg = f"{display_line}\n ⚠️ FAILED: '{action_tried}' -> {analysis['reason']}\n 💡 SUGGESTION: {analysis['suggestion']}"
# failed_attempts.append(fail_msg)
# else:
# # C'est un succès : on le met dans les acquis
# known_success.append(f"{display_line} | ✅ Known: {result_received[:60]}...")
# else:
# # Jamais tenté
# unexplored.append(display_line)
# # --- AFFICHAGE HIÉRARCHISÉ ---
# if failed_attempts:
# guidance.append("### ⚠️ ACTIONS TO CORRECT (FAILED ATTEMPTS):")
# guidance.extend(failed_attempts)
# guidance.append("")
# if unexplored:
# guidance.append("### ✨ NEW / UNEXPLORED IN ROOM:")
# guidance.extend(unexplored)
# guidance.append("")
# if known_success:
# guidance.append("### ✅ ALREADY INTERACTED (SUCCESS):")
# guidance.extend(known_success)
# guidance.append("")
# # --- GUIDANCE STRATÉGIQUE MISE À JOUR ---
# guidance.append("\n**STRATEGY GUIDANCE**:")
# if failed_attempts:
# guidance.append("- [RECOVERY] Prioritize correcting FAILED actions. Do not repeat the same command; follow the suggestion.")
# guidance.append("- [PHYSICS] If an item is inside a container, you MUST 'OPEN' or 'EXAMINE' the parent first.")
# guidance.append("- [EXPLORATION] Do not leave this room until all 'NEW' and 'FAILED' items are resolved.")
# # --- DÉTECTION DES SECRETS ---
# # On utilise raw_ram_objects ici car c'est une liste de strings simple
# raw_names_list = structured_data.get("raw_ram_objects", [])
# has_secrets = any("(missing" in o.lower() for o in raw_names_list)
# if has_secrets:
# guidance.append("### ⚠️ SENSORY ANOMALY:")
# guidance.append("- [GOD-MODE] Hidden structures detected. Use 'SEARCH' or 'EXAMINE' on the scenery.")
# clues = " ".join(structured_data.get("puzzle_clues", [])).lower()
# if any(k in clues for k in ["noise", "hear", "sound"]):
# guidance.append("- [AUDIO] Noise detected! Use 'listen'.")
# if is_new_loc:
# guidance.append("### 🚀 NEW AREA PROTOCOL: 1. 'look', 2. 'examine' new objects, 3. 'listen', 4. explore exits.")
# return "\n".join(guidance)
def _build_priority_guidance(self, structured_data: dict) -> str:
    """Build the per-room guidance block injected into the actor prompt.

    Combines, in priority order:
      1. An anti-stagnation directive when the agent lingers too long.
      2. Feedback on the action that was just executed.
      3. The strategic map summary from the WorldMapper.
      4. A per-object scan of the current room (RAM data).
      5. A short chronology of recent commands in this room.

    Args:
        structured_data: Extractor output for the current turn (location id,
            name translations, inventory, objects_in_room, ...).

    Returns:
        A newline-joined guidance string for the LLM prompt.
    """
    guidance = []
    loc_name = self.current_location
    # Fall back to the last known room id when the extractor gave none.
    loc_id = str(structured_data.get("location_id", self.last_room_id or -1))
    mapping = structured_data.get("name_translation", {})
    past_actions = self.location_action_memory.get(loc_name, [])
    current_inventory = str(structured_data.get("inventory", "Unknown"))
    current_obs = structured_data.get("description_summary", "No description")
    enriched_actions = self._generate_enriched_actions(structured_data)
    # --- Anti-stagnation: push the agent out of an over-explored room ---
    if self.steps_in_current_room >= MAX_STEPS_ALLOWED:
        guidance.append("\n### 🚀 URGENT STRATEGIC DIRECTIVE:")
        guidance.append(f"- [STAGNATION ALERT] {self.steps_in_current_room} turns in this room.")
        known_room = self.world_mapper.graph.get(loc_id, {})
        potential = known_room.get("potential_exits", [])
        already_linked = known_room.get("exits", {})  # Dict: {direction: target_id}
        new_paths = [p for p in potential if p not in already_linked]
        # --- Case 1: there are new exits left to try ---
        if new_paths:
            guidance.append(f"- [ACTION] Move to a NEW area. Priority: {', '.join(new_paths).upper()}")
        # --- Case 2: only already-known exits -> suggest backtracking ---
        elif already_linked:
            guidance.append("- [ACTION] Room exhausted. Backtrack or move to a known adjacent room.")
            directions_list = [d.upper() for d in already_linked.keys()]
            guidance.append(f"- [HINT] Known exits: {', '.join(directions_list)}")
        # --- Case 3: no exit known at all -> hunt for hidden passages ---
        else:
            guidance.append("- [CRITICAL] No exits found in memory or observation.")
            guidance.append("- [ACTION] Use 'SEARCH', 'LISTEN', or 'EXAMINE' on the scenery to find hidden passages.")
            guidance.append("- [HINT] Try common directions anyway: NORTH, SOUTH, EAST, WEST,NORTHEAST,NORTHWEST,SOUTHEAST,SOUTHWEST, UP, DOWN.")
    # ============================================================
    # 1. IMMEDIATE FEEDBACK (what just happened)
    # ============================================================
    if self.recent_actions and past_actions:
        last_action_taken = self.recent_actions[-1]
        last_entry = past_actions[-1]
        guidance.append("### ⚡ LAST ACTION FEEDBACK:")
        guidance.append(f"- Command: '{last_action_taken}'")
        # Classify the action that was just performed.
        immediate_analysis = self.classify_interaction(
            action=last_entry['action'],
            result=last_entry['result'],
            observation=current_obs,
            inventory=current_inventory,
            name_mapping=mapping,
            possible_actions=enriched_actions
        )
        if immediate_analysis.get("is_failure"):
            guidance.append(f"- [!] STATUS: FAILURE")
            guidance.append(f"- [!] REASON: {immediate_analysis['reason']}")
            guidance.append(f"- [!] SUGGESTION: {immediate_analysis['suggestion']}")
        elif structured_data.get("world_changed"):
            guidance.append("- [!] STATUS: SUCCESS (World state updated)")
        else:
            guidance.append(f"- [!] STATUS: NEUTRAL / INFO: {last_entry.get('result', '')}")
        guidance.append("IF ANOTHER ENTITIES IS INTERESTED BY AN ITEM IT'S A HINT. EX : pig climb fountain -> fountain must be an important object to examine or search")
    # ============================================================
    # 2. STRATEGIC VISION (WorldMapper summary)
    # ============================================================
    strategic_summary = self.world_mapper.generate_summary(loc_id)
    guidance.append(strategic_summary)
    guidance.append("")
    # ============================================================
    # 3. LOCAL RAM SCAN (per-object detail for the current room)
    # ============================================================
    guidance.append("### 🔍 LOCAL OBJECT SCAN (RAM):")
    objects_in_room = structured_data.get("objects_in_room", [])
    # NOTE(review): these three buckets are leftovers from an earlier grouped
    # display (see the commented-out code above) and are never filled below.
    unexplored = []
    failed_attempts = []
    known_success = []
    for obj_data in objects_in_room:
        if not isinstance(obj_data, dict): continue
        raw_name = obj_data.get("name", "Unknown")
        has_hidden = obj_data.get("contains_count", 0) > 0  # NOTE(review): currently unused
        friendly_name = mapping.get(raw_name, "").upper()
        # "present" means the object has a validated friendly name in the text.
        present = True
        if friendly_name == "":
            present = False
        # Find the last interaction that mentioned this specific object.
        last_obj_entry = None
        for entry in reversed(past_actions):
            act = entry.get("action", "").lower()
            found_raw = raw_name and raw_name.lower() in act
            found_friendly = friendly_name and friendly_name.lower() in act
            if found_raw or found_friendly:
                last_obj_entry = entry
                break
        # Containment display (child objects).
        contents = obj_data.get("contents", [])
        content_str = f" [Contains: {', '.join([mapping.get(c['name'], c['name']).upper() for c in contents])}]" if contents else ""
        # NOTE(review): when contents is non-empty the block below overwrites
        # the mapped version above with the raw child names.
        if contents:
            child_names = [c.get("name", "Unknown").upper() for c in contents]
            content_str = f" [Contains: {', '.join(child_names)}]"
        if present:
            display_line = f"- {friendly_name}{content_str}"
        if not present :
            display_line = f"- {raw_name}{content_str} [!] Technical Name use the real name or a full word. Ex : fountabowl -> bowl, brokstair -> stairs ... CHECK YOUR HISTORY AND OBSERVATION"
        if last_obj_entry:
            last_act_text = last_obj_entry['action']
            last_res_text = last_obj_entry['result']  # NOTE(review): unused
            # Re-classify that past interaction to know whether it failed.
            analysis = self.classify_interaction(
                action=last_obj_entry['action'],
                result=last_obj_entry['result'],
                observation=current_obs,
                inventory=current_inventory,
                name_mapping=mapping,
                possible_actions=self._generate_enriched_actions(structured_data)
            )
            if analysis.get("is_failure"):
                guidance.append(f"{display_line}\n ❌ Last tried: '{last_act_text}' -> {analysis['reason']}")
                guidance.append(f" 💡 Suggestion: {analysis['suggestion']}")
            else:
                # NOTE(review): last_act_text appears twice in this line.
                guidance.append(f"{display_line} | ✅ Last: '{last_act_text}' (Success) {last_act_text}")
        else:
            # Never touched yet.
            if present or contents :
                guidance.append(f"{display_line} | ✨ UNEXPLORED")
    # ============================================================
    # 4. CHRONOLOGY (recent command history in this room)
    # ============================================================
    if past_actions:
        guidance.append(f"\n### 🛑 ROOM HISTORY (Last steps):")
        for entry in past_actions[-10:]:
            guidance.append(f"- '{entry.get('action')}' -> {entry.get('result')}...")
    return "\n".join(guidance)
def _build_prompt(self, observation: str,knowledge:str) -> str:
"""Build the prompt for the LLM with context."""
parts = []
# strategy_text = self.strategist.get_strategy_context()
# parts.append(strategy_text)
# parts.append("-" * 40)
parts.append(f"Current Score: {self.score}")
# Recent history
if self.history:
parts.append("\nRecent actions:")
for entry in self.history[-3:]:
action = entry.get("args", {}).get("action", entry["tool"])
result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
parts.append(f" > {action} -> {result_short}")
# Warn about repeated actions
if self.recent_actions and len(set(self.recent_actions[-3:])) == 1:
parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
# if knowledge:
# parts.append("\n=== ACQUIRED STRATEGIC KNOWLEDGE ===")
# parts.append(knowledge)
# parts.append("====================================\n")
parts.append(f"\nCURRENT SITUATION:\n{observation}")
parts.append("\nWhat is your next specific command?")
return "\n".join(parts)
def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
"""Parse the LLM response to extract thought, tool, and arguments."""
thought = "No reasoning provided"
tool_name = "play_action"
tool_args = {"action": "look"}
lines = response.strip().split("\n")
for line in lines:
line_clean = line.strip()
line_upper = line_clean.upper()
if line_upper.startswith("THOUGHT:"):
thought = line_clean.split(":", 1)[1].strip()
elif line_upper.startswith("TOOL:"):
raw_tool = line_clean.split(":", 1)[1].strip().lower()
raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
tool_name = raw_tool
elif line_upper.startswith("ARGS:"):
args_part = line_clean.split(":", 1)[1].strip()
try:
args_part = args_part.replace("'", '"')
tool_args = json.loads(args_part)
except json.JSONDecodeError:
match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
if match:
tool_args = {"action": match.group(1)}
else:
tool_args = {"action": "look"}
return thought, tool_name, tool_args
def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
"""Validate and fix common tool call issues."""
# Fix tool name
if tool_name not in valid_tools:
if tool_name in ["action", "do", "command"]:
tool_name = "play_action"
elif tool_name in ["map", "location"]:
tool_name = "get_map"
elif tool_name in ["mem", "state", "status"]:
tool_name = "memory"
elif tool_name in ["inv", "items"]:
tool_name = "inventory"
else:
tool_name = "play_action"
# Fix action verbs
if tool_name == "play_action":
action = str(tool_args.get("action", "look")).lower().strip()
direction = tool_args.get("direction")
# Fusion de la direction si le LLM l'a mise à part
if direction and str(direction).lower() not in action:
action = f"{action} {direction}"
nav_map = {
"north": "n", "south": "s", "east": "e", "west": "w",
"northeast": "ne", "northwest": "nw",
"southeast": "se", "southwest": "sw",
"up": "u", "down": "d"
}
invalid_verb_map = {
"check": "examine",
"inspect": "examine",
"search": "look",
"grab": "take",
"pick": "take",
"use": "examine",
"investigate": "examine",
}
if action.startswith("go "):
action = action.replace("go ", "").strip()
words = action.split()
if words:
if words[0] in invalid_verb_map:
words[0] = invalid_verb_map[words[0]]
action = " ".join(words)
if words[0] == "examine":
words = words[:2]
action = " ".join(words)
else:
action = " ".join(words)
if action in nav_map:
action = nav_map[action]
action = action.replace("**", "").replace("*", "").replace("`", "")
action = " ".join(action.split())
return tool_name, {"action": action}
return tool_name, tool_args
def _extract_result(self, result) -> str:
"""Extract text from MCP tool result."""
if hasattr(result, 'content') and result.content:
return result.content[0].text
if isinstance(result, list) and result:
return result[0].text if hasattr(result[0], 'text') else str(result[0])
return str(result)
def _update_score(self, text: str) -> None:
"""Update score from game text."""
patterns = [
r'Score:\s*(\d+)',
r'score[:\s]+(\d+)',
r'\[Score:\s*(\d+)',
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
self.score = max(self.score, int(match.group(1)))
def _is_game_over(self, text: str) -> bool:
"""Check if the game is over."""
game_over_phrases = [
"game over",
"you have died",
"you are dead",
"*** you have died ***",
]
text_lower = text.lower()
return any(phrase in text_lower for phrase in game_over_phrases)
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Run the agent locally against the MCP server for a short smoke test.

    Launches mcp_server.py via fastmcp, plays 20 steps of zork1 with a
    fixed seed, and prints the run summary.

    NOTE(review): the visible RunResult dataclass declares only
    final_score/max_score — confirm `moves` and `locations_visited`
    actually exist on the returned object.
    """
    from fastmcp import Client
    agent = StudentAgent()
    async with Client("mcp_server.py") as client:
        result = await agent.run(
            client=client,
            game="zork1",
            max_steps=20,
            seed=42,
            verbose=True,
        )
        print(f"\n{'=' * 50}")
        print(f"Final Score: {result.final_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations: {len(result.locations_visited)}")
class StrategyModule:
    """High-level planner ("the brain"): maintains a strategic plan for the actor."""

    def __init__(self):
        # Most recently parsed plan dict; None until a planning call succeeds.
        self.current_plan = None
        self.last_update_step = 0

    def generate_plan(self, observation: str, history: list, step: int, knowledge: str) -> dict:
        """Create or refresh the strategic plan via a dedicated LLM call.

        Returns the parsed plan dict, or None when the LLM reply could not
        be parsed as JSON.
        """
        # Condense the recent history for the planner prompt.
        history_summary = "\n".join(
            [f"- {h['thought']} -> {h['result'][:50]}..." for h in history[-5:]]
        )
        prompt = f"""
CURRENT SITUATION:
{observation}
RECENT HISTORY:
{history_summary}
ACQUIRED KNOWLEDGE (Tips & Rules from previous games):
{knowledge if knowledge else "No prior knowledge available."}
TASK:
Based on the Situation and Knowledge, create a strategic plan.
If the Knowledge says "Trolls fear swords", and you see a Troll, your plan must be "Find sword".
"""
        # Dedicated planner call (seeded per step for reproducibility).
        response = call_llm(prompt, PLANNER_SYSTEM_PROMPT, seed=step, max_tokens=400)
        try:
            # Strip optional markdown fences before parsing the JSON plan.
            payload = response.strip()
            if "```json" in payload:
                payload = payload.split("```json")[1].split("```")[0]
            elif "```" in payload:
                payload = payload.split("```")[1].split("```")[0]
            self.current_plan = json.loads(payload)
            self.last_update_step = step
            return self.current_plan
        except Exception as e:
            print(f"[Strategy Error] Failed to parse plan: {e}")
            return None

    def get_strategy_context(self) -> str:
        """Render the active plan as a text block for the actor prompt."""
        plan = self.current_plan
        if not plan:
            return "NO ACTIVE PLAN. Explore cautiously."
        step_lines = chr(10).join(['- ' + s for s in plan.get('suggested_steps', [])])
        return f"""
*** STRATEGIC GUIDANCE ***
CURRENT OBJECTIVE: {plan.get('current_objective', 'Unknown')}
STRATEGIC REASONING: {plan.get('reasoning', 'None')}
STEPS TO TAKE:
{step_lines}
"""
##### CRITIC Agent
@dataclass
class CriticResponse:
    """Verdict returned by the critic LLM for a proposed action."""
    # Confidence that the proposed action is sensible, in [0, 1].
    score: float
    # Short human-readable explanation from the critic.
    justification: str
    # True when the critic believes the action could end the game (death).
    is_fatal: bool = False
class ActionHistoryTracker:
    """Tracks recent actions/locations to detect loops and known failures.

    BUG FIX: the original annotated ``failed_actions_per_location`` with
    ``Set[str]`` although ``Set`` was never imported (only List/Optional/Dict
    are), so the annotated attribute assignment raised NameError the first
    time ``__init__`` ran. Builtin generics (PEP 585) are used instead,
    matching the ``list[str]`` style already present in this file.
    """

    # Rolling history keeps at most this many entries.
    _HISTORY_LIMIT = 20

    def __init__(self):
        self.recent_actions: list[str] = []
        self.location_history: list[str] = []
        # Per-location set of commands the game rejected.
        self.failed_actions_per_location: dict[str, set[str]] = {}

    def update(self, action: str, location: str, result: str):
        """Record an executed action and flag it as failed when the game text says so."""
        self.recent_actions.append(action)
        self.location_history.append(location)
        # Basic failure detection based on the game's response text.
        failure_keywords = ["can't", "don't", "nothing happens", "impossible", "failed", "no such"]
        if any(k in result.lower() for k in failure_keywords):
            self.failed_actions_per_location.setdefault(location, set()).add(action)
        # Bound the rolling history (actions and locations stay in lockstep).
        if len(self.recent_actions) > self._HISTORY_LIMIT:
            self.recent_actions.pop(0)
            self.location_history.pop(0)

    def is_looping(self, proposed_action: str) -> bool:
        """True when the last three actions are all identical to the proposal."""
        if len(self.recent_actions) >= 3:
            return all(a == proposed_action for a in self.recent_actions[-3:])
        return False

    def is_known_failure(self, proposed_action: str, current_location: str) -> bool:
        """True when this exact action already failed in this location."""
        return proposed_action in self.failed_actions_per_location.get(current_location, set())
class CriticAgent:
"""
Le module critique qui valide les actions avant exécution.
"""
def __init__(self,call_llm_func, verbose: bool = False):
self.history_tracker = ActionHistoryTracker()
self.verbose = verbose
self.call_llm = call_llm_func
def check_heuristics(self, action: str, current_location: str,valid_exits:list[str]) -> tuple[bool, str]:
"""
Vérification rapide basée sur des règles (Pas de LLM).
Retourne (Est_Valid, Raison).
"""
# 1. Vérifier si c'est une action vide
if not action or len(action.strip()) < 2:
return False, "Action too short or empty"
# 2. Vérifier les boucles immédiates
if self.history_tracker.is_looping(action):
return False, "Detected infinite loop (action repeated too many times)"
# 3. Vérifier les échecs connus (Memory-based rejection)
if self.history_tracker.is_known_failure(action, current_location):
return False, f"Action '{action}' previously failed in this location"
directions = ["north", "south", "east", "west", "up", "down",
"n", "s", "e", "w", "u", "d", "ne", "nw", "se", "sw"]
action_word = action.lower().strip()
if action_word in directions and valid_exits:
is_possible = False
for exit_name in valid_exits:
if action_word in exit_name.lower() or exit_name.lower().startswith(action_word):
is_possible = True
break
if not is_possible:
return False, f"You can't go '{action_word}'. Visible exits are: {valid_exits}"
return True, "Heuristics passed"
return True, "Heuristics passed"
def evaluate_with_llm(self, action: str, observation: str, inventory: str, seed: int) -> CriticResponse:
"""
Évaluation sémantique lente via LLM.
"""
# Construction du prompt
prompt = f"""
OBSERVATION:
{observation[:1000]}...
INVENTORY:
{inventory}
PROPOSED ACTION:
{action}
Evaluate this action.
"""
try:
response_text = self.call_llm(prompt, CRITIC_SYSTEM_PROMPT, seed=seed, max_tokens=150)
# Parsing JSON résilient
json_str = response_text.strip()
if "```json" in json_str:
json_str = json_str.split("```json")[1].split("```")[0]
elif "```" in json_str:
json_str = json_str.split("```")[1].split("```")[0]
data = json.loads(json_str)
return CriticResponse(
score=float(data.get("score", 0.5)),
justification=data.get("justification", "No reason provided"),
is_fatal=data.get("is_fatal", False)
)
except Exception as e:
if self.verbose:
print(f"[Critic Error] LLM validation failed: {e}")
# En cas d'erreur, on laisse passer (fail open)
return CriticResponse(score=1.0, justification="Validation failed, allowing action")
def critique_action(self, action: str, observation: str, inventory: str, current_location: str, seed: int,valid_exits: list[str] = None) -> bool:
"""
Méthode principale à appeler depuis l'agent.
Retourne True si l'action est acceptée, False sinon.
"""
# 1. Filtre Heuristique (Rapide & Gratuit)
is_valid, reason = self.check_heuristics(action, current_location,valid_exits)
if not is_valid:
if self.verbose:
print(f"🛑 [CRITIC REJECT - RULE] {reason}")
return False
# 2. Filtre LLM (Lent & Coûteux - on peut l'activer seulement pour les actions complexes)
# Pour optimiser, on ne vérifie pas les mouvements simples (north, south, etc.)
simple_moves = ["north", "south", "east", "west", "up", "down", "look", "inventory"]
if action.lower() in simple_moves:
return True
# Appel LLM pour les actions complexes (take, attack, open...)
evaluation = self.evaluate_with_llm(action, observation, inventory, seed)
if evaluation.score < 0.4 or evaluation.is_fatal:
if self.verbose:
print(f"🛑 [CRITIC REJECT - LLM] Score: {evaluation.score} | Reason: {evaluation.justification}")
return False
return True
def record_result(self, action: str, current_location: str, result_text: str):
"""Met à jour la mémoire du critique après l'exécution."""
self.history_tracker.update(action, current_location, result_text)
#### Extractor of data
class StructuredObservation(BaseModel):
    """Pydantic schema the extractor LLM must follow (see ObservationExtractor)."""
    location_id: int  # RAM id of the room
    location_name: str
    is_new_location: bool
    world_changed: bool  # based on the state hash
    description_summary: str
    takeable_objects: List[str]
    visible_exits: List[str]
    interactable_features: List[str]
    puzzle_clues: List[str]
    entities: List[str]
    in_combat: bool
    raw_ram_objects : List[str]  # object names straight from the RAM scan
    # Maps technical RAM names to friendly in-game names.
    name_translation: Dict[str, str]
class ObservationExtractor:
    """Turns raw game text + RAM data into a structured observation dict.

    Uses an LLM (EXTRACTOR_SYSTEM_PROMPT) and falls back to a manual
    regex-based extraction when the LLM reply cannot be parsed.
    """

    def __init__(self, call_llm_func):
        # Injected LLM caller: (prompt, system_prompt, seed=...) -> str.
        self.call_llm = call_llm_func

    def extract(self, raw_text: str, seed: int, ram_data: dict, last_location: str = "Unknown") -> dict:
        """Convert raw game text and structured RAM data into a dict.

        Args:
            raw_text: Observation text from the game engine.
            seed: Seed forwarded to the LLM call for determinism.
            ram_data: Technical truth from RAM: location, inventory,
                detected_objects (each with optional nested "contents").
            last_location: Fallback location name when RAM has none.

        Returns:
            A dict following StructuredObservation, enriched with
            raw_ram_objects / objects_in_room, or a degraded manual
            extraction when the LLM path fails.
        """
        inventory_raw = ram_data.get("inventory", [])
        inventory_names = [item["name"] for item in inventory_raw]
        # 1. Safe extraction of the nested RAM fields.
        location_info = ram_data.get("location", {})
        curr_id = location_info.get("id", -1)
        curr_name = location_info.get("name", last_location)
        # Object names come as a list of dicts: [{'name': ...}, ...]
        detected_objs_raw_all = ram_data.get("detected_objects", [])
        inventory_names_l = [item["name"].lower() for item in ram_data.get("inventory", [])]
        detected_objs_raw = []
        for obj in detected_objs_raw_all:
            name_l = obj["name"].lower()
            content_names_l = [c["name"].lower() for c in obj.get("contents", [])]
            # Skip the player object (recognised by name, or because it
            # "contains" items that are in our inventory).
            is_player = (
                name_l in ["inconnu", "self", "player", "me", "grunk"] or
                (content_names_l and any(inv_item in content_names_l for inv_item in inventory_names_l))
            )
            if not is_player:
                detected_objs_raw.append(obj)
        inventory_data = ram_data.get("inventory", [])
        inv_names_lower = [str(item.get("name", "")).lower() for item in inventory_data]
        # 2. Flatten the object tree into names + a printable hierarchy.
        objects_in_ram = []
        tree_view = []
        for parent in detected_objs_raw:
            p_name = parent["name"]
            p_contents = parent.get("contents", [])
            p_content_names = [c["name"] for c in p_contents]
            # Normalise to lowercase for comparisons.
            p_name_l = p_name.lower()
            p_content_names_l = [n.lower() for n in p_content_names]
            # Extended player-container detection.
            is_player_container = (
                p_name_l in ["inconnu", "self", "player", "me", "inventory", "grunk"] or
                # Any inventory item present inside this container?
                any(inv_item in p_content_names_l for inv_item in inv_names_lower) or
                # Or the parent itself is an inventory item.
                p_name_l in inv_names_lower
            )
            if is_player_container:
                continue
            # --- A genuine scenery/world object ---
            objects_in_ram.append(p_name)
            if p_contents:
                c_names = [c["name"] for c in p_contents]
                objects_in_ram.extend(c_names)
                tree_view.append(f"- {p_name} (contains: {', '.join(c_names)})")
            else:
                tree_view.append(f"- {p_name}")
        # 3. Build the extractor prompt. Objects tagged "(missing...)" in RAM
        # are treated as puzzle hints further below.
        prompt = f"""
RAW GAME TEXT:
{raw_text}
--- RAM DATA (TECHNICAL TRUTH) ---
CURRENT LOCATION ID: {curr_id}
OBJECTS DETECTED IN RAM: {", ".join(objects_in_ram)}
OBJECTS DETECTED (HIERARCHY):
{chr(10).join(tree_view)}
JSON SCHEMA:
Follow the StructuredObservation model.
"""
        try:
            response = self.call_llm(
                prompt,
                EXTRACTOR_SYSTEM_PROMPT,
                seed=seed
            )
            # Grab the outermost JSON object from the reply.
            json_match = re.search(r'\{.*\}', response, re.DOTALL)
            data = json.loads(json_match.group(0)) if json_match else json.loads(response)
            # RAM values override whatever the LLM hallucinated.
            data["location_id"] = curr_id
            data["location_name"] = curr_name
            data["raw_ram_objects"] = objects_in_ram
            if not data.get("location_name") or data.get("location_name") == "Unknown":
                data["location_name"] = curr_name
            # Keep only translations whose friendly name actually appears
            # in the observation text.
            raw_data_mapping = data.get("name_translation", {})
            valid_mapping = {}
            lower_text = raw_text.lower()
            for tech_name, friendly_name in raw_data_mapping.items():
                if tech_name in objects_in_ram:
                    if friendly_name.lower() in lower_text:
                        valid_mapping[tech_name] = friendly_name
                    else:
                        pass
            parent_names = [item["name"] for item in detected_objs_raw]
            # Filter out hallucinated objects not backed by RAM.
            data["takeable_objects"] = [o for o in data.get("takeable_objects", []) if o in parent_names]
            data["interactable_features"] = [o for o in data.get("interactable_features", []) if o in parent_names]
            data["objects_in_room"] = detected_objs_raw
            data["name_translation"] = valid_mapping
            # Auto-flag RAM-only "secret" objects as puzzle clues.
            secrets = [o for o in objects_in_ram if "(missing" in o.lower()]
            if secrets:
                if "puzzle_clues" not in data: data["puzzle_clues"] = []
                data["puzzle_clues"].append(f"RAM Alert: {len(secrets)} hidden object(s) detected. Search the area.")
            return data
        except Exception as e:
            print(f"[Extractor Error] {e}")
            # Degraded mode: scan the text manually for direction words.
            manual_exits = []
            lower_text = raw_text.lower()
            for d in VALID_DIRECTIONS:
                # Direction must be a whole word (bounded by space/punctuation).
                if re.search(rf"\b{d}\b", lower_text):
                    manual_exits.append(d)
            return {
                "location_id": curr_id,
                "location_name": curr_name,
                "description_summary": raw_text,
                "raw_ram_objects": objects_in_ram,
                "objects_in_room" : detected_objs_raw,
                "visible_exits": manual_exits,  # manual scan injected here
                "name_translation": {},
                "takeable_objects": [],
                "puzzle_clues": ["Erreur d'extraction LLM."]
            }
#### Utilitaires de sections
class SectionUtils:
    """Regex helpers for reading/writing '## SECTION' blocks in the knowledge base."""

    @staticmethod
    def extract_section_content(content: str, section_name: str) -> str:
        """Return the body of '## <section_name>', or '' when absent."""
        if not content:
            return ""
        section_re = rf"## {re.escape(section_name)}(.*?)(?=\n## |$)"
        found = re.search(section_re, content, re.DOTALL)
        if found is None:
            return ""
        return found.group(1).strip()

    @staticmethod
    def update_section_content(content: str, section_name: str, new_content: str) -> str:
        """Replace the named section in-place, or append it when missing."""
        if not content:
            content = "# Zork Strategic Knowledge Base\n\n"
        header = f"## {section_name}"
        section_re = rf"## {re.escape(section_name)}(.*?)(?=\n## |$)"
        found = re.search(section_re, content, re.DOTALL)
        rebuilt_section = f"{header}\n\n{new_content}\n"
        if found is None:
            # Section absent: append it at the end of the document.
            return f"{content}\n\n{rebuilt_section}\n"
        # Section present: swap only the first occurrence.
        return content.replace(found.group(0), rebuilt_section, 1)

    @staticmethod
    def extract_cross_episode_section(content: str) -> str:
        """Pull the 'Wisdom' section meant to persist across games."""
        return SectionUtils.extract_section_content(content, "CROSS-EPISODE INSIGHTS")
# Canonical set of movement tokens recognised by the mapper/extractor:
# short forms plus their spelled-out equivalents.
VALID_DIRECTIONS = {
    "n", "s", "e", "w", "ne", "nw", "se", "sw", "u", "d", "in", "out",
    "north", "south", "east", "west", "northeast", "northwest",
    "southeast", "southwest", "up", "down"
}
class WorldMapper:
    """Incrementally builds a graph of rooms (room id -> metadata) from turns.

    Node schema: name, exits (direction -> room id, confirmed by traversal),
    blocked_exits (direction -> reason), potential_exits (seen but never
    taken), puzzles, items, scenery, visited_count, items_scanned,
    description.
    """

    def __init__(self):
        self.graph = {}
        self.last_room_id = None
        self.last_direction = None  # NOTE(review): never written in this class

    def update_map(self, structured_data: dict, last_action: str, observation: str):
        """Register the current room and, if we moved, link it to the previous one."""
        curr_id = str(structured_data.get("location_id"))
        curr_name = structured_data.get("location_name")
        new_exits = structured_data.get("visible_exits", [])
        # Keep only tokens that are real directions (filters LLM noise).
        clean_directions = [d.lower() for d in new_exits if d.lower() in VALID_DIRECTIONS]
        print("cleand directions", clean_directions)  # NOTE(review): leftover debug print (typo included)
        if curr_id not in self.graph:
            # First visit: create the node.
            self.graph[curr_id] = {
                "name": curr_name,
                "exits": {},
                "blocked_exits" : {},
                "potential_exits": clean_directions,
                "puzzles": structured_data.get("puzzle_clues", []),
                "items": structured_data.get("takeable_objects", []),
                "scenery": structured_data.get("interactable_features", []),
                "visited_count": 1,
                "items_scanned": False ,
                "description": observation
            }
        else:
            # Revisit: refresh volatile fields and merge exit candidates.
            self.graph[curr_id]["visited_count"] += 1
            self.graph[curr_id]["items"] = structured_data.get("takeable_objects", [])
            self.graph[curr_id]["puzzles"] = list(set(self.graph[curr_id]["puzzles"] + structured_data.get("puzzle_clues", [])))
            existing_potentials = set(self.graph[curr_id].get("potential_exits", []))
            existing_potentials.update(clean_directions)
            self.graph[curr_id]["potential_exits"] = list(existing_potentials)
        # If we actually changed rooms via a direction, record the edge.
        direction_taken = self._extract_direction(last_action)
        if self.last_room_id and self.last_room_id != curr_id and direction_taken:
            self.graph[self.last_room_id]["exits"][direction_taken.lower()] = curr_id
            opp = self._get_opposite(direction_taken.lower())
            if opp:
                # Create the reverse link immediately.
                self.graph[curr_id]["exits"][opp] = self.last_room_id
                print(f"🗺️ Map Link: {self.graph[self.last_room_id]['name']} <({direction_taken})--({opp})> {curr_name}")
        self.last_room_id = curr_id

    def mark_as_scanned(self, room_id: str):
        """Called when the agent has finished examining every object in a room."""
        if room_id in self.graph:
            self.graph[room_id]["items_scanned"] = True

    def mark_blocked_exit(self, room_id: str, direction: str, reason: str):
        """Remember a failed direction so it is not retried pointlessly."""
        room_id = str(room_id)
        if room_id in self.graph:
            if "blocked_exits" not in self.graph[room_id]:
                self.graph[room_id]["blocked_exits"] = {}
            # Store the direction with its failure reason (e.g. "locked", "too dark").
            self.graph[room_id]["blocked_exits"][direction.lower()] = reason

    def _extract_direction(self, action: str) -> str:
        """Return the full direction word contained in an action, or None."""
        action = action.lower().strip()
        # Short forms mapped to their spelled-out equivalents.
        mapping = {
            "n": "north",
            "s": "south",
            "e": "east",
            "w": "west",
            "u": "up",
            "d": "down",
            "ne": "northeast",
            "nw": "northwest",
            "se": "southeast",
            "sw": "southwest",
            "in": "inside",
            "out": "outside",
            "enter": "inside",
            "exit": "outside"
        }
        # 1. Exact short form (e.g. "ne").
        if action in mapping:
            return mapping[action]
        # 2. Otherwise look for a full word inside the phrase (e.g. "go northeast").
        # Sort by decreasing length so "north" doesn't match inside "northeast".
        full_directions = sorted(mapping.values(), key=len, reverse=True)
        for d in full_directions:
            if d in action:
                return d
        return None

    def get_navigation_guidance(self, current_id: str, visible_exits: list) -> list:
        """Annotate each visible exit as already-taken or new (to push exploration)."""
        current_id = str(current_id)
        room_data = self.graph.get(current_id, {})
        known_exits = room_data.get("exits", {})  # directions already traversed
        nav_lines = []
        for direction in visible_exits:
            dir_lower = direction.lower()
            if dir_lower in known_exits:
                target_id = known_exits[dir_lower]
                target_name = self.graph.get(target_id, {}).get("name", "Unknown")
                nav_lines.append(f"- {direction.upper()} : Already taken (leads to {target_name})")
            else:
                # This is where we push the agent to explore!
                nav_lines.append(f"- {direction.upper()} : 🌟 [NEW PATHWAY - TRY THIS]")
        return nav_lines

    def _get_opposite(self, direction: str) -> str:
        """Return the reverse direction (both long and short forms supported)."""
        opposites = {
            "north": "south", "south": "north",
            "east": "west", "west": "east",
            "up": "down", "down": "up",
            "northeast": "southwest", "southwest": "northeast",
            "northwest": "southeast", "southeast": "northwest",
            "inside": "outside", "outside": "inside",
            "n": "s", "s": "n", "e": "w", "w": "e",
            "u": "d", "d": "u", "ne": "sw", "sw": "ne",
            "nw": "se", "se": "nw", "in": "out", "out": "in"
        }
        return opposites.get(direction)

    def generate_summary(self, current_id: str) -> str:
        """Produce the full strategic map text injected into the actor prompt."""
        current_id = str(current_id)
        if current_id not in self.graph:
            return "### 🗺️ MAP: Position current unknown in strategic memory."
        summary = ["### 🗺️ STRATEGIC MAP & SPATIAL MEMORY:"]
        # 1. Confirmed exits with their scan status.
        summary.append(f"**Current Location:** {self.graph[current_id]['name']}")
        exits = self.graph[current_id].get("exits", {})
        if exits:
            for direction, target_id in exits.items():
                room = self.graph.get(target_id, {})
                name = room.get("name", "Unknown Area")
                # Visual marker: has that room been fully examined?
                status = "✅ Scanned" if room.get("items_scanned") else "🔎 Items left"
                summary.append(f"- {direction.upper()} -> {name} ({status})")
        else:
            summary.append("- No known exits explored yet from here.")
        # 2. Items/scenery remembered in OTHER rooms (the current room is
        # already covered by the RAM observation).
        other_rooms_with_stuff = []
        for r_id, r_data in self.graph.items():
            if r_id != current_id:
                items = r_data.get("items", [])
                features = r_data.get("scenery", [])
                if items or features:
                    room_info = f"- In {r_data['name']}:"
                    if items: room_info += f" Items: [{', '.join(items)}]"
                    if features: room_info += f" Scenery: {', '.join(features)}"
                    other_rooms_with_stuff.append(room_info)
        if other_rooms_with_stuff:
            summary.append("\n**🌍 WORLD OBJECTS (Memory):**")
            summary.extend(other_rooms_with_stuff)
        # 3. Global puzzle list.
        # NOTE(review): built but its display is commented out below.
        unsolved_puzzles = []
        for r_id, r_data in self.graph.items():
            for p in r_data.get("puzzles", []):
                unsolved_puzzles.append(f"[{r_data['name']}] {p}")
        # 4. Immediate navigation: merge potential / known / blocked exits.
        # NOTE(review): duplicates the "Current Location" header emitted above.
        summary.append(f"**Current Location:** {self.graph[current_id]['name']}")
        potential = self.graph[current_id].get("potential_exits", [])
        known = self.graph[current_id].get("exits", {})
        blocked = self.graph[current_id].get("blocked_exits", {})
        summary.append("**Immediate Navigation:**")
        # Merge every exit source so nothing is missed.
        all_possible = set(potential) | set(known.keys()) | set(blocked.keys())
        if not all_possible :
            summary.append("- ⚠️ NO EXITS DETECTED IN SCAN.")
            summary.append("- **HYPOTHETICAL DIRECTIONS:** [NORTH, SOUTH, EAST, WEST,NORTHEAST,NORTHWEST, SOUTHEAST, SOUTHWEST , UP, DOWN]")
            summary.append("- **ADVICE:** Use 'LOOK' or 'SEARCH' to confirm exits before moving, otherwise you might hit a wall.")
        for d in all_possible:
            d_l = d.lower()
            if d_l in blocked:
                summary.append(f"- {d.upper()} : 🚫 BLOCKED ({blocked[d_l]})")
            elif d_l in known:
                target_name = self.graph.get(known[d_l], {}).get("name", "Unknown")
                summary.append(f"- {d.upper()} : Leads to {target_name} ✅")
            else:
                summary.append(f"- {d.upper()} : 🌟 [NEW - NEVER TESTED]")
        room_data = self.graph.get(current_id, {})
        # Blocked-exits recap.
        blocked = room_data.get("blocked_exits", {})
        if blocked:
            summary.append("\n**🚫 BLOCKED / FAILED DIRECTIONS:**")
            for d, reason in blocked.items():
                summary.append(f"- {d.upper()} : {reason}")
        # Free-form notes (e.g. one-way passages).
        for note in room_data.get("notes", []):
            summary.append(f"- ⚠️ {note}")
        return "\n".join(summary)
if __name__ == "__main__":
    # Local smoke test: run the agent against the MCP server.
    import asyncio
    asyncio.run(test_agent())